diff --git a/.gitignore b/.gitignore index a010ab8bb30..88502af3202 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,8 @@ /bin /bin.* /pom.xml +/nbproject +/nb-build # ./lucene diff --git a/build.xml b/build.xml index 6cf53b99698..8cb28404cf0 100644 --- a/build.xml +++ b/build.xml @@ -36,10 +36,7 @@ depends="check-svn-working-copy,validate,documentation-lint"/> - - - - + @@ -194,6 +191,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev-tools/idea/.idea/ant.xml b/dev-tools/idea/.idea/ant.xml index e9f3c85bcb6..2cd14fd831b 100644 --- a/dev-tools/idea/.idea/ant.xml +++ b/dev-tools/idea/.idea/ant.xml @@ -45,6 +45,9 @@ + + + diff --git a/dev-tools/idea/.idea/libraries/Solr_morphlines_cell_library.xml b/dev-tools/idea/.idea/libraries/Solr_morphlines_cell_library.xml new file mode 100644 index 00000000000..cbc99073448 --- /dev/null +++ b/dev-tools/idea/.idea/libraries/Solr_morphlines_cell_library.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/.idea/libraries/Solr_morphlines_core_library.xml b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_library.xml new file mode 100644 index 00000000000..4260f5816d2 --- /dev/null +++ b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_library.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/.idea/libraries/Solr_morphlines_core_test_library.xml b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_test_library.xml new file mode 100644 index 00000000000..74699f467dd --- /dev/null +++ b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_test_library.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml index 58111cfec1b..f966e7f16d8 100644 --- a/dev-tools/idea/.idea/modules.xml +++ b/dev-tools/idea/.idea/modules.xml @@ -49,6 +49,9 @@ + + + diff --git a/dev-tools/idea/.idea/workspace.xml 
b/dev-tools/idea/.idea/workspace.xml index 0c578089bce..00b4418ef4a 100644 --- a/dev-tools/idea/.idea/workspace.xml +++ b/dev-tools/idea/.idea/workspace.xml @@ -235,6 +235,27 @@ + + + + + + + + + + + + - + @@ -281,10 +302,13 @@ - - - - + + + + + + + diff --git a/dev-tools/idea/lucene/benchmark/src/benchmark.iml b/dev-tools/idea/lucene/benchmark/src/benchmark.iml index 6e3d1b7774a..6c1d5581646 100644 --- a/dev-tools/idea/lucene/benchmark/src/benchmark.iml +++ b/dev-tools/idea/lucene/benchmark/src/benchmark.iml @@ -33,5 +33,6 @@ + diff --git a/dev-tools/idea/lucene/demo/demo.iml b/dev-tools/idea/lucene/demo/demo.iml index 50b0bcad843..a200e9f5779 100644 --- a/dev-tools/idea/lucene/demo/demo.iml +++ b/dev-tools/idea/lucene/demo/demo.iml @@ -26,5 +26,7 @@ + + diff --git a/dev-tools/idea/lucene/facet/facet.iml b/dev-tools/idea/lucene/facet/facet.iml index 8da5b5da794..02cbf8f84f1 100644 --- a/dev-tools/idea/lucene/facet/facet.iml +++ b/dev-tools/idea/lucene/facet/facet.iml @@ -15,6 +15,7 @@ + diff --git a/dev-tools/idea/solr/contrib/map-reduce/map-reduce.iml b/dev-tools/idea/solr/contrib/map-reduce/map-reduce.iml new file mode 100644 index 00000000000..676d4f64a9a --- /dev/null +++ b/dev-tools/idea/solr/contrib/map-reduce/map-reduce.iml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/solr/contrib/morphlines-cell/morphlines-cell.iml b/dev-tools/idea/solr/contrib/morphlines-cell/morphlines-cell.iml new file mode 100644 index 00000000000..4e2c9ba1409 --- /dev/null +++ b/dev-tools/idea/solr/contrib/morphlines-cell/morphlines-cell.iml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/solr/contrib/morphlines-core/morphlines-core.iml b/dev-tools/idea/solr/contrib/morphlines-core/morphlines-core.iml new file mode 100644 index 00000000000..0de82170ca1 --- /dev/null +++ 
b/dev-tools/idea/solr/contrib/morphlines-core/morphlines-core.iml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index 81131dd817f..5539f18857e 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -159,7 +159,7 @@ de.thetaphi forbiddenapis - 1.3 + 1.4 + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr-map-reduce + jar + Apache Solr map-reduce index construction + Apache Solr - map-reduce index construction + + solr/contrib/map-reduce + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + org.apache.lucene + lucene-test-framework + test + + + org.apache.solr + solr-test-framework + test + +@solr-map-reduce.internal.dependencies@ +@solr-map-reduce.external.dependencies@ +@solr-map-reduce.internal.test.dependencies@ +@solr-map-reduce.external.test.dependencies@ + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/dev-tools/maven/solr/contrib/morphlines-cell/pom.xml.template b/dev-tools/maven/solr/contrib/morphlines-cell/pom.xml.template new file mode 100644 index 00000000000..9bf9588e590 --- /dev/null +++ b/dev-tools/maven/solr/contrib/morphlines-cell/pom.xml.template @@ -0,0 +1,104 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr-morphlines-cell + jar + Apache Solr Cell Morphlines + Apache Solr - Cell Morphlines + + solr/contrib/morphlines-cell 
+ ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + org.apache.lucene + lucene-test-framework + test + + + org.apache.solr + solr-test-framework + test + + + org.apache.solr + solr-morphlines-core + ${project.version} + test-jar + test + +@solr-morphlines-cell.internal.dependencies@ +@solr-morphlines-cell.external.dependencies@ +@solr-morphlines-cell.internal.test.dependencies@ +@solr-morphlines-cell.external.test.dependencies@ + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/dev-tools/maven/solr/contrib/morphlines-core/pom.xml.template b/dev-tools/maven/solr/contrib/morphlines-core/pom.xml.template new file mode 100644 index 00000000000..a319566d584 --- /dev/null +++ b/dev-tools/maven/solr/contrib/morphlines-core/pom.xml.template @@ -0,0 +1,108 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr-morphlines-core + jar + Apache Solr Morphlines Core + Apache Solr - Morphlines Core + + solr/contrib/morphlines-core + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + org.apache.lucene + lucene-test-framework + test + + + org.apache.solr + solr-test-framework + test + +@solr-morphlines-core.internal.dependencies@ +@solr-morphlines-core.external.dependencies@ +@solr-morphlines-core.internal.test.dependencies@ +@solr-morphlines-core.external.test.dependencies@ + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + diff --git a/dev-tools/maven/solr/contrib/pom.xml.template b/dev-tools/maven/solr/contrib/pom.xml.template index b0f9bc8606c..db6ce3dc7e0 100644 --- a/dev-tools/maven/solr/contrib/pom.xml.template +++ b/dev-tools/maven/solr/contrib/pom.xml.template @@ -37,6 +37,9 @@ dataimporthandler-extras extraction langid + morphlines-cell + morphlines-core + map-reduce uima velocity diff --git a/dev-tools/maven/solr/pom.xml.template b/dev-tools/maven/solr/pom.xml.template index 7554d6987ff..73ceda700b5 100644 --- a/dev-tools/maven/solr/pom.xml.template +++ b/dev-tools/maven/solr/pom.xml.template @@ -81,6 +81,11 @@ Public online Restlet repository http://maven.restlet.org + + releases.cloudera.com + Cloudera Releases + https://repository.cloudera.com/artifactory/libs-release + diff --git a/dev-tools/netbeans/nb-project.xsl b/dev-tools/netbeans/nb-project.xsl new file mode 100644 index 00000000000..69b19447d23 --- /dev/null +++ b/dev-tools/netbeans/nb-project.xsl @@ -0,0 +1,165 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + : + + + + + + + + + 
org.netbeans.modules.ant.freeform + + + lucene + + + + + + + java + + + + + + + + + + compile + + + clean + + + documentation + + + test + + + clean + compile + + + + + + + + + packages + tree + + + + + + + + + + + build.xml + + + + + + + + + + + + + + + + + + + + + nb-build/classes + + + + + + + + + + + + + nb-build/test-classes + + + + + + + + + diff --git a/dev-tools/netbeans/nbproject/project.properties b/dev-tools/netbeans/nbproject/project.properties new file mode 100644 index 00000000000..db66f9dda6c --- /dev/null +++ b/dev-tools/netbeans/nbproject/project.properties @@ -0,0 +1,9 @@ +auxiliary.org-netbeans-modules-editor-indent.CodeStyle.project.expand-tabs=true +auxiliary.org-netbeans-modules-editor-indent.CodeStyle.project.indent-shift-width=2 +auxiliary.org-netbeans-modules-editor-indent.CodeStyle.project.spaces-per-tab=2 +auxiliary.org-netbeans-modules-editor-indent.CodeStyle.project.tab-size=2 +auxiliary.org-netbeans-modules-editor-indent.CodeStyle.project.text-limit-width=80 +auxiliary.org-netbeans-modules-editor-indent.CodeStyle.project.text-line-wrap=none +auxiliary.org-netbeans-modules-editor-indent.CodeStyle.usedProfile=project +auxiliary.org-netbeans-modules-editor-indent.text.x-java.CodeStyle.project.continuationIndentSize=4 +auxiliary.org-netbeans-modules-editor-indent.text.x-java.CodeStyle.project.spaceAfterTypeCast=false diff --git a/extra-targets.xml b/extra-targets.xml index 9ac1e058dcc..cdf861df86e 100644 --- a/extra-targets.xml +++ b/extra-targets.xml @@ -25,6 +25,27 @@ + + + + + + + + + + + + + + + + - diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dd75317f960..4f901cf372d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -68,6 +68,14 @@ New Features * LUCENE-5336: Add SimpleQueryParser: parser for human-entered queries. (Jack Conradson via Robert Muir) +* LUCENE-5329: suggest: DocumentDictionary and + DocumentExpressionDictionary are now lenient for dirty documents + (missing the term, weight or payload). 
(Areek Zillur via + Mike McCandless) + +* SOLR-1871: The RangeMapFloatFunction accepts an arbitrary ValueSource + as target and default values. (Chris Harris, shalin) + * LUCENE-5371: Speed up Lucene range faceting from O(N) per hit to O(log(N)) per hit using segment trees; this only really starts to matter in practice if the number of ranges is over 10 or so. (Mike @@ -83,6 +91,30 @@ Build * LUCENE-5322: Clean up / simplify Maven-related Ant targets. (Steve Rowe) +* LUCENE-5347: Upgrade forbidden-apis checker to version 1.4. + (Uwe Schindler) + +* LUCENE-4381: Upgrade analysis/icu to 52.1. (Robert Muir) + +* LUCENE-5357: Upgrade StandardTokenizer and UAX29URLEmailTokenizer to + Unicode 6.3; update UAX29URLEmailTokenizer's recognized top level + domains in URLs and Emails from the IANA Root Zone Database. + (Steve Rowe) + +* LUCENE-5360: Add support for developing in Netbeans IDE. + (Michal Hlavac, Uwe Schindler, Steve Rowe) + +Bug fixes + +* LUCENE-5285: Improved highlighting of multi-valued fields with + FastVectorHighlighter. (Nik Everett via Adrien Grand) + +Changes in Runtime Behavior + +* LUCENE-5362: IndexReader and SegmentCoreReaders now throw + AlreadyClosedException if the refCount is incremented but + is less than 1. (Simon Willnauer) + ======================= Lucene 4.6.0 ======================= New Features @@ -176,39 +208,9 @@ New Features Bug Fixes -* LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead - of IOContext.READ (Shikhar Bhushan via Mike McCandless) - -* LUCENE-5242: DirectoryTaxonomyWriter.replaceTaxonomy did not fully reset - its state, which could result in exceptions being thrown, as well as - incorrect ordinals returned from getParent. (Shai Erera) - -* LUCENE-5254: Fixed bounded memory leak, where objects like live - docs bitset were not freed from an starting reader after reopening - to a new reader and closing the original one. 
(Shai Erera, Mike - McCandless) - -* LUCENE-5262: Fixed file handle leaks when multiple attempts to open an - NRT reader hit exceptions. (Shai Erera) - -* LUCENE-5263: Transient IOExceptions, e.g. due to disk full or file - descriptor exhaustion, hit at unlucky times inside IndexWriter could - lead to silently losing deletions. (Shai Erera, Mike McCandless) - -* LUCENE-5264: CommonTermsQuery ignored minMustMatch if only high-frequent - terms were present in the query and the high-frequent operator was set - to SHOULD. (Simon Willnauer) - -* LUCENE-5269: Fix bug in NGramTokenFilter where it would sometimes count - unicode characters incorrectly. (Mike McCandless, Robert Muir) - * LUCENE-5272: OpenBitSet.ensureCapacity did not modify numBits, causing false assertion errors in fastSet. (Shai Erera) -* LUCENE-5289: IndexWriter.hasUncommittedChanges was returning false - when there were buffered delete-by-Term. (Shalin Shekhar Mangar, - Mike McCandless) - * LUCENE-5303: OrdinalsCache did not use coreCacheKey, resulting in over caching across multiple threads. (Mike McCandless, Shai Erera) @@ -221,7 +223,11 @@ Bug Fixes deleted at a later point in time. This could cause short-term disk pollution or OOM if in-memory directories are used. (Simon Willnauer) -API Changes: +* LUCENE-5342: Fixed bulk-merge issue in CompressingStoredFieldsFormat which + created corrupted segments when mixing chunk sizes. + Lucene41StoredFieldsFormat is not impacted. (Adrien Grand, Robert Muir) + +API Changes * LUCENE-5222: Add SortField.needsScores(). Previously it was not possible for a custom Sort that makes use of the relevance score to work correctly @@ -314,6 +320,40 @@ Tests is either a "word" character or not), but now it gives a general longest-match behavior. 
(Nik Everett via Robert Muir) +======================= Lucene 4.5.1 ======================= + +Bug Fixes + +* LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead + of IOContext.READ (Shikhar Bhushan via Mike McCandless) + +* LUCENE-5242: DirectoryTaxonomyWriter.replaceTaxonomy did not fully reset + its state, which could result in exceptions being thrown, as well as + incorrect ordinals returned from getParent. (Shai Erera) + +* LUCENE-5254: Fixed bounded memory leak, where objects like live + docs bitset were not freed from an starting reader after reopening + to a new reader and closing the original one. (Shai Erera, Mike + McCandless) + +* LUCENE-5262: Fixed file handle leaks when multiple attempts to open an + NRT reader hit exceptions. (Shai Erera) + +* LUCENE-5263: Transient IOExceptions, e.g. due to disk full or file + descriptor exhaustion, hit at unlucky times inside IndexWriter could + lead to silently losing deletions. (Shai Erera, Mike McCandless) + +* LUCENE-5264: CommonTermsQuery ignored minMustMatch if only high-frequent + terms were present in the query and the high-frequent operator was set + to SHOULD. (Simon Willnauer) + +* LUCENE-5269: Fix bug in NGramTokenFilter where it would sometimes count + unicode characters incorrectly. (Mike McCandless, Robert Muir) + +* LUCENE-5289: IndexWriter.hasUncommittedChanges was returning false + when there were buffered delete-by-Term. 
(Shalin Shekhar Mangar, + Mike McCandless) + ======================= Lucene 4.5.0 ======================= New features diff --git a/lucene/analysis/common/build.xml b/lucene/analysis/common/build.xml index 2ed5b7a18a4..a4ed2c480a9 100644 --- a/lucene/analysis/common/build.xml +++ b/lucene/analysis/common/build.xml @@ -45,17 +45,13 @@ - - + nobak="on" inputstreamctor="false"/> + - + match="/\*\*\s*\*\s*Creates a new scanner\s*\*\s*\*\s*@param\s*in\s*the java.io.Reader to read input from\.\s*\*/\s*public HTMLStripCharFilter\(java\.io\.Reader in\)\s*\{\s*this.zzReader = in;\s*\}" + replace="" flags="s"/> @@ -96,15 +92,7 @@ - - - + diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex index a32e1480828..c717b03489e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex @@ -73,7 +73,7 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha" upperCaseVariantsAccepted.put("amp", "AMP"); } private static final CharArrayMap entityValues - = new CharArrayMap(Version.LUCENE_40, 253, false); + = new CharArrayMap(Version.LUCENE_CURRENT, 253, false); static { String[] entities = { "AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2", diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro index f0a1e5ddaf2..f80ebd2de77 100755 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro +++ 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro @@ -14,7 +14,7 @@ * limitations under the License. */ -// Generated using ICU4J 49.1.0.0 +// Generated using ICU4J 52.1.0.0 // by org.apache.lucene.analysis.icu.GenerateHTMLStripCharFilterSupplementaryMacros diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java index 315c0eba500..f39f4ffa084 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex. */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT */ package org.apache.lucene.analysis.charfilter; @@ -152,77 +152,77 @@ public final class HTMLStripCharFilter extends BaseCharFilter { "\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+ "\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+ "\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+ - "\1\1\1\2\2\0\12\274\41\0\3\2\1\41\1\0\12\274\6\0"+ - "\130\1\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0"+ - "\14\2\4\0\14\2\12\0\12\274\36\1\2\0\5\1\13\0\54\1"+ - "\4\0\21\2\7\1\2\2\6\0\12\274\1\2\45\0\27\1\5\2"+ - "\4\0\65\1\12\2\1\0\35\2\2\0\1\2\12\274\6\0\12\274"+ - "\15\0\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\274\21\0"+ - "\11\2\14\0\3\2\36\1\15\2\2\1\12\274\54\1\16\2\14\0"+ - "\44\1\24\2\10\0\12\274\3\0\3\1\12\274\44\1\122\0\3\2"+ - "\1\0\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2"+ - "\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0"+ - "\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0"+ - "\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0"+ - "\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0"+ - "\13\41\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0"+ - "\1\41\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0"+ - 
"\1\2\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0"+ - "\1\1\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0"+ - "\20\1\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0"+ - "\57\1\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0"+ - "\46\1\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0"+ - "\1\2\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+ - "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2"+ - "\u0200\0\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0"+ - "\5\1\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1"+ - "\5\0\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1"+ - "\112\0\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1"+ - "\12\274\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0"+ - "\1\2\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0"+ - "\4\1\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2"+ - "\27\1\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\274"+ - "\6\0\22\2\6\1\3\0\1\1\4\0\12\274\34\1\10\2\2\0"+ - "\27\1\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1"+ - "\12\274\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0"+ - "\12\274\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1"+ - "\3\2\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1"+ - "\2\0\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1"+ - "\2\0\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0"+ - "\2\2\2\0\12\274\6\0\u2ba4\1\14\0\27\1\4\0\61\1\4\0"+ - "\1\170\1\223\1\103\1\165\1\136\1\214\2\0\1\160\1\153\2\0"+ - "\1\120\1\210\14\0\1\105\1\127\20\0\1\122\7\0\1\256\1\112"+ - "\5\0\1\143\4\0\51\120\1\110\3\120\1\124\1\220\17\0\1\133"+ - "\u02c1\0\1\252\277\0\2\123\1\212\3\222\2\211\1\222\1\211\2\222"+ - "\1\221\21\222\11\213\1\157\7\213\7\204\1\156\1\204\1\246\2\207"+ - "\1\166\1\246\1\207\1\166\10\246\2\167\5\203\2\155\5\203\1\107"+ - "\10\202\5\154\3\224\12\251\20\224\3\225\32\227\1\226\2\200\2\234"+ - "\1\235\2\234\2\235\2\234\1\235\3\200\1\177\2\200\12\250\1\247"+ - "\1\176\1\171\7\176\1\171\13\176\31\200\7\176\12\250\1\176\5\134"+ - "\3\245\3\142\1\140\4\142\2\140\10\142\1\140\7\141\1\137\2\141"+ - "\7\142\16\245\1\135\4\245\1\106\4\244\1\106\5\255\1\254\1\255"+ - "\3\254\7\255\1\254\23\255\5\264\3\255\6\264\2\255\6\253\5\263"+ - 
"\3\262\2\142\7\257\36\142\4\257\5\142\5\245\6\244\2\245\1\244"+ - "\4\141\13\253\12\244\26\253\15\134\1\243\2\134\1\152\3\237\1\134"+ - "\2\237\5\151\4\237\4\152\1\151\3\152\1\151\5\152\2\147\1\116"+ - "\2\147\1\116\1\147\2\116\1\147\1\116\12\147\1\116\4\146\1\115"+ - "\1\236\1\240\1\150\3\164\1\240\2\164\1\260\2\261\2\164\1\150"+ - "\1\164\1\150\1\164\1\150\1\164\3\150\1\164\2\150\1\164\1\150"+ - "\2\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150"+ - "\1\162\2\145\1\162\1\145\2\162\4\145\1\162\7\145\1\162\4\145"+ - "\1\162\4\145\1\164\1\150\1\164\12\216\1\217\21\216\1\217\3\215"+ - "\1\217\3\216\1\217\1\216\2\144\2\216\1\217\15\241\4\201\4\206"+ - "\1\242\1\161\10\242\7\206\6\164\4\113\1\121\37\113\1\121\4\113"+ - "\25\174\1\131\11\174\21\130\5\174\1\104\12\117\5\174\6\205\4\162"+ - "\1\163\1\130\5\231\12\232\17\231\1\125\3\114\14\230\1\126\11\173"+ - "\1\172\5\173\4\233\13\175\2\132\11\173\1\172\31\173\1\172\4\126"+ - "\4\173\2\172\2\265\1\111\5\265\52\111\u1900\0\u016e\1\2\0\152\1"+ - "\46\0\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1"+ - "\1\0\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1"+ - "\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2"+ - "\20\0\7\2\14\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1"+ - "\23\0\12\274\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1"+ - "\3\0\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0"; + "\1\1\1\2\2\0\12\274\41\0\3\2\2\0\12\274\6\0\130\1"+ + "\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+ + "\4\0\14\2\12\0\12\274\36\1\2\0\5\1\13\0\54\1\4\0"+ + "\21\2\7\1\2\2\6\0\12\274\1\2\45\0\27\1\5\2\4\0"+ + "\65\1\12\2\1\0\35\2\2\0\1\2\12\274\6\0\12\274\15\0"+ + "\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\274\21\0\11\2"+ + "\14\0\3\2\36\1\15\2\2\1\12\274\54\1\16\2\14\0\44\1"+ + "\24\2\10\0\12\274\3\0\3\1\12\274\44\1\122\0\3\2\1\0"+ + "\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2\25\0"+ + "\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1"+ + "\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1"+ + "\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1"+ + 
"\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41"+ + "\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41"+ + "\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2"+ + "\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1"+ + "\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1"+ + "\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1"+ + "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1"+ + "\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2"+ + "\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+ + "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0"+ + "\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1"+ + "\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0"+ + "\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0"+ + "\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\274"+ + "\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0\1\2"+ + "\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+ + "\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2\27\1"+ + "\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\274\6\0"+ + "\22\2\6\1\3\0\1\1\4\0\12\274\34\1\10\2\2\0\27\1"+ + "\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\274"+ + "\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\274"+ + "\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1\3\2"+ + "\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1\2\0"+ + "\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1\2\0"+ + "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+ + "\2\0\12\274\6\0\u2ba4\1\14\0\27\1\4\0\61\1\4\0\1\170"+ + "\1\223\1\103\1\165\1\136\1\214\2\0\1\160\1\153\2\0\1\120"+ + "\1\210\14\0\1\105\1\127\20\0\1\122\7\0\1\256\1\112\5\0"+ + "\1\143\4\0\51\120\1\110\3\120\1\124\1\220\17\0\1\133\u02c1\0"+ + "\1\252\277\0\2\123\1\212\3\222\2\211\1\222\1\211\2\222\1\221"+ + "\21\222\11\213\1\157\7\213\7\204\1\156\1\204\1\246\2\207\1\166"+ + "\1\246\1\207\1\166\10\246\2\167\5\203\2\155\5\203\1\107\10\202"+ + "\5\154\3\224\12\251\20\224\3\225\32\227\1\226\2\200\2\234\1\235"+ + "\2\234\2\235\2\234\1\235\3\200\1\177\2\200\12\250\1\247\1\176"+ + "\1\171\7\176\1\171\13\176\31\200\7\176\12\250\1\176\5\134\3\245"+ + 
"\3\142\1\140\4\142\2\140\10\142\1\140\7\141\1\137\2\141\7\142"+ + "\16\245\1\135\4\245\1\106\4\244\1\106\5\255\1\254\1\255\3\254"+ + "\7\255\1\254\23\255\5\264\3\255\6\264\2\255\6\253\5\263\3\262"+ + "\2\142\7\257\36\142\4\257\5\142\5\245\6\244\2\245\1\244\4\141"+ + "\13\253\12\244\26\253\15\134\1\243\2\134\1\152\3\237\1\134\2\237"+ + "\5\151\4\237\4\152\1\151\3\152\1\151\5\152\2\147\1\116\2\147"+ + "\1\116\1\147\2\116\1\147\1\116\12\147\1\116\4\146\1\115\1\236"+ + "\1\240\1\150\3\164\1\240\2\164\1\260\2\261\2\164\1\150\1\164"+ + "\1\150\1\164\1\150\1\164\3\150\1\164\2\150\1\164\1\150\2\164"+ + "\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\162"+ + "\2\145\1\162\1\145\2\162\4\145\1\162\7\145\1\162\4\145\1\162"+ + "\4\145\1\164\1\150\1\164\12\216\1\217\21\216\1\217\3\215\1\217"+ + "\3\216\1\217\1\216\2\144\2\216\1\217\15\241\4\201\4\206\1\242"+ + "\1\161\10\242\7\206\6\164\4\113\1\121\37\113\1\121\4\113\25\174"+ + "\1\131\11\174\21\130\5\174\1\104\12\117\5\174\6\205\4\162\1\163"+ + "\1\130\5\231\12\232\17\231\1\125\3\114\14\230\1\126\11\173\1\172"+ + "\5\173\4\233\13\175\2\132\11\173\1\172\31\173\1\172\4\126\4\173"+ + "\2\172\2\265\1\111\5\265\52\111\u1900\0\u016e\1\2\0\152\1\46\0"+ + "\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1\1\0"+ + "\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1\41\0"+ + "\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2\20\0"+ + "\7\2\14\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1\23\0"+ + "\12\274\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1\3\0"+ + "\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0"; /** * Translates characters to character classes @@ -30673,7 +30673,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter { upperCaseVariantsAccepted.put("amp", "AMP"); } private static final CharArrayMap entityValues - = new CharArrayMap(Version.LUCENE_40, 253, false); + = new CharArrayMap(Version.LUCENE_CURRENT, 253, false); static { String[] entities = { "AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2", @@ -30812,7 +30812,7 @@ public final class 
HTMLStripCharFilter extends BaseCharFilter { escapeSTYLE = true; } else { if (null == this.escapedTags) { - this.escapedTags = new CharArraySet(Version.LUCENE_40, 16, true); + this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true); } this.escapedTags.add(tag); } @@ -30895,6 +30895,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter { + /** * Unpacks the compressed character translation table. * @@ -30905,7 +30906,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter { char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 2778) { + while (i < 2776) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex index 655b427a1ad..cbef3f439bc 100755 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex @@ -34,7 +34,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder; */ %% -%unicode 6.1 +%unicode 6.3 %apiprivate %type int %final @@ -197,7 +197,7 @@ InlineElment = ( [aAbBiIqQsSuU] | escapeSTYLE = true; } else { if (null == this.escapedTags) { - this.escapedTags = new CharArraySet(Version.LUCENE_40, 16, true); + this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true); } this.escapedTags.add(tag); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py index ff9ee6bf3a1..8a080b9da3b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py +++ 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py @@ -61,7 +61,7 @@ def main(): print ' upperCaseVariantsAccepted.put("amp", "AMP");' print ' }' print ' private static final CharArrayMap entityValues' - print ' = new CharArrayMap(Version.LUCENE_40, %i, false);' % len(keys) + print ' = new CharArrayMap(Version.LUCENE_CURRENT, %i, false);' % len(keys) print ' static {' print ' String[] entities = {' output_line = ' ' diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java index 815474f2b68..3db58dd9fe5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java @@ -196,7 +196,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"), false); @@ -222,7 +222,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ", "αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ", "μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ", @@ -247,7 +247,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ", "πεθ", "πικρ", "ποτ", "σιχ", "χ"), false); @@ -274,11 +274,11 @@ public class GreekStemmer { return len; } - private static final CharArraySet 
exc8a = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("τρ", "τσ"), false); - private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ", "καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ", "π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ", @@ -337,7 +337,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ", "βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ", "σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"), @@ -425,11 +425,11 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"), false); - private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"), false); @@ -449,7 +449,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"), false); @@ -483,7 +483,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc14 = new 
CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ", "λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ", "ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε", @@ -521,7 +521,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ", "αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ", "ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ", @@ -530,7 +530,7 @@ public class GreekStemmer { "ουλαμ", "ουρ", "π", "τρ", "μ"), false); - private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ψοφ", "ναυλοχ"), false); @@ -567,7 +567,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"), false); @@ -587,7 +587,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"), false); @@ -601,7 +601,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"), false); @@ -625,7 +625,7 @@ public class GreekStemmer { return len; } - private static 
final CharArraySet exc19 = new CharArraySet(Version.LUCENE_50, + private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"), false); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java index a0d5bc653ed..8f87d91dee9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java @@ -280,10 +280,7 @@ public class KStemmer { DictEntry defaultEntry; DictEntry entry; - CharArrayMap d = new CharArrayMap( - Version.LUCENE_50, 1000, false); - - d = new CharArrayMap(Version.LUCENE_50, 1000, false); + CharArrayMap d = new CharArrayMap(Version.LUCENE_CURRENT, 1000, false); for (int i = 0; i < exceptionWords.length; i++) { if (!d.containsKey(exceptionWords[i])) { entry = new DictEntry(exceptionWords[i], true); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java index b0ded28e1d1..ae2948284d6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java @@ -34,7 +34,7 @@ public class HunspellStemmer { private final int recursionCap; private final HunspellDictionary dictionary; private final StringBuilder segment = new StringBuilder(); - private CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_40); + private CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT); /** * Constructs a new HunspellStemmer which will use the provided HunspellDictionary to create its stems. 
Uses the @@ -324,7 +324,8 @@ public class HunspellStemmer { InputStream affixInputStream = new FileInputStream(args[offset++]); InputStream dicInputStream = new FileInputStream(args[offset++]); - HunspellDictionary dictionary = new HunspellDictionary(affixInputStream, dicInputStream, Version.LUCENE_40, ignoreCase); + // :Post-Release-Update-Version.LUCENE_XY: + HunspellDictionary dictionary = new HunspellDictionary(affixInputStream, dicInputStream, Version.LUCENE_50, ignoreCase); affixInputStream.close(); dicInputStream.close(); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java index ac779812c67..e3c7a033bdb 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java @@ -35,7 +35,7 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter { private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); // use a fixed version, as we don't care about case sensitivity. 
- private final CharArraySet previous = new CharArraySet(Version.LUCENE_50, 8, false); + private final CharArraySet previous = new CharArraySet(Version.LUCENE_CURRENT, 8, false); /** * Creates a new RemoveDuplicatesTokenFilter diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java index 24cfed798b4..0915d536fb0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java @@ -134,7 +134,7 @@ public abstract class RSLPStemmerBase { if (!exceptions[i].endsWith(suffix)) throw new RuntimeException("useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'"); } - this.exceptions = new CharArraySet(Version.LUCENE_50, + this.exceptions = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(exceptions), false); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro index b907d1438cf..5d78558a20d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro @@ -1,11 +1,12 @@ /* - * Copyright 2001-2005 The Apache Software Foundation. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -13,10 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - // Generated from IANA Root Zone Database -// file version from Saturday, July 14, 2012 4:34:14 AM UTC -// generated on Sunday, July 15, 2012 12:59:44 AM UTC +// file version from Friday, December 6, 2013 4:34:10 AM UTC +// generated on Friday, December 6, 2013 3:21:59 PM UTC // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros ASCIITLD = "." ( @@ -49,6 +49,7 @@ ASCIITLD = "." ( | [bB][gG] | [bB][hH] | [bB][iI] + | [bB][iI][kK][eE] | [bB][iI][zZ] | [bB][jJ] | [bB][mM] @@ -62,6 +63,7 @@ ASCIITLD = "." ( | [bB][yY] | [bB][zZ] | [cC][aA] + | [cC][aA][mM][eE][rR][aA] | [cC][aA][tT] | [cC][cC] | [cC][dD] @@ -71,10 +73,13 @@ ASCIITLD = "." ( | [cC][iI] | [cC][kK] | [cC][lL] + | [cC][lL][oO][tT][hH][iI][nN][gG] | [cC][mM] | [cC][nN] | [cC][oO] | [cC][oO][mM] + | [cC][oO][nN][sS][tT][rR][uU][cC][tT][iI][oO][nN] + | [cC][oO][nN][tT][rR][aA][cC][tT][oO][rR][sS] | [cC][oO][oO][pP] | [cC][rR] | [cC][uU] @@ -84,6 +89,8 @@ ASCIITLD = "." ( | [cC][yY] | [cC][zZ] | [dD][eE] + | [dD][iI][aA][mM][oO][nN][dD][sS] + | [dD][iI][rR][eE][cC][tT][oO][rR][yY] | [dD][jJ] | [dD][kK] | [dD][mM] @@ -93,8 +100,11 @@ ASCIITLD = "." ( | [eE][dD][uU] | [eE][eE] | [eE][gG] + | [eE][nN][tT][eE][rR][pP][rR][iI][sS][eE][sS] + | [eE][qQ][uU][iI][pP][mM][eE][nN][tT] | [eE][rR] | [eE][sS] + | [eE][sS][tT][aA][tT][eE] | [eE][tT] | [eE][uU] | [fF][iI] @@ -104,6 +114,7 @@ ASCIITLD = "." 
( | [fF][oO] | [fF][rR] | [gG][aA] + | [gG][aA][lL][lL][eE][rR][yY] | [gG][bB] | [gG][dD] | [gG][eE] @@ -118,14 +129,17 @@ ASCIITLD = "." ( | [gG][pP] | [gG][qQ] | [gG][rR] + | [gG][rR][aA][pP][hH][iI][cC][sS] | [gG][sS] | [gG][tT] | [gG][uU] + | [gG][uU][rR][uU] | [gG][wW] | [gG][yY] | [hH][kK] | [hH][mM] | [hH][nN] + | [hH][oO][lL][dD][iI][nN][gG][sS] | [hH][rR] | [hH][tT] | [hH][uU] @@ -150,6 +164,7 @@ ASCIITLD = "." ( | [kK][gG] | [kK][hH] | [kK][iI] + | [kK][iI][tT][cC][hH][eE][nN] | [kK][mM] | [kK][nN] | [kK][pP] @@ -158,9 +173,11 @@ ASCIITLD = "." ( | [kK][yY] | [kK][zZ] | [lL][aA] + | [lL][aA][nN][dD] | [lL][bB] | [lL][cC] | [lL][iI] + | [lL][iI][gG][hH][tT][iI][nN][gG] | [lL][kK] | [lL][rR] | [lL][sS] @@ -172,6 +189,7 @@ ASCIITLD = "." ( | [mM][cC] | [mM][dD] | [mM][eE] + | [mM][eE][nN][uU] | [mM][gG] | [mM][hH] | [mM][iI][lL] @@ -214,10 +232,13 @@ ASCIITLD = "." ( | [pP][fF] | [pP][gG] | [pP][hH] + | [pP][hH][oO][tT][oO][gG][rR][aA][pP][hH][yY] | [pP][kK] | [pP][lL] + | [pP][lL][uU][mM][bB][iI][nN][gG] | [pP][mM] | [pP][nN] + | [pP][oO][sS][tT] | [pP][rR] | [pP][rR][oO] | [pP][sS] @@ -235,9 +256,11 @@ ASCIITLD = "." ( | [sS][cC] | [sS][dD] | [sS][eE] + | [sS][eE][xX][yY] | [sS][gG] | [sS][hH] | [sS][iI] + | [sS][iI][nN][gG][lL][eE][sS] | [sS][jJ] | [sS][kK] | [sS][lL] @@ -251,18 +274,22 @@ ASCIITLD = "." ( | [sS][xX] | [sS][yY] | [sS][zZ] + | [tT][aA][tT][tT][oO][oO] | [tT][cC] | [tT][dD] + | [tT][eE][cC][hH][nN][oO][lL][oO][gG][yY] | [tT][eE][lL] | [tT][fF] | [tT][gG] | [tT][hH] + | [tT][iI][pP][sS] | [tT][jJ] | [tT][kK] | [tT][lL] | [tT][mM] | [tT][nN] | [tT][oO] + | [tT][oO][dD][aA][yY] | [tT][pP] | [tT][rR] | [tT][rR][aA][vV][eE][lL] @@ -273,61 +300,62 @@ ASCIITLD = "." 
( | [uU][aA] | [uU][gG] | [uU][kK] + | [uU][nN][oO] | [uU][sS] | [uU][yY] | [uU][zZ] | [vV][aA] | [vV][cC] | [vV][eE] + | [vV][eE][nN][tT][uU][rR][eE][sS] | [vV][gG] | [vV][iI] | [vV][nN] + | [vV][oO][yY][aA][gG][eE] | [vV][uU] | [wW][fF] | [wW][sS] - | [xX][nN]--0[zZ][wW][mM]56[dD] - | [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG] | [xX][nN]--3[eE]0[bB]707[eE] | [xX][nN]--45[bB][rR][jJ]9[cC] - | [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF] | [xX][nN]--80[aA][oO]21[aA] + | [xX][nN]--80[aA][sS][eE][hH][dD][bB] + | [xX][nN]--80[aA][sS][wW][gG] | [xX][nN]--90[aA]3[aA][cC] - | [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA] | [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD] - | [xX][nN]--[dD][eE][bB][aA]0[aA][dD] | [xX][nN]--[fF][iI][qQ][sS]8[sS] | [xX][nN]--[fF][iI][qQ][zZ]9[sS] | [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD] | [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC] - | [xX][nN]--[gG]6[wW]251[dD] | [xX][nN]--[gG][eE][cC][rR][jJ]9[cC] | [xX][nN]--[hH]2[bB][rR][jJ]9[cC] - | [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA] - | [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA] + | [xX][nN]--[jJ]1[aA][mM][hH] | [xX][nN]--[jJ]6[wW]193[gG] - | [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP] - | [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV] | [xX][nN]--[kK][pP][rR][wW]13[dD] | [xX][nN]--[kK][pP][rR][yY]57[dD] + | [xX][nN]--[lL]1[aA][cC][cC] | [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ] | [xX][nN]--[mM][gG][bB]9[aA][wW][bB][fF] + | [xX][nN]--[mM][gG][bB][aA]3[aA]4[fF]16[aA] | [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH] | [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA] | [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE] | [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG] | [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR] + | [xX][nN]--[mM][gG][bB][xX]4[cC][dD]0[aA][bB] + | [xX][nN]--[nN][gG][bB][cC]5[aA][zZ][dD] | [xX][nN]--[oO]3[cC][wW]4[hH] | [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL] | [xX][nN]--[pP]1[aA][iI] | [xX][nN]--[pP][gG][bB][sS]0[dD][hH] + | 
[xX][nN]--[qQ]9[jJ][yY][bB]4[cC] | [xX][nN]--[sS]9[bB][rR][jJ]9[cC] + | [xX][nN]--[uU][nN][uU][pP]4[yY] | [xX][nN]--[wW][gG][bB][hH]1[cC] | [xX][nN]--[wW][gG][bB][lL]6[aA] | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA] | [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH] | [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO] | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX] - | [xX][nN]--[zZ][cC][kK][zZ][aA][hH] | [xX][xX][xX] | [yY][eE] | [yY][tT] diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java index 3ce589754cf..27062911945 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex. */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT */ package org.apache.lucene.analysis.standard; @@ -58,64 +58,63 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface { * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5"+ - "\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12"+ - "\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12"+ - "\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12"+ - "\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12"+ - "\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12"+ - "\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12"+ - "\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12"+ - "\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12"+ - "\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12"+ - "\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0"+ - "\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0"+ - "\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0"+ - 
"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12"+ - "\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12"+ - "\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12"+ - "\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12"+ - "\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12"+ - "\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12"+ - "\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12"+ - "\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12"+ - "\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12"+ - "\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12"+ - "\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12"+ - "\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12"+ - "\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12"+ - "\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12"+ - "\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1"+ - "\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0"+ - "\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0"+ - "\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0"+ - "\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0"+ - "\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0"+ - "\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0"+ - "\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0"+ - "\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0"+ - "\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0"+ - "\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0"+ - "\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0"+ - "\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0"+ - "\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0"+ - "\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0"+ - "\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0"+ - "\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0"+ - "\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0"+ - "\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0"+ - "\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0"+ - "\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0"+ - "\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0"+ - 
"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13"+ - "\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0"+ - "\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12"+ - "\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12"+ - "\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12"+ - "\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12"+ - "\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2"+ - "\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12"+ - "\2\0\6\12\2\0\6\12\2\0\3\12\43\0"; + "\46\0\1\5\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0"+ + "\1\6\32\12\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12"+ + "\4\0\1\12\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12"+ + "\34\0\136\12\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12"+ + "\11\0\1\12\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12"+ + "\1\0\24\12\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12"+ + "\12\0\71\12\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12"+ + "\67\0\46\12\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12"+ + "\56\0\32\12\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12"+ + "\17\0\2\12\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0"+ + "\46\12\u015f\0\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0"+ + "\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0"+ + "\1\12\3\0\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12"+ + "\23\0\6\12\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12"+ + "\1\0\2\12\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2"+ + "\2\0\3\12\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12"+ + "\1\0\7\12\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12"+ + "\17\0\1\12\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12"+ + "\1\0\7\12\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12"+ + "\1\0\3\12\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12"+ + "\3\0\2\12\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12"+ + "\3\0\10\12\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12"+ + "\1\0\27\12\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2"+ + "\25\0\10\12\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12"+ + "\44\0\1\12\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12"+ + "\1\0\27\12\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12"+ + 
"\3\0\30\12\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1"+ + "\60\12\1\1\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0"+ + "\1\12\2\0\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0"+ + "\7\12\1\0\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0"+ + "\4\12\1\0\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0"+ + "\12\2\2\0\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0"+ + "\42\12\35\0\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0"+ + "\12\2\6\0\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0"+ + "\104\12\5\0\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0"+ + "\4\12\2\0\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0"+ + "\1\12\1\0\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0"+ + "\7\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0"+ + "\27\12\1\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0"+ + "\47\12\1\0\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0"+ + "\10\12\12\0\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0"+ + "\12\2\6\0\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0"+ + "\26\12\2\0\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0"+ + "\1\12\1\0\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0"+ + "\7\12\1\0\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0"+ + "\6\12\4\0\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0"+ + "\1\12\4\0\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0"+ + "\1\12\1\0\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0"+ + "\7\12\u0ecb\0\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13"+ + "\2\13\132\13\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0"+ + "\30\12\70\0\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13"+ + "\132\13\u048d\12\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12"+ + "\5\0\1\12\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12"+ + "\1\0\2\12\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12"+ + "\2\0\66\12\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12"+ + "\23\0\12\2\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12"+ + "\3\0\6\12\2\0\6\12\2\0\6\12\2\0\3\12\43\0"; /** * Translates characters to character classes @@ -128,13 +127,12 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - 
"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4"+ - "\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4"+ - "\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12"+ - "\1\4"; + "\1\0\1\1\3\2\1\3\13\0\1\2\3\4\2\0"+ + "\1\5\1\0\1\5\3\4\6\5\1\6\1\4\2\7"+ + "\1\10\1\0\1\10\3\0\2\10\1\11\1\12\1\4"; private static int [] zzUnpackAction() { - int [] result = new int[51]; + int [] result = new int[50]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -159,16 +157,16 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124"+ - "\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304"+ - "\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134"+ - "\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4"+ - "\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206"+ - "\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214"+ - "\0\u0268\0\u0276\0\u0284"; + "\0\0\0\14\0\30\0\44\0\60\0\14\0\74\0\110"+ + "\0\124\0\140\0\154\0\170\0\204\0\220\0\234\0\250"+ + "\0\264\0\300\0\314\0\330\0\344\0\360\0\374\0\u0108"+ + "\0\u0114\0\u0120\0\u012c\0\u0138\0\u0144\0\u0150\0\u015c\0\u0168"+ + "\0\u0174\0\u0180\0\u018c\0\u0198\0\u01a4\0\250\0\u01b0\0\u01bc"+ + "\0\u01c8\0\u01d4\0\u01e0\0\u01ec\0\u01f8\0\74\0\154\0\u0204"+ + "\0\u0210\0\u021c"; private static int [] zzUnpackRowMap() { - int [] result = new int[51]; + int [] result = new int[50]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -191,49 +189,49 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final String ZZ_TRANS_PACKED_0 = - "\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2"+ - "\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13"+ - "\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11"+ - "\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20"+ - 
"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0"+ - "\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27"+ - "\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0"+ - "\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37"+ - "\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44"+ - "\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0"+ - "\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4"+ - "\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0"+ - "\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24"+ - "\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54"+ - "\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0"+ - "\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56"+ - "\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52"+ - "\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31"+ - "\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0"+ - "\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0"+ - "\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33"+ - "\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13"+ - "\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11"+ - "\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57"+ - "\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0"+ - "\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37"+ - "\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40"+ - "\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12"+ - "\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13"+ - "\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16"+ - "\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13"+ - "\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25"+ - "\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0"+ - "\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0"+ - "\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0"+ - "\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0"+ - "\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0"+ - "\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0"+ - "\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0"+ - "\1\11\2\52\1\0\1\24\3\0"; + "\1\2\1\3\1\4\7\2\1\5\1\6\15\0\2\3"+ + "\1\0\1\7\1\0\1\10\2\11\1\12\1\3\2\0"+ + "\1\3\1\4\1\0\1\13\1\0\1\10\2\14\1\15"+ + "\1\4\2\0\1\3\1\4\1\16\1\17\1\20\1\21"+ + "\2\11\1\12\1\22\2\0\1\23\1\24\7\0\1\25"+ + "\2\0\2\26\7\0\1\26\2\0\1\27\1\30\7\0"+ + "\1\31\3\0\1\32\7\0\1\12\2\0\1\33\1\34"+ + "\7\0\1\35\2\0\1\36\1\37\7\0\1\40\2\0"+ + "\1\41\1\42\7\0\1\43\13\0\1\44\2\0\1\23"+ + "\1\24\7\0\1\45\13\0\1\46\2\0\2\26\7\0"+ + "\1\47\2\0\1\3\1\4\1\16\1\7\1\20\1\21"+ + 
"\2\11\1\12\1\22\2\0\2\23\1\0\1\50\1\0"+ + "\1\10\2\51\1\0\1\23\2\0\1\23\1\24\1\0"+ + "\1\52\1\0\1\10\2\53\1\54\1\24\2\0\1\23"+ + "\1\24\1\0\1\50\1\0\1\10\2\51\1\0\1\25"+ + "\2\0\2\26\1\0\1\55\2\0\1\55\2\0\1\26"+ + "\2\0\2\27\1\0\1\51\1\0\1\10\2\51\1\0"+ + "\1\27\2\0\1\27\1\30\1\0\1\53\1\0\1\10"+ + "\2\53\1\54\1\30\2\0\1\27\1\30\1\0\1\51"+ + "\1\0\1\10\2\51\1\0\1\31\3\0\1\32\1\0"+ + "\1\54\2\0\3\54\1\32\2\0\2\33\1\0\1\56"+ + "\1\0\1\10\2\11\1\12\1\33\2\0\1\33\1\34"+ + "\1\0\1\57\1\0\1\10\2\14\1\15\1\34\2\0"+ + "\1\33\1\34\1\0\1\56\1\0\1\10\2\11\1\12"+ + "\1\35\2\0\2\36\1\0\1\11\1\0\1\10\2\11"+ + "\1\12\1\36\2\0\1\36\1\37\1\0\1\14\1\0"+ + "\1\10\2\14\1\15\1\37\2\0\1\36\1\37\1\0"+ + "\1\11\1\0\1\10\2\11\1\12\1\40\2\0\2\41"+ + "\1\0\1\12\2\0\3\12\1\41\2\0\1\41\1\42"+ + "\1\0\1\15\2\0\3\15\1\42\2\0\1\41\1\42"+ + "\1\0\1\12\2\0\3\12\1\43\4\0\1\16\6\0"+ + "\1\44\2\0\1\23\1\24\1\0\1\60\1\0\1\10"+ + "\2\51\1\0\1\25\2\0\2\26\1\0\1\55\2\0"+ + "\1\55\2\0\1\47\2\0\2\23\7\0\1\23\2\0"+ + "\2\27\7\0\1\27\2\0\2\33\7\0\1\33\2\0"+ + "\2\36\7\0\1\36\2\0\2\41\7\0\1\41\2\0"+ + "\2\61\7\0\1\61\2\0\2\23\7\0\1\62\2\0"+ + "\2\61\1\0\1\55\2\0\1\55\2\0\1\61\2\0"+ + "\2\23\1\0\1\60\1\0\1\10\2\51\1\0\1\23"+ + "\1\0"; private static int [] zzUnpackTrans() { - int [] result = new int[658]; + int [] result = new int[552]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -271,11 +269,11 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0"+ - "\1\1\1\0\17\1\1\0\1\1\3\0\5\1"; + "\1\0\1\11\3\1\1\11\13\0\4\1\2\0\1\1"+ + "\1\0\17\1\1\0\1\1\3\0\5\1"; private static int [] zzUnpackAttribute() { - int [] result = new int[51]; + int [] result = new int[50]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -372,7 +370,6 @@ public final void 
getText(CharTermAttribute t) { /** * Creates a new scanner - * There is also a java.io.InputStream version of this constructor. * * @param in the java.io.Reader to read input from. */ @@ -380,7 +377,6 @@ public final void getText(CharTermAttribute t) { this.zzReader = in; } - /** * Unpacks the compressed character translation table. @@ -392,7 +388,7 @@ public final void getText(CharTermAttribute t) { char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 1154) { + while (i < 1138) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex index 4d408b91073..3e4d48e6045 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex @@ -116,8 +116,6 @@ LETTER = !(![:letter:]|{CJ}) // Chinese and Japanese (but NOT Korean, which is included in [:letter:]) CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f] -WHITESPACE = \r\n | [ \r\n\t\f] - %% {ALPHANUM} { return ALPHANUM; } @@ -131,4 +129,4 @@ WHITESPACE = \r\n | [ \r\n\t\f] {ACRONYM_DEP} { return ACRONYM_DEP; } /** Ignore the rest */ -. 
| {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } +[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt index a39d97c3dcd..2aaa082d6e6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt @@ -18,4 +18,4 @@ WARNING: if you change StandardTokenizerImpl*.jflex or UAX29URLEmailTokenizer and need to regenerate the tokenizer, only use the trunk version - of JFlex 1.5 (with a minimum SVN revision 607) at the moment! + of JFlex 1.5 (with a minimum SVN revision 722) at the moment! diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro index efc0fe1dcd3..f5bf68e254b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro @@ -1,11 +1,12 @@ /* - * Copyright 2010 The Apache Software Foundation. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -13,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -// Generated using ICU4J 49.1.0.0 +// Generated using ICU4J 52.1.0.0 // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros @@ -39,6 +39,12 @@ FormatSupp = ( | ([\ud834][\uDD73-\uDD7A]) | ([\udb40][\uDC01\uDC20-\uDC7F]) ) +NumericSupp = ( + ([\ud805][\uDEC0-\uDEC9]) + | ([\ud804][\uDC66-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9]) + | ([\ud835][\uDFCE-\uDFFF]) + | ([\ud801][\uDCA0-\uDCA9]) +) ExtendSupp = ( ([\ud81b][\uDF51-\uDF7E\uDF8F-\uDF92]) | ([\ud805][\uDEAB-\uDEB7]) @@ -48,12 +54,6 @@ ExtendSupp = ( | ([\udb40][\uDD00-\uDDEF]) | ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F]) ) -NumericSupp = ( - ([\ud805][\uDEC0-\uDEC9]) - | ([\ud804][\uDC66-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9]) - | ([\ud835][\uDFCE-\uDFFF]) - | ([\ud801][\uDCA0-\uDCA9]) -) KatakanaSupp = ( ([\ud82c][\uDC00]) ) @@ -129,3 +129,15 @@ HiraganaSupp = ( ([\ud83c][\uDE00]) | ([\ud82c][\uDC01]) ) +SingleQuoteSupp = ( + [] +) +DoubleQuoteSupp = ( + [] +) +HebrewLetterSupp = ( + [] +) +RegionalIndicatorSupp = ( + ([\ud83c][\uDDE6-\uDDFF]) +) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java index f5ff0319c46..4f33cfb0398 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex. 
*/ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT */ package org.apache.lucene.analysis.standard; @@ -34,6 +34,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * Asian languages, including Thai, Lao, Myanmar, and Khmer *
  • <IDEOGRAPHIC>: A single CJKV ideographic character
  • *
  • <HIRAGANA>: A single hiragana character
  • + *
  • <KATAKANA>: A sequence of katakana characters
  • + *
  • <HANGUL>: A sequence of Hangul characters
  • * */ @@ -62,149 +64,149 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\47\0\1\202\4\0\1\201\1\0\1\202\1\0\12\176\1\200\1\201"+ - "\5\0\32\174\4\0\1\203\1\0\32\174\57\0\1\174\2\0\1\175"+ - "\7\0\1\174\1\0\1\200\2\0\1\174\5\0\27\174\1\0\37\174"+ - "\1\0\u01ca\174\4\0\14\174\16\0\5\174\7\0\1\174\1\0\1\174"+ - "\21\0\160\175\5\174\1\0\2\174\2\0\4\174\1\201\7\0\1\174"+ - "\1\200\3\174\1\0\1\174\1\0\24\174\1\0\123\174\1\0\213\174"+ - "\1\0\7\175\236\174\11\0\46\174\2\0\1\174\7\0\47\174\1\0"+ - "\1\201\7\0\55\175\1\0\1\175\1\0\2\175\1\0\2\175\1\0"+ - "\1\175\10\0\33\174\5\0\4\174\1\200\13\0\5\175\7\0\2\201"+ - "\2\0\13\175\5\0\53\174\25\175\12\176\1\0\1\176\1\201\1\0"+ - "\2\174\1\175\143\174\1\0\1\174\7\175\1\175\1\0\6\175\2\174"+ - "\2\175\1\0\4\175\2\174\12\176\3\174\2\0\1\174\17\0\1\175"+ - "\1\174\1\175\36\174\33\175\2\0\131\174\13\175\1\174\16\0\12\176"+ - "\41\174\11\175\2\174\2\0\1\201\1\0\1\174\5\0\26\174\4\175"+ - "\1\174\11\175\1\174\3\175\1\174\5\175\22\0\31\174\3\175\104\0"+ - "\1\174\1\0\13\174\67\0\33\175\1\0\4\175\66\174\3\175\1\174"+ - "\22\175\1\174\7\175\12\174\2\175\2\0\12\176\1\0\7\174\1\0"+ - "\7\174\1\0\3\175\1\0\10\174\2\0\2\174\2\0\26\174\1\0"+ - "\7\174\1\0\1\174\3\0\4\174\2\0\1\175\1\174\7\175\2\0"+ - "\2\175\2\0\3\175\1\174\10\0\1\175\4\0\2\174\1\0\3\174"+ - "\2\175\2\0\12\176\2\174\17\0\3\175\1\0\6\174\4\0\2\174"+ - "\2\0\26\174\1\0\7\174\1\0\2\174\1\0\2\174\1\0\2\174"+ - "\2\0\1\175\1\0\5\175\4\0\2\175\2\0\3\175\3\0\1\175"+ - "\7\0\4\174\1\0\1\174\7\0\12\176\2\175\3\174\1\175\13\0"+ - "\3\175\1\0\11\174\1\0\3\174\1\0\26\174\1\0\7\174\1\0"+ - "\2\174\1\0\5\174\2\0\1\175\1\174\10\175\1\0\3\175\1\0"+ - "\3\175\2\0\1\174\17\0\2\174\2\175\2\0\12\176\21\0\3\175"+ - "\1\0\10\174\2\0\2\174\2\0\26\174\1\0\7\174\1\0\2\174"+ - "\1\0\5\174\2\0\1\175\1\174\7\175\2\0\2\175\2\0\3\175"+ - 
"\10\0\2\175\4\0\2\174\1\0\3\174\2\175\2\0\12\176\1\0"+ - "\1\174\20\0\1\175\1\174\1\0\6\174\3\0\3\174\1\0\4\174"+ - "\3\0\2\174\1\0\1\174\1\0\2\174\3\0\2\174\3\0\3\174"+ - "\3\0\14\174\4\0\5\175\3\0\3\175\1\0\4\175\2\0\1\174"+ - "\6\0\1\175\16\0\12\176\21\0\3\175\1\0\10\174\1\0\3\174"+ - "\1\0\27\174\1\0\12\174\1\0\5\174\3\0\1\174\7\175\1\0"+ - "\3\175\1\0\4\175\7\0\2\175\1\0\2\174\6\0\2\174\2\175"+ - "\2\0\12\176\22\0\2\175\1\0\10\174\1\0\3\174\1\0\27\174"+ - "\1\0\12\174\1\0\5\174\2\0\1\175\1\174\7\175\1\0\3\175"+ - "\1\0\4\175\7\0\2\175\7\0\1\174\1\0\2\174\2\175\2\0"+ - "\12\176\1\0\2\174\17\0\2\175\1\0\10\174\1\0\3\174\1\0"+ - "\51\174\2\0\1\174\7\175\1\0\3\175\1\0\4\175\1\174\10\0"+ - "\1\175\10\0\2\174\2\175\2\0\12\176\12\0\6\174\2\0\2\175"+ - "\1\0\22\174\3\0\30\174\1\0\11\174\1\0\1\174\2\0\7\174"+ - "\3\0\1\175\4\0\6\175\1\0\1\175\1\0\10\175\22\0\2\175"+ - "\15\0\60\204\1\205\2\204\7\205\5\0\7\204\10\205\1\0\12\176"+ - "\47\0\2\204\1\0\1\204\2\0\2\204\1\0\1\204\2\0\1\204"+ - "\6\0\4\204\1\0\7\204\1\0\3\204\1\0\1\204\1\0\1\204"+ - "\2\0\2\204\1\0\4\204\1\205\2\204\6\205\1\0\2\205\1\204"+ - "\2\0\5\204\1\0\1\204\1\0\6\205\2\0\12\176\2\0\4\204"+ - "\40\0\1\174\27\0\2\175\6\0\12\176\13\0\1\175\1\0\1\175"+ - "\1\0\1\175\4\0\2\175\10\174\1\0\44\174\4\0\24\175\1\0"+ - "\2\175\5\174\13\175\1\0\44\175\11\0\1\175\71\0\53\204\24\205"+ - "\1\204\12\176\6\0\6\204\4\205\4\204\3\205\1\204\3\205\2\204"+ - "\7\205\3\204\4\205\15\204\14\205\1\204\1\205\12\176\4\205\2\204"+ - "\46\174\1\0\1\174\5\0\1\174\2\0\53\174\1\0\4\174\u0100\210"+ - "\111\174\1\0\4\174\2\0\7\174\1\0\1\174\1\0\4\174\2\0"+ - "\51\174\1\0\4\174\2\0\41\174\1\0\4\174\2\0\7\174\1\0"+ - "\1\174\1\0\4\174\2\0\17\174\1\0\71\174\1\0\4\174\2\0"+ - "\103\174\2\0\3\175\40\0\20\174\20\0\125\174\14\0\u026c\174\2\0"+ - "\21\174\1\0\32\174\5\0\113\174\3\0\3\174\17\0\15\174\1\0"+ - "\4\174\3\175\13\0\22\174\3\175\13\0\22\174\2\175\14\0\15\174"+ - "\1\0\3\174\1\0\2\175\14\0\64\204\40\205\3\0\1\204\4\0"+ - 
"\1\204\1\205\2\0\12\176\41\0\3\175\2\0\12\176\6\0\130\174"+ - "\10\0\51\174\1\175\1\174\5\0\106\174\12\0\35\174\3\0\14\175"+ - "\4\0\14\175\12\0\12\176\36\204\2\0\5\204\13\0\54\204\4\0"+ - "\21\205\7\204\2\205\6\0\12\176\1\204\3\0\2\204\40\0\27\174"+ - "\5\175\4\0\65\204\12\205\1\0\35\205\2\0\1\175\12\176\6\0"+ - "\12\176\6\0\16\204\122\0\5\175\57\174\21\175\7\174\4\0\12\176"+ - "\21\0\11\175\14\0\3\175\36\174\15\175\2\174\12\176\54\174\16\175"+ - "\14\0\44\174\24\175\10\0\12\176\3\0\3\174\12\176\44\174\122\0"+ - "\3\175\1\0\25\175\4\174\1\175\4\174\3\175\2\174\11\0\300\174"+ - "\47\175\25\0\4\175\u0116\174\2\0\6\174\2\0\46\174\2\0\6\174"+ - "\2\0\10\174\1\0\1\174\1\0\1\174\1\0\1\174\1\0\37\174"+ - "\2\0\65\174\1\0\7\174\1\0\1\174\3\0\3\174\1\0\7\174"+ - "\3\0\4\174\2\0\6\174\4\0\15\174\5\0\3\174\1\0\7\174"+ - "\17\0\2\175\2\175\10\0\2\202\12\0\1\202\2\0\1\200\2\0"+ - "\5\175\20\0\2\203\3\0\1\201\17\0\1\203\13\0\5\175\5\0"+ - "\6\175\1\0\1\174\15\0\1\174\20\0\15\174\63\0\41\175\21\0"+ - "\1\174\4\0\1\174\2\0\12\174\1\0\1\174\3\0\5\174\6\0"+ - "\1\174\1\0\1\174\1\0\1\174\1\0\4\174\1\0\13\174\2\0"+ - "\4\174\5\0\5\174\4\0\1\174\21\0\51\174\u032d\0\64\174\u0716\0"+ - "\57\174\1\0\57\174\1\0\205\174\6\0\4\174\3\175\2\174\14\0"+ - "\46\174\1\0\1\174\5\0\1\174\2\0\70\174\7\0\1\174\17\0"+ - "\1\175\27\174\11\0\7\174\1\0\7\174\1\0\7\174\1\0\7\174"+ - "\1\0\7\174\1\0\7\174\1\0\7\174\1\0\7\174\1\0\40\175"+ - "\57\0\1\174\120\0\32\206\1\0\131\206\14\0\326\206\57\0\1\174"+ - "\1\0\1\206\31\0\11\206\4\175\2\175\1\0\5\177\2\0\3\206"+ - "\1\174\1\174\4\0\126\207\2\0\2\175\2\177\3\207\133\177\1\0"+ - "\4\177\5\0\51\174\3\0\136\210\21\0\33\174\65\0\20\177\37\0"+ - "\101\0\37\0\121\0\57\177\1\0\130\177\250\0\u19b6\206\112\0\u51cd\206"+ - "\63\0\u048d\174\103\0\56\174\2\0\u010d\174\3\0\20\174\12\176\2\174"+ - "\24\0\57\174\4\175\1\0\12\175\1\0\31\174\7\0\1\175\120\174"+ - "\2\175\45\0\11\174\2\0\147\174\2\0\4\174\1\0\4\174\14\0"+ - 
"\13\174\115\0\12\174\1\175\3\174\1\175\4\174\1\175\27\174\5\175"+ - "\30\0\64\174\14\0\2\175\62\174\21\175\13\0\12\176\6\0\22\175"+ - "\6\174\3\0\1\174\4\0\12\176\34\174\10\175\2\0\27\174\15\175"+ - "\14\0\35\210\3\0\4\175\57\174\16\175\16\0\1\174\12\176\46\0"+ - "\51\174\16\175\11\0\3\174\1\175\10\174\2\175\2\0\12\176\6\0"+ - "\33\204\1\205\4\0\60\204\1\205\1\204\3\205\2\204\2\205\5\204"+ - "\2\205\1\204\1\205\1\204\30\0\5\204\13\174\5\175\2\0\3\174"+ - "\2\175\12\0\6\174\2\0\6\174\2\0\6\174\11\0\7\174\1\0"+ - "\7\174\221\0\43\174\10\175\1\0\2\175\2\0\12\176\6\0\u2ba4\210"+ - "\14\0\27\210\4\0\61\210\4\0\1\44\1\40\1\67\1\64\1\33"+ - "\1\30\2\0\1\24\1\21\2\0\1\17\1\15\14\0\1\3\1\6"+ - "\20\0\1\156\7\0\1\111\1\10\5\0\1\1\1\172\3\0\1\163"+ + "\42\0\1\213\4\0\1\212\4\0\1\203\1\0\1\204\1\0\12\200"+ + "\1\202\1\203\5\0\32\176\4\0\1\205\1\0\32\176\57\0\1\176"+ + "\2\0\1\177\7\0\1\176\1\0\1\202\2\0\1\176\5\0\27\176"+ + "\1\0\37\176\1\0\u01ca\176\4\0\14\176\5\0\1\202\10\0\5\176"+ + "\7\0\1\176\1\0\1\176\21\0\160\177\5\176\1\0\2\176\2\0"+ + "\4\176\1\203\7\0\1\176\1\202\3\176\1\0\1\176\1\0\24\176"+ + "\1\0\123\176\1\0\213\176\1\0\7\177\236\176\11\0\46\176\2\0"+ + "\1\176\7\0\47\176\1\0\1\203\7\0\55\177\1\0\1\177\1\0"+ + "\2\177\1\0\2\177\1\0\1\177\10\0\33\214\5\0\3\214\1\176"+ + "\1\202\13\0\5\177\7\0\2\203\2\0\13\177\1\0\1\177\3\0"+ + "\53\176\25\177\12\200\1\0\1\200\1\203\1\0\2\176\1\177\143\176"+ + "\1\0\1\176\7\177\1\177\1\0\6\177\2\176\2\177\1\0\4\177"+ + "\2\176\12\200\3\176\2\0\1\176\17\0\1\177\1\176\1\177\36\176"+ + "\33\177\2\0\131\176\13\177\1\176\16\0\12\200\41\176\11\177\2\176"+ + "\2\0\1\203\1\0\1\176\5\0\26\176\4\177\1\176\11\177\1\176"+ + "\3\177\1\176\5\177\22\0\31\176\3\177\104\0\1\176\1\0\13\176"+ + "\67\0\33\177\1\0\4\177\66\176\3\177\1\176\22\177\1\176\7\177"+ + "\12\176\2\177\2\0\12\200\1\0\7\176\1\0\7\176\1\0\3\177"+ + "\1\0\10\176\2\0\2\176\2\0\26\176\1\0\7\176\1\0\1\176"+ + "\3\0\4\176\2\0\1\177\1\176\7\177\2\0\2\177\2\0\3\177"+ + 
"\1\176\10\0\1\177\4\0\2\176\1\0\3\176\2\177\2\0\12\200"+ + "\2\176\17\0\3\177\1\0\6\176\4\0\2\176\2\0\26\176\1\0"+ + "\7\176\1\0\2\176\1\0\2\176\1\0\2\176\2\0\1\177\1\0"+ + "\5\177\4\0\2\177\2\0\3\177\3\0\1\177\7\0\4\176\1\0"+ + "\1\176\7\0\12\200\2\177\3\176\1\177\13\0\3\177\1\0\11\176"+ + "\1\0\3\176\1\0\26\176\1\0\7\176\1\0\2\176\1\0\5\176"+ + "\2\0\1\177\1\176\10\177\1\0\3\177\1\0\3\177\2\0\1\176"+ + "\17\0\2\176\2\177\2\0\12\200\21\0\3\177\1\0\10\176\2\0"+ + "\2\176\2\0\26\176\1\0\7\176\1\0\2\176\1\0\5\176\2\0"+ + "\1\177\1\176\7\177\2\0\2\177\2\0\3\177\10\0\2\177\4\0"+ + "\2\176\1\0\3\176\2\177\2\0\12\200\1\0\1\176\20\0\1\177"+ + "\1\176\1\0\6\176\3\0\3\176\1\0\4\176\3\0\2\176\1\0"+ + "\1\176\1\0\2\176\3\0\2\176\3\0\3\176\3\0\14\176\4\0"+ + "\5\177\3\0\3\177\1\0\4\177\2\0\1\176\6\0\1\177\16\0"+ + "\12\200\21\0\3\177\1\0\10\176\1\0\3\176\1\0\27\176\1\0"+ + "\12\176\1\0\5\176\3\0\1\176\7\177\1\0\3\177\1\0\4\177"+ + "\7\0\2\177\1\0\2\176\6\0\2\176\2\177\2\0\12\200\22\0"+ + "\2\177\1\0\10\176\1\0\3\176\1\0\27\176\1\0\12\176\1\0"+ + "\5\176\2\0\1\177\1\176\7\177\1\0\3\177\1\0\4\177\7\0"+ + "\2\177\7\0\1\176\1\0\2\176\2\177\2\0\12\200\1\0\2\176"+ + "\17\0\2\177\1\0\10\176\1\0\3\176\1\0\51\176\2\0\1\176"+ + "\7\177\1\0\3\177\1\0\4\177\1\176\10\0\1\177\10\0\2\176"+ + "\2\177\2\0\12\200\12\0\6\176\2\0\2\177\1\0\22\176\3\0"+ + "\30\176\1\0\11\176\1\0\1\176\2\0\7\176\3\0\1\177\4\0"+ + "\6\177\1\0\1\177\1\0\10\177\22\0\2\177\15\0\60\206\1\207"+ + "\2\206\7\207\5\0\7\206\10\207\1\0\12\200\47\0\2\206\1\0"+ + "\1\206\2\0\2\206\1\0\1\206\2\0\1\206\6\0\4\206\1\0"+ + "\7\206\1\0\3\206\1\0\1\206\1\0\1\206\2\0\2\206\1\0"+ + "\4\206\1\207\2\206\6\207\1\0\2\207\1\206\2\0\5\206\1\0"+ + "\1\206\1\0\6\207\2\0\12\200\2\0\4\206\40\0\1\176\27\0"+ + "\2\177\6\0\12\200\13\0\1\177\1\0\1\177\1\0\1\177\4\0"+ + "\2\177\10\176\1\0\44\176\4\0\24\177\1\0\2\177\5\176\13\177"+ + "\1\0\44\177\11\0\1\177\71\0\53\206\24\207\1\206\12\200\6\0"+ + 
"\6\206\4\207\4\206\3\207\1\206\3\207\2\206\7\207\3\206\4\207"+ + "\15\206\14\207\1\206\1\207\12\200\4\207\2\206\46\176\1\0\1\176"+ + "\5\0\1\176\2\0\53\176\1\0\4\176\u0100\215\111\176\1\0\4\176"+ + "\2\0\7\176\1\0\1\176\1\0\4\176\2\0\51\176\1\0\4\176"+ + "\2\0\41\176\1\0\4\176\2\0\7\176\1\0\1\176\1\0\4\176"+ + "\2\0\17\176\1\0\71\176\1\0\4\176\2\0\103\176\2\0\3\177"+ + "\40\0\20\176\20\0\125\176\14\0\u026c\176\2\0\21\176\1\0\32\176"+ + "\5\0\113\176\3\0\3\176\17\0\15\176\1\0\4\176\3\177\13\0"+ + "\22\176\3\177\13\0\22\176\2\177\14\0\15\176\1\0\3\176\1\0"+ + "\2\177\14\0\64\206\40\207\3\0\1\206\4\0\1\206\1\207\2\0"+ + "\12\200\41\0\3\177\1\177\1\0\12\200\6\0\130\176\10\0\51\176"+ + "\1\177\1\176\5\0\106\176\12\0\35\176\3\0\14\177\4\0\14\177"+ + "\12\0\12\200\36\206\2\0\5\206\13\0\54\206\4\0\21\207\7\206"+ + "\2\207\6\0\12\200\1\206\3\0\2\206\40\0\27\176\5\177\4\0"+ + "\65\206\12\207\1\0\35\207\2\0\1\177\12\200\6\0\12\200\6\0"+ + "\16\206\122\0\5\177\57\176\21\177\7\176\4\0\12\200\21\0\11\177"+ + "\14\0\3\177\36\176\15\177\2\176\12\200\54\176\16\177\14\0\44\176"+ + "\24\177\10\0\12\200\3\0\3\176\12\200\44\176\122\0\3\177\1\0"+ + "\25\177\4\176\1\177\4\176\3\177\2\176\11\0\300\176\47\177\25\0"+ + "\4\177\u0116\176\2\0\6\176\2\0\46\176\2\0\6\176\2\0\10\176"+ + "\1\0\1\176\1\0\1\176\1\0\1\176\1\0\37\176\2\0\65\176"+ + "\1\0\7\176\1\0\1\176\3\0\3\176\1\0\7\176\3\0\4\176"+ + "\2\0\6\176\4\0\15\176\5\0\3\176\1\0\7\176\17\0\2\177"+ + "\2\177\10\0\2\204\12\0\1\204\2\0\1\202\2\0\5\177\20\0"+ + "\2\205\3\0\1\203\17\0\1\205\13\0\5\177\1\0\12\177\1\0"+ + "\1\176\15\0\1\176\20\0\15\176\63\0\41\177\21\0\1\176\4\0"+ + "\1\176\2\0\12\176\1\0\1\176\3\0\5\176\6\0\1\176\1\0"+ + "\1\176\1\0\1\176\1\0\4\176\1\0\13\176\2\0\4\176\5\0"+ + "\5\176\4\0\1\176\21\0\51\176\u032d\0\64\176\u0716\0\57\176\1\0"+ + "\57\176\1\0\205\176\6\0\4\176\3\177\2\176\14\0\46\176\1\0"+ + "\1\176\5\0\1\176\2\0\70\176\7\0\1\176\17\0\1\177\27\176"+ + "\11\0\7\176\1\0\7\176\1\0\7\176\1\0\7\176\1\0\7\176"+ + 
"\1\0\7\176\1\0\7\176\1\0\7\176\1\0\40\177\57\0\1\176"+ + "\120\0\32\210\1\0\131\210\14\0\326\210\57\0\1\176\1\0\1\210"+ + "\31\0\11\210\6\177\1\0\5\201\2\0\3\210\1\176\1\176\4\0"+ + "\126\211\2\0\2\177\2\201\3\211\133\201\1\0\4\201\5\0\51\176"+ + "\3\0\136\215\21\0\33\176\65\0\20\201\320\0\57\201\1\0\130\201"+ + "\250\0\u19b6\210\112\0\u51cd\210\63\0\u048d\176\103\0\56\176\2\0\u010d\176"+ + "\3\0\20\176\12\200\2\176\24\0\57\176\4\177\1\0\12\177\1\0"+ + "\31\176\7\0\1\177\120\176\2\177\45\0\11\176\2\0\147\176\2\0"+ + "\4\176\1\0\4\176\14\0\13\176\115\0\12\176\1\177\3\176\1\177"+ + "\4\176\1\177\27\176\5\177\30\0\64\176\14\0\2\177\62\176\21\177"+ + "\13\0\12\200\6\0\22\177\6\176\3\0\1\176\4\0\12\200\34\176"+ + "\10\177\2\0\27\176\15\177\14\0\35\215\3\0\4\177\57\176\16\177"+ + "\16\0\1\176\12\200\46\0\51\176\16\177\11\0\3\176\1\177\10\176"+ + "\2\177\2\0\12\200\6\0\33\206\1\207\4\0\60\206\1\207\1\206"+ + "\3\207\2\206\2\207\5\206\2\207\1\206\1\207\1\206\30\0\5\206"+ + "\13\176\5\177\2\0\3\176\2\177\12\0\6\176\2\0\6\176\2\0"+ + "\6\176\11\0\7\176\1\0\7\176\221\0\43\176\10\177\1\0\2\177"+ + "\2\0\12\200\6\0\u2ba4\215\14\0\27\215\4\0\61\215\4\0\1\44"+ + "\1\40\1\67\1\64\1\33\1\30\2\0\1\24\1\21\2\0\1\17"+ + "\1\15\14\0\1\3\1\6\20\0\1\156\7\0\1\111\1\10\5\0"+ + "\1\1\1\172\3\0\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ - "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ - "\1\164\1\163\1\163\1\163\1\170\1\166\17\0\1\160\u02c1\0\1\114"+ - "\277\0\1\157\1\115\1\16\3\167\2\62\1\167\1\62\2\167\1\36"+ - "\21\167\2\106\7\117\1\116\7\117\7\102\1\37\1\102\1\130\2\66"+ - "\1\65\1\130\1\66\1\65\10\130\2\107\5\103\2\75\5\103\1\22"+ - "\10\53\5\23\3\41\12\147\20\41\3\63\32\43\1\42\2\61\2\154"+ - "\1\155\2\154\2\155\2\154\1\155\3\61\1\60\2\61\12\110\1\126"+ - 
"\1\50\1\45\1\110\6\50\1\45\13\50\31\61\7\50\12\150\1\50"+ - "\5\13\3\127\3\101\1\100\4\101\2\100\10\101\1\100\7\35\1\34"+ - "\2\35\7\101\16\127\1\141\4\152\1\4\4\151\1\4\5\140\1\137"+ - "\1\140\3\137\7\140\1\137\23\140\5\113\3\140\6\113\2\113\6\112"+ - "\5\112\3\134\2\101\7\133\36\101\4\133\5\101\5\127\6\125\2\127"+ - "\1\125\4\35\13\136\12\151\26\136\15\13\1\135\2\13\1\173\3\142"+ - "\1\13\2\142\5\161\4\142\4\162\1\161\3\162\1\161\5\162\2\70"+ - "\1\73\2\70\1\73\1\70\2\73\1\70\1\73\12\70\1\73\4\5"+ - "\1\144\1\143\1\145\1\12\3\165\1\145\2\165\1\131\2\132\2\165"+ - "\1\12\1\165\1\12\1\165\1\12\1\165\3\12\1\165\2\12\1\165"+ - "\1\12\2\165\1\12\1\165\1\12\1\165\1\12\1\165\1\12\1\165"+ - "\1\12\1\76\2\72\1\76\1\72\2\76\4\72\1\76\7\72\1\76"+ - "\4\72\1\76\4\72\1\165\1\12\1\165\12\31\1\57\21\31\1\57"+ - "\3\32\1\57\3\31\1\57\1\31\2\2\2\31\1\57\15\124\4\47"+ - "\4\54\1\146\1\56\10\146\7\54\6\165\4\25\1\27\37\25\1\27"+ - "\4\25\25\105\1\171\11\105\21\26\5\105\1\7\12\55\5\105\6\104"+ - "\4\76\1\77\1\26\5\123\12\121\17\123\1\74\3\71\14\120\1\11"+ - "\11\46\1\52\5\46\4\122\13\51\2\14\11\46\1\52\31\46\1\52"+ - "\4\11\4\46\2\52\2\153\1\20\5\153\52\20\u1900\0\u016e\206\2\0"+ - "\152\206\46\0\7\174\14\0\5\174\5\0\1\174\1\175\12\174\1\0"+ - "\15\174\1\0\5\174\1\0\1\174\1\0\2\174\1\0\2\174\1\0"+ - "\154\174\41\0\u016b\174\22\0\100\174\2\0\66\174\50\0\14\174\4\0"+ - "\20\175\1\201\2\0\1\200\1\201\13\0\7\175\14\0\2\203\30\0"+ - "\3\203\1\201\1\0\1\202\1\0\1\201\1\200\32\0\5\174\1\0"+ - "\207\174\2\0\1\175\7\0\1\202\4\0\1\201\1\0\1\202\1\0"+ - "\12\176\1\200\1\201\5\0\32\174\4\0\1\203\1\0\32\174\13\0"+ - "\70\177\2\175\37\210\3\0\6\210\2\0\6\210\2\0\6\210\2\0"+ - "\3\210\34\0\3\175\4\0"; + "\1\163\1\163\1\163\1\163\1\164\1\163\1\163\1\163\1\170\1\166"+ + "\17\0\1\160\u02c1\0\1\114\277\0\1\157\1\115\1\16\3\167\2\62"+ + "\1\167\1\62\2\167\1\36\21\167\2\106\7\117\1\116\7\117\7\102"+ + "\1\37\1\102\1\140\2\66\1\65\1\140\1\66\1\65\10\140\2\107"+ + 
"\5\103\2\75\5\103\1\22\10\53\5\23\3\41\12\122\20\41\3\63"+ + "\32\43\1\42\2\61\2\126\1\127\2\126\2\127\2\126\1\127\3\61"+ + "\1\60\2\61\12\110\1\136\1\50\1\45\1\110\6\50\1\45\13\50"+ + "\31\61\7\50\12\123\1\50\5\13\3\137\3\101\1\100\4\101\2\100"+ + "\10\101\1\100\7\35\1\34\2\35\7\101\16\137\1\151\4\124\1\4"+ + "\4\121\1\4\5\150\1\147\1\150\3\147\7\150\1\147\23\150\5\113"+ + "\3\150\6\113\2\113\6\112\5\112\3\144\2\101\7\143\36\101\4\143"+ + "\5\101\5\137\6\135\2\137\1\135\4\35\13\146\12\121\14\146\12\175"+ + "\15\174\1\145\2\174\1\173\3\152\1\13\2\152\5\161\4\152\4\162"+ + "\1\161\3\162\1\161\5\162\2\70\1\73\2\70\1\73\1\70\2\73"+ + "\1\70\1\73\12\70\1\73\4\5\1\154\1\153\1\155\1\12\3\165"+ + "\1\155\2\165\1\141\2\142\2\165\1\12\1\165\1\12\1\165\1\12"+ + "\1\165\3\12\1\165\2\12\1\165\1\12\2\165\1\12\1\165\1\12"+ + "\1\165\1\12\1\165\1\12\1\165\1\12\1\76\2\72\1\76\1\72"+ + "\2\76\4\72\1\76\7\72\1\76\4\72\1\76\4\72\1\165\1\12"+ + "\1\165\12\31\1\57\21\31\1\57\3\32\1\57\3\31\1\57\1\31"+ + "\2\2\2\31\1\57\15\134\4\47\4\54\1\120\1\56\10\120\7\54"+ + "\6\165\4\25\1\27\37\25\1\27\4\25\25\105\1\171\11\105\21\26"+ + "\5\105\1\7\12\55\5\105\6\104\4\76\1\77\1\26\5\133\12\131"+ + "\17\133\1\74\3\71\14\130\1\11\11\46\1\52\5\46\4\132\13\51"+ + "\2\14\11\46\1\52\31\46\1\52\4\11\4\46\2\52\2\125\1\20"+ + "\5\125\52\20\u1900\0\u016e\210\2\0\152\210\46\0\7\176\14\0\5\176"+ + "\5\0\1\214\1\177\12\214\1\0\15\214\1\0\5\214\1\0\1\214"+ + "\1\0\2\214\1\0\2\214\1\0\12\214\142\176\41\0\u016b\176\22\0"+ + "\100\176\2\0\66\176\50\0\14\176\4\0\20\177\1\203\2\0\1\202"+ + "\1\203\13\0\7\177\14\0\2\205\30\0\3\205\1\203\1\0\1\204"+ + "\1\0\1\203\1\202\32\0\5\176\1\0\207\176\2\0\1\177\7\0"+ + "\1\204\4\0\1\203\1\0\1\204\1\0\12\200\1\202\1\203\5\0"+ + "\32\176\4\0\1\205\1\0\32\176\13\0\70\201\2\177\37\215\3\0"+ + "\6\215\2\0\6\215\2\0\6\215\2\0\3\215\34\0\3\177\4\0"; /** * Translates characters to character classes @@ -218,11 +220,12 @@ public final class StandardTokenizerImpl implements 
StandardTokenizerInterface { private static final String ZZ_ACTION_PACKED_0 = "\1\0\26\1\1\2\1\3\1\4\1\1\1\5\1\6"+ - "\1\7\1\10\20\0\1\2\1\0\1\2\12\0\1\3"+ - "\21\0\1\2\115\0"; + "\1\7\1\2\1\10\21\0\1\2\1\0\1\2\12\0"+ + "\1\3\10\0\1\2\11\0\1\2\55\0\1\2\65\0"+ + "\1\2\1\1\36\0"; private static int [] zzUnpackAction() { - int [] result = new int[156]; + int [] result = new int[213]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -247,29 +250,36 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\211\0\u0112\0\u019b\0\u0224\0\u02ad\0\u0336\0\u03bf"+ - "\0\u0448\0\u04d1\0\u055a\0\u05e3\0\u066c\0\u06f5\0\u077e\0\u0807"+ - "\0\u0890\0\u0919\0\u09a2\0\u0a2b\0\u0ab4\0\u0b3d\0\u0bc6\0\u0c4f"+ - "\0\u0cd8\0\u0d61\0\u0dea\0\u0e73\0\u0efc\0\u0f85\0\u100e\0\u0112"+ - "\0\u019b\0\u1097\0\u1120\0\u0336\0\u03bf\0\u0448\0\u04d1\0\u11a9"+ - "\0\u1232\0\u12bb\0\u1344\0\u077e\0\u13cd\0\u1456\0\u14df\0\u1568"+ - "\0\u15f1\0\u167a\0\u1703\0\u02ad\0\u178c\0\u1815\0\u066c\0\u189e"+ - "\0\u1927\0\u19b0\0\u1a39\0\u1ac2\0\u1b4b\0\u1bd4\0\u1c5d\0\u1ce6"+ - "\0\u1d6f\0\u1df8\0\u1e81\0\u1f0a\0\u1f93\0\u201c\0\u20a5\0\u212e"+ - "\0\u21b7\0\u2240\0\u22c9\0\u2352\0\u23db\0\u0dea\0\u2464\0\u24ed"+ - "\0\u2576\0\u25ff\0\u2688\0\u2711\0\u279a\0\u2823\0\u28ac\0\u2935"+ - "\0\u29be\0\u2a47\0\u2ad0\0\u2b59\0\u2be2\0\u2c6b\0\u2cf4\0\u2d7d"+ - "\0\u2e06\0\u2e8f\0\u2f18\0\u2fa1\0\u302a\0\u30b3\0\u313c\0\u31c5"+ - "\0\u324e\0\u32d7\0\u3360\0\u33e9\0\u3472\0\u34fb\0\u3584\0\u360d"+ - "\0\u3696\0\u371f\0\u37a8\0\u3831\0\u38ba\0\u3943\0\u39cc\0\u3a55"+ - "\0\u3ade\0\u3b67\0\u3bf0\0\u3c79\0\u3d02\0\u3d8b\0\u3e14\0\u3e9d"+ - "\0\u3f26\0\u3faf\0\u4038\0\u40c1\0\u414a\0\u41d3\0\u425c\0\u42e5"+ - "\0\u436e\0\u43f7\0\u4480\0\u4509\0\u4592\0\u461b\0\u46a4\0\u472d"+ - 
"\0\u47b6\0\u483f\0\u48c8\0\u4951\0\u49da\0\u4a63\0\u4aec\0\u4b75"+ - "\0\u4bfe\0\u4c87\0\u4d10\0\u4d99"; + "\0\0\0\216\0\u011c\0\u01aa\0\u0238\0\u02c6\0\u0354\0\u03e2"+ + "\0\u0470\0\u04fe\0\u058c\0\u061a\0\u06a8\0\u0736\0\u07c4\0\u0852"+ + "\0\u08e0\0\u096e\0\u09fc\0\u0a8a\0\u0b18\0\u0ba6\0\u0c34\0\u0cc2"+ + "\0\u0d50\0\u0dde\0\u0e6c\0\u0efa\0\u0f88\0\u1016\0\u10a4\0\u1132"+ + "\0\u11c0\0\u011c\0\u01aa\0\u124e\0\u12dc\0\u0354\0\u03e2\0\u0470"+ + "\0\u04fe\0\u136a\0\u13f8\0\u1486\0\u1514\0\u07c4\0\u15a2\0\u1630"+ + "\0\u16be\0\u174c\0\u17da\0\u1868\0\u18f6\0\u02c6\0\u1984\0\u1a12"+ + "\0\u06a8\0\u1aa0\0\u1b2e\0\u1bbc\0\u1c4a\0\u1cd8\0\u1d66\0\u1df4"+ + "\0\u1e82\0\u1f10\0\u1f9e\0\u202c\0\u20ba\0\u2148\0\u21d6\0\u2264"+ + "\0\u22f2\0\u2380\0\u240e\0\u249c\0\u252a\0\u25b8\0\u2646\0\u26d4"+ + "\0\u0e6c\0\u2762\0\u27f0\0\u287e\0\u290c\0\u299a\0\u2a28\0\u2ab6"+ + "\0\u2b44\0\u2bd2\0\u2c60\0\u2cee\0\u2d7c\0\u2e0a\0\u2e98\0\u2f26"+ + "\0\u2fb4\0\u3042\0\u30d0\0\u315e\0\u31ec\0\u327a\0\u3308\0\u3396"+ + "\0\u3424\0\u34b2\0\u3540\0\u35ce\0\u365c\0\u36ea\0\u3778\0\u3806"+ + "\0\u3894\0\u3922\0\u39b0\0\u3a3e\0\u3acc\0\u3b5a\0\u3be8\0\u3c76"+ + "\0\u3d04\0\u3d92\0\u3e20\0\u3eae\0\u3f3c\0\u3fca\0\u4058\0\u40e6"+ + "\0\u4174\0\u4202\0\u4290\0\u431e\0\u43ac\0\u443a\0\u44c8\0\u4556"+ + "\0\u45e4\0\u4672\0\u4700\0\u478e\0\u481c\0\u48aa\0\u4938\0\u49c6"+ + "\0\u4a54\0\u4ae2\0\u4b70\0\u4bfe\0\u4c8c\0\u4d1a\0\u4da8\0\u4e36"+ + "\0\u4ec4\0\u4f52\0\u4fe0\0\u506e\0\u50fc\0\u518a\0\u5218\0\u52a6"+ + "\0\u5334\0\u53c2\0\u5450\0\u54de\0\u556c\0\u55fa\0\u5688\0\u5716"+ + "\0\u57a4\0\u5832\0\u58c0\0\u594e\0\u59dc\0\u5a6a\0\u5af8\0\u5b86"+ + "\0\u5c14\0\u5ca2\0\u5d30\0\u5dbe\0\u5e4c\0\u5eda\0\u5f68\0\u5ff6"+ + "\0\u6084\0\u6112\0\u61a0\0\u622e\0\u62bc\0\u634a\0\u63d8\0\u6466"+ + "\0\u64f4\0\u6582\0\u6610\0\u669e\0\u672c\0\u67ba\0\u6848\0\u68d6"+ + "\0\u6964\0\u69f2\0\u6a80\0\u6b0e\0\u6b9c\0\u6c2a\0\u6cb8\0\u6d46"+ + "\0\u6dd4\0\u6e62\0\u6ef0\0\u6f7e\0\u700c"; private static int [] 
zzUnpackRowMap() { - int [] result = new int[156]; + int [] result = new int[213]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -297,419 +307,578 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { "\3\2\1\13\2\2\1\14\4\2\1\15\3\2\1\16"+ "\17\2\1\17\2\2\1\20\66\2\1\21\1\2\1\22"+ "\2\2\1\23\1\24\1\2\1\25\1\2\1\26\1\2"+ - "\1\27\1\2\1\30\1\2\1\31\1\32\3\2\1\33"+ - "\2\34\1\35\1\36\1\37\213\0\1\30\2\0\1\30"+ - "\4\0\1\30\16\0\1\30\15\0\1\30\20\0\1\30"+ - "\1\0\1\30\31\0\1\30\4\0\1\30\10\0\2\30"+ - "\15\0\2\30\10\0\1\30\21\0\2\30\5\0\1\30"+ - "\2\0\1\30\3\0\2\30\10\0\4\30\1\0\3\30"+ - "\1\0\1\30\2\0\1\30\2\0\1\30\4\0\4\30"+ - "\1\0\2\30\1\0\1\30\2\0\1\30\1\0\1\30"+ - "\2\0\4\30\2\0\3\30\1\0\2\30\1\0\3\30"+ - "\5\0\4\30\2\0\10\30\1\0\1\30\2\0\4\30"+ - "\1\0\2\30\1\0\1\30\1\0\2\30\4\0\1\30"+ - "\3\0\1\30\24\0\1\30\4\0\1\30\11\0\1\30"+ - "\22\0\1\30\3\0\1\30\27\0\1\30\63\0\1\30"+ - "\24\0\1\30\3\0\4\30\1\0\1\30\1\0\1\31"+ - "\2\0\1\30\1\0\2\30\2\0\2\30\2\0\3\30"+ - "\1\0\1\30\1\0\1\30\2\0\4\30\1\0\3\30"+ - "\1\0\1\30\1\0\3\30\1\0\2\30\1\0\4\30"+ - "\1\0\2\30\2\0\10\30\1\0\2\30\1\0\11\30"+ - "\1\0\10\30\1\0\13\30\1\31\1\0\1\30\1\0"+ - "\1\30\1\0\2\30\2\0\1\30\1\0\1\30\3\0"+ - "\1\30\33\0\1\30\17\0\1\30\23\0\1\30\23\0"+ - "\1\30\6\0\3\30\37\0\1\30\7\0\1\30\23\0"+ - "\1\30\1\0\2\30\1\0\1\30\1\0\4\30\1\0"+ - "\1\30\1\0\1\30\1\0\2\30\1\0\3\30\1\0"+ - "\2\30\1\0\4\30\1\0\3\30\1\0\17\30\1\0"+ - "\2\30\1\0\21\30\1\0\2\30\1\0\41\30\1\0"+ - "\1\30\1\0\2\30\2\0\1\30\1\0\1\30\1\0"+ - "\1\30\1\0\1\30\33\0\1\30\3\0\2\30\12\0"+ - "\2\30\13\0\1\30\6\0\1\30\2\0\2\30\6\0"+ - "\1\30\4\0\2\30\2\0\2\30\5\0\3\30\10\0"+ - "\1\30\26\0\1\30\7\0\1\30\23\0\1\30\1\0"+ - "\2\30\1\0\1\30\2\0\2\30\2\0\1\30\3\0"+ - "\2\30\1\0\3\30\1\0\2\30\1\0\4\30\1\0"+ - "\3\30\1\0\1\30\1\0\2\30\2\0\11\30\1\0"+ - "\2\30\1\0\1\30\1\0\2\30\1\0\14\30\1\0"+ - "\2\30\1\0\3\30\1\0\1\30\1\0\30\30\1\0"+ - "\2\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ 
- "\1\30\1\0\1\30\1\0\1\30\17\0\1\30\26\0"+ - "\2\30\23\0\1\31\1\30\66\0\1\31\46\0\1\31"+ - "\27\0\4\30\2\0\2\30\14\0\3\30\15\0\3\30"+ - "\3\0\1\30\7\0\2\30\13\0\1\30\13\0\4\31"+ - "\1\0\2\30\11\0\1\30\37\0\1\30\3\0\2\30"+ - "\12\0\2\30\1\0\3\30\7\0\1\30\6\0\2\30"+ - "\1\0\2\30\6\0\1\30\4\0\2\30\2\0\2\30"+ - "\5\0\3\30\10\0\1\30\16\0\1\30\4\0\2\31"+ - "\1\0\1\30\7\0\1\30\23\0\1\30\4\0\1\30"+ - "\6\0\1\30\3\0\1\30\6\0\1\30\5\0\1\30"+ - "\2\0\2\30\1\0\17\30\2\0\1\30\13\0\7\30"+ - "\2\0\1\30\1\0\1\30\1\0\1\30\2\0\1\30"+ - "\1\0\1\30\1\0\1\30\1\0\1\30\6\0\2\30"+ - "\5\0\1\30\1\0\1\30\2\0\3\30\1\0\1\30"+ - "\7\0\1\30\1\0\1\30\35\0\1\30\17\0\2\30"+ + "\1\27\3\2\1\30\1\2\1\31\1\32\3\2\1\33"+ + "\2\34\1\35\1\36\2\2\1\37\1\40\220\0\1\30"+ + "\2\0\1\30\4\0\1\30\16\0\1\30\15\0\1\30"+ + "\20\0\1\30\1\0\1\30\41\0\1\30\4\0\1\30"+ + "\10\0\2\30\5\0\2\30\10\0\1\30\26\0\2\30"+ + "\5\0\1\30\2\0\1\30\3\0\2\30\10\0\4\30"+ + "\1\0\3\30\1\0\1\30\2\0\1\30\2\0\1\30"+ + "\4\0\4\30\1\0\2\30\1\0\1\30\2\0\1\30"+ + "\1\0\1\30\2\0\4\30\2\0\3\30\1\0\2\30"+ + "\1\0\3\30\1\0\4\30\1\0\2\30\5\0\4\30"+ + "\2\0\10\30\1\0\1\30\2\0\1\30\1\0\2\30"+ + "\4\0\1\30\3\0\3\30\27\0\1\30\4\0\1\30"+ + "\11\0\1\30\22\0\1\30\3\0\1\30\27\0\1\30"+ + "\63\0\1\30\31\0\1\30\3\0\4\30\1\0\1\30"+ + "\1\0\1\31\2\0\1\30\1\0\2\30\2\0\2\30"+ + "\2\0\3\30\1\0\1\30\1\0\1\30\2\0\4\30"+ + "\1\0\3\30\1\0\1\30\1\0\3\30\1\0\2\30"+ + "\1\0\4\30\1\0\2\30\2\0\10\30\1\0\2\30"+ + "\1\0\10\30\1\31\1\0\7\30\1\0\10\30\1\0"+ + "\6\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\1\30\3\0\3\30\36\0\1\30\17\0\1\30\23\0"+ + "\1\30\23\0\1\30\6\0\3\30\37\0\1\30\7\0"+ + "\1\30\30\0\1\30\1\0\2\30\1\0\1\30\1\0"+ + "\4\30\1\0\1\30\1\0\1\30\1\0\2\30\1\0"+ + "\3\30\1\0\2\30\1\0\4\30\1\0\3\30\1\0"+ + "\17\30\1\0\2\30\1\0\21\30\1\0\2\30\1\0"+ + "\41\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\1\30\1\0\1\30\1\0\3\30\36\0\1\30\3\0"+ + "\2\30\12\0\2\30\13\0\1\30\6\0\1\30\2\0"+ + "\2\30\6\0\1\30\4\0\2\30\2\0\2\30\5\0"+ + "\3\30\20\0\1\30\16\0\1\30\7\0\1\30\30\0"+ + 
"\1\30\1\0\2\30\1\0\1\30\2\0\2\30\2\0"+ + "\1\30\3\0\2\30\1\0\3\30\1\0\2\30\1\0"+ + "\4\30\1\0\3\30\1\0\1\30\1\0\2\30\2\0"+ + "\11\30\1\0\2\30\1\0\1\30\1\0\2\30\1\0"+ + "\14\30\1\0\2\30\1\0\10\30\1\0\2\30\1\0"+ + "\1\30\1\0\23\30\1\0\1\30\1\0\2\30\2\0"+ + "\1\30\1\0\1\30\1\0\1\30\1\0\3\30\22\0"+ + "\1\30\26\0\2\30\23\0\1\31\1\30\40\0\1\31"+ + "\101\0\1\31\27\0\4\30\2\0\2\30\14\0\3\30"+ + "\15\0\3\30\3\0\1\30\7\0\2\30\1\0\4\31"+ + "\1\0\2\30\13\0\1\30\23\0\1\30\44\0\1\30"+ + "\3\0\2\30\12\0\2\30\1\0\3\30\7\0\1\30"+ + "\6\0\2\30\1\0\2\30\6\0\1\30\4\0\2\30"+ + "\2\0\2\30\5\0\3\30\2\0\1\30\3\0\2\31"+ + "\10\0\1\30\16\0\1\30\7\0\1\30\30\0\1\30"+ + "\4\0\1\30\6\0\1\30\3\0\1\30\6\0\1\30"+ + "\5\0\1\30\2\0\2\30\1\0\17\30\2\0\1\30"+ + "\13\0\7\30\2\0\1\30\1\0\1\30\1\0\2\30"+ + "\2\0\1\30\1\0\3\30\2\0\1\30\1\0\1\30"+ + "\1\0\1\30\1\0\1\30\6\0\2\30\6\0\1\30"+ + "\7\0\1\30\1\0\1\30\42\0\1\30\17\0\2\30"+ "\22\0\1\30\2\0\2\30\13\0\1\30\3\0\2\30"+ - "\5\0\3\30\10\0\1\30\26\0\1\30\7\0\1\30"+ - "\30\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ - "\7\0\1\30\31\0\20\30\5\0\3\30\3\0\1\30"+ - "\3\0\2\30\2\0\2\30\4\0\1\30\10\0\1\30"+ - "\4\0\1\30\2\0\1\30\4\0\1\30\1\0\1\30"+ - "\1\0\1\30\132\0\1\36\41\0\1\32\35\0\1\35"+ + "\5\0\3\30\20\0\1\30\16\0\1\30\7\0\1\30"+ + "\35\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ + "\7\0\1\30\31\0\20\30\5\0\3\30\4\0\1\30"+ + "\6\0\1\30\3\0\2\30\2\0\2\30\4\0\1\30"+ + "\5\0\1\30\2\0\1\30\4\0\1\30\1\0\1\30"+ + "\1\0\1\30\137\0\1\36\41\0\1\32\42\0\1\35"+ "\6\0\1\35\2\0\1\35\3\0\2\35\10\0\4\35"+ "\1\0\3\35\1\0\1\35\2\0\1\35\2\0\1\35"+ "\4\0\4\35\1\0\2\35\6\0\1\35\2\0\4\35"+ - "\2\0\3\35\1\0\2\35\1\0\3\35\5\0\4\35"+ - "\2\0\10\35\4\0\4\35\1\0\2\35\1\0\1\35"+ - "\1\0\2\35\4\0\1\35\3\0\1\35\17\0\1\35"+ + "\2\0\3\35\1\0\2\35\1\0\3\35\1\0\4\35"+ + "\1\0\2\35\5\0\4\35\2\0\10\35\4\0\1\35"+ + "\1\0\2\35\4\0\1\35\3\0\3\35\22\0\1\35"+ "\1\0\2\35\1\0\1\35\1\0\4\35\1\0\1\35"+ "\1\0\1\35\1\0\2\35\1\0\3\35\1\0\2\35"+ "\1\0\4\35\1\0\3\35\1\0\17\35\1\0\2\35"+ 
"\1\0\21\35\1\0\2\35\1\0\41\35\1\0\1\35"+ "\1\0\2\35\2\0\1\35\1\0\1\35\1\0\1\35"+ - "\1\0\1\35\17\0\1\35\1\0\2\35\1\0\1\35"+ + "\1\0\3\35\22\0\1\35\1\0\2\35\1\0\1\35"+ "\1\0\4\35\1\0\1\35\1\0\1\35\1\0\2\35"+ "\2\0\1\35\2\0\2\35\1\0\4\35\1\0\3\35"+ "\1\0\17\35\1\0\2\35\1\0\21\35\1\0\2\35"+ "\1\0\41\35\1\0\1\35\1\0\2\35\2\0\1\35"+ - "\1\0\1\35\1\0\1\35\1\0\1\35\33\0\1\35"+ + "\1\0\1\35\1\0\1\35\1\0\3\35\36\0\1\35"+ "\17\0\1\35\23\0\1\35\32\0\1\35\41\0\1\35"+ - "\7\0\1\35\23\0\1\35\1\0\2\35\3\0\4\35"+ + "\7\0\1\35\30\0\1\35\1\0\2\35\3\0\4\35"+ "\1\0\1\35\1\0\1\35\1\0\2\35\1\0\3\35"+ "\1\0\2\35\1\0\4\35\1\0\3\35\1\0\10\35"+ "\1\0\6\35\1\0\2\35\1\0\21\35\1\0\2\35"+ "\1\0\41\35\1\0\1\35\1\0\2\35\2\0\1\35"+ - "\1\0\1\35\1\0\1\35\1\0\1\35\210\0\1\36"+ - "\16\0\1\40\1\0\1\41\2\0\1\42\1\0\1\43"+ - "\4\0\1\44\1\0\1\45\1\0\1\46\2\0\1\47"+ - "\3\0\1\50\2\0\1\51\4\0\1\52\3\0\1\53"+ - "\17\0\1\54\2\0\1\55\21\0\1\56\2\0\1\57"+ - "\57\0\2\30\1\60\1\0\1\61\1\0\1\61\1\62"+ - "\1\0\1\30\2\0\1\30\1\0\1\40\1\0\1\41"+ - "\2\0\1\63\1\0\1\64\4\0\1\44\1\0\1\45"+ - "\1\0\1\46\2\0\1\47\3\0\1\65\2\0\1\66"+ - "\4\0\1\67\3\0\1\70\17\0\1\54\2\0\1\71"+ - "\21\0\1\72\2\0\1\73\57\0\1\30\2\31\2\0"+ - "\2\74\1\75\1\0\1\31\2\0\1\30\6\0\1\76"+ - "\21\0\1\77\2\0\1\100\10\0\1\101\22\0\1\102"+ - "\21\0\1\103\2\0\1\104\41\0\1\105\16\0\1\32"+ - "\1\0\1\32\3\0\1\62\1\0\1\32\4\0\1\40"+ - "\1\0\1\41\2\0\1\106\1\0\1\64\4\0\1\44"+ - "\1\0\1\45\1\0\1\46\2\0\1\47\3\0\1\107"+ - "\2\0\1\110\4\0\1\67\3\0\1\111\17\0\1\54"+ - "\2\0\1\112\21\0\1\113\2\0\1\114\41\0\1\115"+ - "\15\0\1\30\1\116\1\31\1\117\3\0\1\116\1\0"+ - "\1\116\2\0\1\30\204\0\2\34\11\0\1\120\21\0"+ - "\1\121\2\0\1\122\10\0\1\123\22\0\1\124\21\0"+ - "\1\125\2\0\1\126\60\0\1\35\7\0\1\35\11\0"+ - "\1\127\21\0\1\130\2\0\1\131\10\0\1\132\22\0"+ - "\1\133\21\0\1\134\2\0\1\135\60\0\1\36\7\0"+ - "\1\36\4\0\1\40\1\0\1\41\2\0\1\136\1\0"+ - "\1\43\4\0\1\44\1\0\1\45\1\0\1\46\2\0"+ - "\1\47\3\0\1\137\2\0\1\140\4\0\1\52\3\0"+ - 
"\1\141\17\0\1\54\2\0\1\142\21\0\1\143\2\0"+ - "\1\144\57\0\1\30\1\37\1\60\1\0\1\61\1\0"+ - "\1\61\1\62\1\0\1\37\2\0\1\37\7\0\1\30"+ - "\4\0\1\30\11\0\1\30\22\0\1\30\3\0\1\30"+ - "\13\0\1\30\2\0\1\30\10\0\1\30\12\0\4\30"+ - "\45\0\1\30\24\0\1\30\3\0\4\30\1\0\1\30"+ - "\1\0\1\60\2\0\1\30\1\0\2\30\2\0\2\30"+ - "\2\0\3\30\1\0\1\30\1\0\1\30\2\0\4\30"+ - "\1\0\3\30\1\0\1\30\1\0\3\30\1\0\2\30"+ - "\1\0\4\30\1\0\2\30\2\0\10\30\1\0\2\30"+ - "\1\0\11\30\1\0\10\30\1\0\13\30\1\60\1\0"+ - "\1\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ - "\1\30\3\0\1\30\17\0\1\30\26\0\2\30\23\0"+ - "\1\60\1\30\44\0\1\30\21\0\1\60\46\0\1\60"+ - "\11\0\1\30\15\0\4\30\2\0\2\30\14\0\4\30"+ - "\1\0\2\30\11\0\3\30\3\0\1\30\1\0\1\30"+ - "\4\0\3\30\5\0\4\30\2\0\2\30\12\0\4\60"+ - "\1\0\2\30\1\0\1\30\7\0\1\30\37\0\1\30"+ - "\3\0\2\30\12\0\2\30\1\0\3\30\7\0\1\30"+ - "\6\0\2\30\1\0\2\30\6\0\1\30\4\0\2\30"+ - "\2\0\2\30\5\0\3\30\10\0\1\30\16\0\1\30"+ - "\4\0\2\60\1\0\1\30\7\0\1\30\23\0\1\30"+ - "\4\0\1\30\6\0\1\30\3\0\1\30\6\0\1\30"+ - "\5\0\1\30\2\0\2\30\1\0\17\30\2\0\1\30"+ - "\13\0\7\30\2\0\1\30\1\0\1\30\1\0\1\30"+ - "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ - "\4\0\1\30\1\0\2\30\5\0\1\30\1\0\1\30"+ - "\2\0\3\30\1\0\1\30\7\0\1\30\1\0\1\30"+ - "\26\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ - "\7\0\1\30\31\0\20\30\5\0\3\30\3\0\1\30"+ - "\3\0\2\30\2\0\2\30\4\0\5\30\4\0\1\30"+ - "\4\0\1\30\2\0\1\30\4\0\1\30\1\0\1\30"+ - "\1\0\1\30\127\0\2\30\15\0\4\30\60\0\1\30"+ - "\15\0\2\30\10\0\2\30\1\0\1\30\1\0\1\30"+ - "\11\0\1\30\11\0\2\30\6\0\1\30\2\0\4\30"+ - "\3\0\1\30\2\0\2\30\1\0\3\30\5\0\1\30"+ - "\1\0\2\30\2\0\2\30\1\0\4\30\5\0\1\30"+ - "\1\0\2\30\37\0\1\40\1\0\1\41\2\0\1\145"+ - "\1\0\1\43\4\0\1\44\1\0\1\45\1\0\1\46"+ - "\2\0\1\47\3\0\1\146\2\0\1\147\4\0\1\52"+ - "\3\0\1\150\17\0\1\54\2\0\1\151\21\0\1\152"+ - "\2\0\1\153\57\0\1\30\2\60\2\0\2\154\1\62"+ - "\1\0\1\60\2\0\1\30\1\0\1\40\1\0\1\41"+ - "\2\0\1\155\1\0\1\156\4\0\1\44\1\0\1\45"+ - "\1\0\1\46\2\0\1\47\3\0\1\157\2\0\1\160"+ - 
"\4\0\1\161\3\0\1\162\17\0\1\54\2\0\1\163"+ - "\21\0\1\164\2\0\1\165\57\0\1\30\1\61\7\0"+ - "\1\61\2\0\1\30\1\0\1\40\1\0\1\41\2\0"+ - "\1\166\1\0\1\43\4\0\1\44\1\0\1\45\1\0"+ - "\1\46\2\0\1\47\3\0\1\167\2\0\1\170\4\0"+ - "\1\52\3\0\1\171\17\0\1\54\2\0\1\172\21\0"+ - "\1\173\2\0\1\174\41\0\1\115\15\0\1\30\1\62"+ - "\1\60\1\117\3\0\1\62\1\0\1\62\2\0\1\30"+ + "\1\0\1\35\1\0\1\35\1\0\3\35\165\0\1\41"+ + "\25\0\1\36\2\41\21\0\1\42\1\0\1\43\2\0"+ + "\1\44\1\0\1\45\4\0\1\46\1\0\1\47\1\0"+ + "\1\50\2\0\1\51\3\0\1\52\2\0\1\53\4\0"+ + "\1\54\3\0\1\55\17\0\1\56\2\0\1\57\21\0"+ + "\1\60\2\0\1\61\61\0\2\30\1\62\1\0\1\63"+ + "\1\0\1\63\1\64\1\0\1\30\2\0\1\63\1\0"+ + "\1\37\1\30\1\0\1\42\1\0\1\43\2\0\1\65"+ + "\1\0\1\66\4\0\1\46\1\0\1\47\1\0\1\50"+ + "\2\0\1\51\3\0\1\67\2\0\1\70\4\0\1\71"+ + "\3\0\1\72\17\0\1\56\2\0\1\73\21\0\1\74"+ + "\2\0\1\75\61\0\1\30\2\31\2\0\2\76\1\77"+ + "\1\0\1\31\2\0\1\76\1\0\1\37\1\30\6\0"+ + "\1\100\21\0\1\101\2\0\1\102\10\0\1\103\22\0"+ + "\1\104\21\0\1\105\2\0\1\106\41\0\1\107\20\0"+ + "\1\32\1\0\1\32\3\0\1\110\1\0\1\32\7\0"+ + "\1\42\1\0\1\43\2\0\1\111\1\0\1\66\4\0"+ + "\1\46\1\0\1\47\1\0\1\50\2\0\1\51\3\0"+ + "\1\112\2\0\1\113\4\0\1\71\3\0\1\114\17\0"+ + "\1\56\2\0\1\115\21\0\1\116\2\0\1\117\41\0"+ + "\1\120\17\0\1\30\1\121\1\31\1\122\3\0\1\121"+ + "\1\0\1\121\4\0\1\37\1\30\206\0\2\34\14\0"+ + "\1\123\21\0\1\124\2\0\1\125\10\0\1\126\22\0"+ + "\1\127\21\0\1\130\2\0\1\131\62\0\1\35\7\0"+ + "\1\35\14\0\1\132\21\0\1\133\2\0\1\134\10\0"+ + "\1\135\22\0\1\136\21\0\1\137\2\0\1\140\62\0"+ + "\1\36\7\0\1\36\7\0\1\42\1\0\1\43\2\0"+ + "\1\141\1\0\1\45\4\0\1\46\1\0\1\47\1\0"+ + "\1\50\2\0\1\51\3\0\1\142\2\0\1\143\4\0"+ + "\1\54\3\0\1\144\17\0\1\56\2\0\1\145\21\0"+ + "\1\146\2\0\1\147\61\0\1\30\1\37\1\62\1\0"+ + "\1\63\1\0\1\63\1\64\1\0\1\37\2\0\1\30"+ + "\1\150\1\37\1\30\1\0\1\42\1\0\1\43\2\0"+ + "\1\151\1\0\1\45\4\0\1\46\1\0\1\47\1\0"+ + "\1\50\2\0\1\51\3\0\1\152\2\0\1\153\4\0"+ + "\1\54\3\0\1\154\17\0\1\56\2\0\1\155\21\0"+ + 
"\1\156\2\0\1\157\61\0\1\30\1\40\1\62\1\0"+ + "\1\63\1\0\1\63\1\64\1\0\1\40\2\0\1\63"+ + "\1\0\1\37\1\40\6\0\1\160\21\0\1\161\2\0"+ + "\1\162\10\0\1\163\22\0\1\164\21\0\1\165\2\0"+ + "\1\166\55\0\1\167\4\0\1\41\7\0\1\41\15\0"+ + "\1\30\4\0\1\30\11\0\1\30\22\0\1\30\3\0"+ + "\1\30\13\0\1\30\2\0\1\30\10\0\1\30\22\0"+ + "\4\30\35\0\1\30\31\0\1\30\3\0\4\30\1\0"+ + "\1\30\1\0\1\62\2\0\1\30\1\0\2\30\2\0"+ + "\2\30\2\0\3\30\1\0\1\30\1\0\1\30\2\0"+ + "\4\30\1\0\3\30\1\0\1\30\1\0\3\30\1\0"+ + "\2\30\1\0\4\30\1\0\2\30\2\0\10\30\1\0"+ + "\2\30\1\0\10\30\1\62\1\0\7\30\1\0\10\30"+ + "\1\0\6\30\1\0\1\30\1\0\2\30\2\0\1\30"+ + "\1\0\1\30\3\0\3\30\22\0\1\30\26\0\2\30"+ + "\23\0\1\62\1\30\40\0\1\62\13\0\1\30\65\0"+ + "\1\62\11\0\1\30\15\0\4\30\2\0\2\30\14\0"+ + "\4\30\1\0\2\30\11\0\3\30\3\0\1\30\1\0"+ + "\1\30\4\0\3\30\1\0\4\62\1\0\2\30\5\0"+ + "\4\30\2\0\2\30\12\0\1\30\7\0\1\30\44\0"+ + "\1\30\3\0\2\30\12\0\2\30\1\0\3\30\7\0"+ + "\1\30\6\0\2\30\1\0\2\30\6\0\1\30\4\0"+ + "\2\30\2\0\2\30\5\0\3\30\2\0\1\30\3\0"+ + "\2\62\10\0\1\30\16\0\1\30\7\0\1\30\30\0"+ + "\1\30\4\0\1\30\6\0\1\30\3\0\1\30\6\0"+ + "\1\30\5\0\1\30\2\0\2\30\1\0\17\30\2\0"+ + "\1\30\13\0\7\30\2\0\1\30\1\0\1\30\1\0"+ + "\2\30\2\0\1\30\1\0\3\30\2\0\1\30\1\0"+ + "\1\30\1\0\1\30\1\0\1\30\4\0\1\30\1\0"+ + "\2\30\6\0\1\30\7\0\1\30\1\0\1\30\33\0"+ + "\1\30\6\0\1\30\3\0\1\30\3\0\1\30\7\0"+ + "\1\30\31\0\20\30\5\0\3\30\4\0\1\30\6\0"+ + "\1\30\3\0\2\30\2\0\2\30\4\0\5\30\1\0"+ + "\1\30\2\0\1\30\4\0\1\30\1\0\1\30\1\0"+ + "\1\30\134\0\2\30\25\0\4\30\55\0\1\30\15\0"+ + "\2\30\10\0\2\30\1\0\1\30\1\0\1\30\11\0"+ + "\1\30\11\0\2\30\6\0\1\30\2\0\4\30\3\0"+ + "\1\30\2\0\2\30\1\0\3\30\1\0\2\30\1\0"+ + "\1\30\10\0\1\30\1\0\2\30\2\0\2\30\1\0"+ + "\4\30\23\0\1\30\21\0\1\42\1\0\1\43\2\0"+ + "\1\170\1\0\1\45\4\0\1\46\1\0\1\47\1\0"+ + "\1\50\2\0\1\51\3\0\1\171\2\0\1\172\4\0"+ + "\1\54\3\0\1\173\17\0\1\56\2\0\1\174\21\0"+ + "\1\175\2\0\1\176\61\0\1\30\2\62\2\0\2\177"+ + "\1\200\1\0\1\62\2\0\1\177\1\0\1\37\1\30"+ + 
"\1\0\1\42\1\0\1\43\2\0\1\201\1\0\1\202"+ + "\4\0\1\46\1\0\1\47\1\0\1\50\2\0\1\51"+ + "\3\0\1\203\2\0\1\204\4\0\1\205\3\0\1\206"+ + "\17\0\1\56\2\0\1\207\21\0\1\210\2\0\1\211"+ + "\61\0\1\30\1\63\2\0\1\63\1\0\2\63\1\0"+ + "\1\63\2\0\1\63\1\0\2\30\1\0\1\42\1\0"+ + "\1\43\2\0\1\212\1\0\1\45\4\0\1\46\1\0"+ + "\1\47\1\0\1\50\2\0\1\51\3\0\1\213\2\0"+ + "\1\214\4\0\1\54\3\0\1\215\17\0\1\56\2\0"+ + "\1\216\21\0\1\217\2\0\1\220\41\0\1\120\17\0"+ + "\1\30\1\64\1\62\1\122\1\63\1\0\1\63\1\64"+ + "\1\0\1\64\2\0\1\63\1\0\1\37\1\30\7\0"+ + "\1\30\4\0\1\30\11\0\1\30\22\0\1\30\3\0"+ + "\1\30\13\0\1\31\2\0\1\31\10\0\1\30\22\0"+ + "\4\31\35\0\1\30\26\0\1\30\26\0\2\30\23\0"+ + "\1\31\1\30\40\0\1\31\13\0\1\31\65\0\1\31"+ + "\11\0\1\31\15\0\4\30\2\0\2\30\14\0\3\30"+ + "\1\31\1\0\2\31\11\0\3\30\3\0\1\30\1\0"+ + "\1\31\4\0\1\31\2\30\1\0\4\31\1\0\2\30"+ + "\5\0\4\31\2\0\1\30\1\31\12\0\1\31\7\0"+ + "\1\30\30\0\1\30\4\0\1\30\6\0\1\30\3\0"+ + "\1\30\6\0\1\30\5\0\1\30\2\0\2\30\1\0"+ + "\17\30\2\0\1\30\13\0\7\30\2\0\1\30\1\0"+ + "\1\30\1\0\2\30\2\0\1\30\1\0\3\30\2\0"+ + "\1\30\1\0\1\30\1\0\1\30\1\0\1\30\4\0"+ + "\1\31\1\0\2\30\6\0\1\30\7\0\1\30\1\0"+ + "\1\30\33\0\1\30\6\0\1\30\3\0\1\30\3\0"+ + "\1\30\7\0\1\30\31\0\20\30\5\0\3\30\4\0"+ + "\1\30\6\0\1\30\3\0\2\30\2\0\2\30\4\0"+ + "\1\30\4\31\1\0\1\30\2\0\1\30\4\0\1\30"+ + "\1\0\1\30\1\0\1\30\134\0\2\31\25\0\4\31"+ + "\55\0\1\31\15\0\2\31\10\0\2\31\1\0\1\31"+ + "\1\0\1\31\11\0\1\31\11\0\2\31\6\0\1\31"+ + "\2\0\4\31\3\0\1\31\2\0\2\31\1\0\3\31"+ + "\1\0\2\31\1\0\1\31\10\0\1\31\1\0\2\31"+ + "\2\0\2\31\1\0\4\31\23\0\1\31\26\0\1\221"+ + "\1\0\1\222\17\0\1\223\2\0\1\224\4\0\1\225"+ + "\3\0\1\226\22\0\1\227\21\0\1\230\2\0\1\231"+ + "\62\0\1\76\1\31\2\0\3\177\1\0\1\76\2\0"+ + "\1\177\4\0\1\42\1\0\1\43\2\0\1\232\1\0"+ + "\1\66\4\0\1\46\1\0\1\47\1\0\1\50\2\0"+ + "\1\51\3\0\1\233\2\0\1\234\4\0\1\71\3\0"+ + "\1\235\17\0\1\56\2\0\1\236\21\0\1\237\2\0"+ + "\1\240\41\0\1\120\17\0\1\30\1\77\1\31\1\122"+ + "\1\0\2\177\1\77\1\0\1\77\2\0\1\177\1\0"+ + 
"\1\37\1\30\71\0\1\32\2\0\1\32\33\0\4\32"+ + "\216\0\1\32\77\0\1\32\44\0\1\32\1\0\2\32"+ + "\21\0\1\32\4\0\1\32\17\0\4\32\3\0\1\32"+ + "\12\0\1\32\203\0\1\32\222\0\4\32\152\0\2\32"+ + "\25\0\4\32\55\0\1\32\15\0\2\32\10\0\2\32"+ + "\1\0\1\32\1\0\1\32\11\0\1\32\11\0\2\32"+ + "\6\0\1\32\2\0\4\32\3\0\1\32\2\0\2\32"+ + "\1\0\3\32\1\0\2\32\1\0\1\32\10\0\1\32"+ + "\1\0\2\32\2\0\2\32\1\0\4\32\23\0\1\32"+ + "\177\0\1\32\37\0\1\42\1\0\1\43\2\0\1\241"+ + "\1\0\1\45\4\0\1\46\1\0\1\47\1\0\1\50"+ + "\2\0\1\51\3\0\1\242\2\0\1\243\4\0\1\54"+ + "\3\0\1\244\17\0\1\56\2\0\1\245\21\0\1\246"+ + "\2\0\1\247\41\0\1\120\17\0\1\30\1\110\1\62"+ + "\1\122\3\0\1\110\1\0\1\110\4\0\1\37\1\30"+ "\7\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ - "\3\0\1\30\13\0\1\31\2\0\1\31\10\0\1\30"+ - "\12\0\4\31\45\0\1\30\21\0\1\30\26\0\2\30"+ - "\23\0\1\31\1\30\44\0\1\31\21\0\1\31\46\0"+ - "\1\31\11\0\1\31\15\0\4\30\2\0\2\30\14\0"+ - "\3\30\1\31\1\0\2\31\11\0\3\30\3\0\1\30"+ - "\1\0\1\31\4\0\1\31\2\30\5\0\4\31\2\0"+ - "\1\30\1\31\12\0\4\31\1\0\2\30\1\0\1\31"+ - "\7\0\1\30\23\0\1\30\4\0\1\30\6\0\1\30"+ + "\3\0\1\30\13\0\1\121\2\0\1\121\10\0\1\30"+ + "\22\0\4\121\35\0\1\30\26\0\1\30\26\0\2\30"+ + "\23\0\1\31\1\30\40\0\1\31\13\0\1\121\65\0"+ + "\1\31\11\0\1\121\15\0\4\30\2\0\2\30\14\0"+ + "\3\30\1\121\1\0\2\121\11\0\3\30\3\0\1\30"+ + "\1\0\1\121\4\0\1\121\2\30\1\0\4\31\1\0"+ + "\2\30\5\0\4\121\2\0\1\30\1\121\12\0\1\121"+ + "\7\0\1\30\30\0\1\30\4\0\1\30\6\0\1\30"+ "\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ - "\1\0\1\30\1\0\1\30\2\0\1\30\1\0\1\30"+ - "\1\0\1\30\1\0\1\30\4\0\1\31\1\0\2\30"+ - "\5\0\1\30\1\0\1\30\2\0\3\30\1\0\1\30"+ - "\7\0\1\30\1\0\1\30\26\0\1\30\6\0\1\30"+ - "\3\0\1\30\3\0\1\30\7\0\1\30\31\0\20\30"+ - "\5\0\3\30\3\0\1\30\3\0\2\30\2\0\2\30"+ - "\4\0\1\30\4\31\4\0\1\30\4\0\1\30\2\0"+ - "\1\30\4\0\1\30\1\0\1\30\1\0\1\30\127\0"+ - "\2\31\15\0\4\31\60\0\1\31\15\0\2\31\10\0"+ - "\2\31\1\0\1\31\1\0\1\31\11\0\1\31\11\0"+ - "\2\31\6\0\1\31\2\0\4\31\3\0\1\31\2\0"+ - 
"\2\31\1\0\3\31\5\0\1\31\1\0\2\31\2\0"+ - "\2\31\1\0\4\31\5\0\1\31\1\0\2\31\44\0"+ - "\1\175\1\0\1\176\17\0\1\177\2\0\1\200\4\0"+ - "\1\201\3\0\1\202\22\0\1\203\21\0\1\204\2\0"+ - "\1\205\60\0\1\74\1\31\6\0\1\74\4\0\1\40"+ - "\1\0\1\41\2\0\1\206\1\0\1\64\4\0\1\44"+ - "\1\0\1\45\1\0\1\46\2\0\1\47\3\0\1\207"+ - "\2\0\1\210\4\0\1\67\3\0\1\211\17\0\1\54"+ - "\2\0\1\212\21\0\1\213\2\0\1\214\41\0\1\115"+ - "\15\0\1\30\1\75\1\31\1\117\3\0\1\75\1\0"+ - "\1\75\2\0\1\30\71\0\1\32\2\0\1\32\23\0"+ - "\4\32\211\0\1\32\102\0\1\32\44\0\1\32\1\0"+ - "\2\32\21\0\1\32\4\0\1\32\7\0\4\32\3\0"+ - "\1\32\22\0\1\32\166\0\1\32\215\0\4\32\155\0"+ - "\2\32\15\0\4\32\60\0\1\32\15\0\2\32\10\0"+ - "\2\32\1\0\1\32\1\0\1\32\11\0\1\32\11\0"+ - "\2\32\6\0\1\32\2\0\4\32\3\0\1\32\2\0"+ - "\2\32\1\0\3\32\5\0\1\32\1\0\2\32\2\0"+ - "\2\32\1\0\4\32\5\0\1\32\1\0\2\32\215\0"+ - "\1\32\40\0\1\30\4\0\1\30\11\0\1\30\22\0"+ - "\1\30\3\0\1\30\13\0\1\116\2\0\1\116\10\0"+ - "\1\30\12\0\4\116\45\0\1\30\21\0\1\30\26\0"+ - "\2\30\23\0\1\31\1\30\44\0\1\116\21\0\1\31"+ - "\46\0\1\31\11\0\1\116\15\0\4\30\2\0\2\30"+ - "\14\0\3\30\1\116\1\0\2\116\11\0\3\30\3\0"+ - "\1\30\1\0\1\116\4\0\1\116\2\30\5\0\4\116"+ - "\2\0\1\30\1\116\12\0\4\31\1\0\2\30\1\0"+ - "\1\116\7\0\1\30\23\0\1\30\4\0\1\30\6\0"+ - "\1\30\3\0\1\30\6\0\1\30\5\0\1\30\2\0"+ - "\2\30\1\0\17\30\2\0\1\30\13\0\7\30\2\0"+ - "\1\30\1\0\1\30\1\0\1\30\2\0\1\30\1\0"+ - "\1\30\1\0\1\30\1\0\1\30\4\0\1\116\1\0"+ - "\2\30\5\0\1\30\1\0\1\30\2\0\3\30\1\0"+ - "\1\30\7\0\1\30\1\0\1\30\26\0\1\30\6\0"+ - "\1\30\3\0\1\30\3\0\1\30\7\0\1\30\31\0"+ - "\20\30\5\0\3\30\3\0\1\30\3\0\2\30\2\0"+ - "\2\30\4\0\1\30\4\116\4\0\1\30\4\0\1\30"+ - "\2\0\1\30\4\0\1\30\1\0\1\30\1\0\1\30"+ - "\127\0\2\116\15\0\4\116\60\0\1\116\15\0\2\116"+ - "\10\0\2\116\1\0\1\116\1\0\1\116\11\0\1\116"+ - "\11\0\2\116\6\0\1\116\2\0\4\116\3\0\1\116"+ - "\2\0\2\116\1\0\3\116\5\0\1\116\1\0\2\116"+ - "\2\0\2\116\1\0\4\116\5\0\1\116\1\0\2\116"+ - "\215\0\1\117\37\0\1\215\21\0\1\216\2\0\1\217"+ - 
"\10\0\1\220\22\0\1\221\21\0\1\222\2\0\1\223"+ - "\41\0\1\115\16\0\1\117\1\0\1\117\3\0\1\62"+ - "\1\0\1\117\74\0\1\35\2\0\1\35\23\0\4\35"+ - "\211\0\1\35\102\0\1\35\44\0\1\35\1\0\2\35"+ - "\21\0\1\35\4\0\1\35\7\0\4\35\3\0\1\35"+ - "\22\0\1\35\166\0\1\35\215\0\4\35\155\0\2\35"+ - "\15\0\4\35\60\0\1\35\15\0\2\35\10\0\2\35"+ - "\1\0\1\35\1\0\1\35\11\0\1\35\11\0\2\35"+ - "\6\0\1\35\2\0\4\35\3\0\1\35\2\0\2\35"+ - "\1\0\3\35\5\0\1\35\1\0\2\35\2\0\2\35"+ - "\1\0\4\35\5\0\1\35\1\0\2\35\127\0\1\36"+ - "\2\0\1\36\23\0\4\36\211\0\1\36\102\0\1\36"+ - "\44\0\1\36\1\0\2\36\21\0\1\36\4\0\1\36"+ - "\7\0\4\36\3\0\1\36\22\0\1\36\166\0\1\36"+ - "\215\0\4\36\155\0\2\36\15\0\4\36\60\0\1\36"+ - "\15\0\2\36\10\0\2\36\1\0\1\36\1\0\1\36"+ - "\11\0\1\36\11\0\2\36\6\0\1\36\2\0\4\36"+ - "\3\0\1\36\2\0\2\36\1\0\3\36\5\0\1\36"+ - "\1\0\2\36\2\0\2\36\1\0\4\36\5\0\1\36"+ - "\1\0\2\36\45\0\1\30\4\0\1\30\11\0\1\30"+ - "\22\0\1\30\3\0\1\30\13\0\1\37\2\0\1\37"+ - "\10\0\1\30\12\0\4\37\45\0\1\30\21\0\1\30"+ - "\26\0\2\30\23\0\1\60\1\30\44\0\1\37\21\0"+ - "\1\60\46\0\1\60\11\0\1\37\15\0\4\30\2\0"+ - "\2\30\14\0\3\30\1\37\1\0\2\37\11\0\3\30"+ - "\3\0\1\30\1\0\1\37\4\0\1\37\2\30\5\0"+ - "\4\37\2\0\1\30\1\37\12\0\4\60\1\0\2\30"+ - "\1\0\1\37\7\0\1\30\23\0\1\30\4\0\1\30"+ - "\6\0\1\30\3\0\1\30\6\0\1\30\5\0\1\30"+ - "\2\0\2\30\1\0\17\30\2\0\1\30\13\0\7\30"+ - "\2\0\1\30\1\0\1\30\1\0\1\30\2\0\1\30"+ - "\1\0\1\30\1\0\1\30\1\0\1\30\4\0\1\37"+ - "\1\0\2\30\5\0\1\30\1\0\1\30\2\0\3\30"+ - "\1\0\1\30\7\0\1\30\1\0\1\30\26\0\1\30"+ - "\6\0\1\30\3\0\1\30\3\0\1\30\7\0\1\30"+ - "\31\0\20\30\5\0\3\30\3\0\1\30\3\0\2\30"+ - "\2\0\2\30\4\0\1\30\4\37\4\0\1\30\4\0"+ - "\1\30\2\0\1\30\4\0\1\30\1\0\1\30\1\0"+ - "\1\30\127\0\2\37\15\0\4\37\60\0\1\37\15\0"+ - "\2\37\10\0\2\37\1\0\1\37\1\0\1\37\11\0"+ - "\1\37\11\0\2\37\6\0\1\37\2\0\4\37\3\0"+ - "\1\37\2\0\2\37\1\0\3\37\5\0\1\37\1\0"+ - "\2\37\2\0\2\37\1\0\4\37\5\0\1\37\1\0"+ - "\2\37\45\0\1\30\4\0\1\30\11\0\1\30\22\0"+ - "\1\30\3\0\1\30\13\0\1\60\2\0\1\60\10\0"+ - 
"\1\30\12\0\4\60\45\0\1\30\21\0\1\30\26\0"+ - "\2\30\23\0\1\60\1\30\44\0\1\60\21\0\1\60"+ - "\46\0\1\60\11\0\1\60\15\0\4\30\2\0\2\30"+ - "\14\0\3\30\1\60\1\0\2\60\11\0\3\30\3\0"+ - "\1\30\1\0\1\60\4\0\1\60\2\30\5\0\4\60"+ - "\2\0\1\30\1\60\12\0\4\60\1\0\2\30\1\0"+ - "\1\60\7\0\1\30\23\0\1\30\4\0\1\30\6\0"+ - "\1\30\3\0\1\30\6\0\1\30\5\0\1\30\2\0"+ - "\2\30\1\0\17\30\2\0\1\30\13\0\7\30\2\0"+ - "\1\30\1\0\1\30\1\0\1\30\2\0\1\30\1\0"+ - "\1\30\1\0\1\30\1\0\1\30\4\0\1\60\1\0"+ - "\2\30\5\0\1\30\1\0\1\30\2\0\3\30\1\0"+ - "\1\30\7\0\1\30\1\0\1\30\26\0\1\30\6\0"+ - "\1\30\3\0\1\30\3\0\1\30\7\0\1\30\31\0"+ - "\20\30\5\0\3\30\3\0\1\30\3\0\2\30\2\0"+ - "\2\30\4\0\1\30\4\60\4\0\1\30\4\0\1\30"+ - "\2\0\1\30\4\0\1\30\1\0\1\30\1\0\1\30"+ - "\127\0\2\60\15\0\4\60\60\0\1\60\15\0\2\60"+ - "\10\0\2\60\1\0\1\60\1\0\1\60\11\0\1\60"+ - "\11\0\2\60\6\0\1\60\2\0\4\60\3\0\1\60"+ - "\2\0\2\60\1\0\3\60\5\0\1\60\1\0\2\60"+ - "\2\0\2\60\1\0\4\60\5\0\1\60\1\0\2\60"+ - "\44\0\1\224\1\0\1\225\17\0\1\226\2\0\1\227"+ - "\4\0\1\230\3\0\1\231\22\0\1\232\21\0\1\233"+ - "\2\0\1\234\60\0\1\154\1\60\6\0\1\154\12\0"+ - "\1\30\4\0\1\30\11\0\1\30\22\0\1\30\3\0"+ - "\1\30\13\0\1\61\2\0\1\61\10\0\1\30\12\0"+ - "\4\61\45\0\1\30\24\0\1\30\3\0\4\30\1\0"+ - "\1\30\4\0\1\30\1\0\2\30\2\0\2\30\2\0"+ - "\3\30\1\0\1\30\1\0\1\30\2\0\4\30\1\0"+ - "\3\30\1\0\1\30\1\0\3\30\1\0\2\30\1\0"+ - "\4\30\1\0\2\30\2\0\10\30\1\0\2\30\1\0"+ - "\11\30\1\0\10\30\1\0\13\30\2\0\1\30\1\0"+ - "\1\30\1\0\2\30\2\0\1\30\1\0\1\30\3\0"+ - "\1\30\17\0\1\30\26\0\2\30\24\0\1\30\44\0"+ - "\1\61\102\0\1\61\15\0\4\30\2\0\2\30\14\0"+ - "\3\30\1\61\1\0\2\61\11\0\3\30\3\0\1\30"+ - "\1\0\1\61\4\0\1\61\2\30\5\0\4\61\2\0"+ - "\1\30\1\61\17\0\2\30\1\0\1\61\7\0\1\30"+ - "\37\0\1\30\3\0\2\30\12\0\2\30\1\0\3\30"+ - "\7\0\1\30\6\0\2\30\1\0\2\30\6\0\1\30"+ - "\4\0\2\30\2\0\2\30\5\0\3\30\10\0\1\30"+ - "\16\0\1\30\7\0\1\30\7\0\1\30\23\0\1\30"+ - "\4\0\1\30\6\0\1\30\3\0\1\30\6\0\1\30"+ - "\5\0\1\30\2\0\2\30\1\0\17\30\2\0\1\30"+ - 
"\13\0\7\30\2\0\1\30\1\0\1\30\1\0\1\30"+ + "\1\0\1\30\1\0\2\30\2\0\1\30\1\0\3\30"+ "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ - "\4\0\1\61\1\0\2\30\5\0\1\30\1\0\1\30"+ - "\2\0\3\30\1\0\1\30\7\0\1\30\1\0\1\30"+ - "\26\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ - "\7\0\1\30\31\0\20\30\5\0\3\30\3\0\1\30"+ - "\3\0\2\30\2\0\2\30\4\0\1\30\4\61\4\0"+ - "\1\30\4\0\1\30\2\0\1\30\4\0\1\30\1\0"+ - "\1\30\1\0\1\30\127\0\2\61\15\0\4\61\60\0"+ - "\1\61\15\0\2\61\10\0\2\61\1\0\1\61\1\0"+ - "\1\61\11\0\1\61\11\0\2\61\6\0\1\61\2\0"+ - "\4\61\3\0\1\61\2\0\2\61\1\0\3\61\5\0"+ - "\1\61\1\0\2\61\2\0\2\61\1\0\4\61\5\0"+ - "\1\61\1\0\2\61\45\0\1\30\4\0\1\30\11\0"+ - "\1\30\22\0\1\30\3\0\1\30\13\0\1\62\2\0"+ - "\1\62\10\0\1\30\12\0\4\62\45\0\1\30\21\0"+ - "\1\30\26\0\2\30\23\0\1\60\1\30\44\0\1\62"+ - "\21\0\1\60\46\0\1\60\11\0\1\62\15\0\4\30"+ - "\2\0\2\30\14\0\3\30\1\62\1\0\2\62\11\0"+ - "\3\30\3\0\1\30\1\0\1\62\4\0\1\62\2\30"+ - "\5\0\4\62\2\0\1\30\1\62\12\0\4\60\1\0"+ - "\2\30\1\0\1\62\7\0\1\30\23\0\1\30\4\0"+ + "\4\0\1\121\1\0\2\30\6\0\1\30\7\0\1\30"+ + "\1\0\1\30\33\0\1\30\6\0\1\30\3\0\1\30"+ + "\3\0\1\30\7\0\1\30\31\0\20\30\5\0\3\30"+ + "\4\0\1\30\6\0\1\30\3\0\2\30\2\0\2\30"+ + "\4\0\1\30\4\121\1\0\1\30\2\0\1\30\4\0"+ + "\1\30\1\0\1\30\1\0\1\30\134\0\2\121\25\0"+ + "\4\121\55\0\1\121\15\0\2\121\10\0\2\121\1\0"+ + "\1\121\1\0\1\121\11\0\1\121\11\0\2\121\6\0"+ + "\1\121\2\0\4\121\3\0\1\121\2\0\2\121\1\0"+ + "\3\121\1\0\2\121\1\0\1\121\10\0\1\121\1\0"+ + "\2\121\2\0\2\121\1\0\4\121\23\0\1\121\177\0"+ + "\1\122\44\0\1\250\21\0\1\251\2\0\1\252\10\0"+ + "\1\253\22\0\1\254\21\0\1\255\2\0\1\256\41\0"+ + "\1\120\20\0\1\122\1\0\1\122\3\0\1\110\1\0"+ + "\1\122\77\0\1\35\2\0\1\35\33\0\4\35\216\0"+ + "\1\35\77\0\1\35\44\0\1\35\1\0\2\35\21\0"+ + "\1\35\4\0\1\35\17\0\4\35\3\0\1\35\12\0"+ + "\1\35\203\0\1\35\222\0\4\35\152\0\2\35\25\0"+ + "\4\35\55\0\1\35\15\0\2\35\10\0\2\35\1\0"+ + "\1\35\1\0\1\35\11\0\1\35\11\0\2\35\6\0"+ + "\1\35\2\0\4\35\3\0\1\35\2\0\2\35\1\0"+ + "\3\35\1\0\2\35\1\0\1\35\10\0\1\35\1\0"+ 
+ "\2\35\2\0\2\35\1\0\4\35\23\0\1\35\111\0"+ + "\1\36\2\0\1\36\33\0\4\36\216\0\1\36\77\0"+ + "\1\36\44\0\1\36\1\0\2\36\21\0\1\36\4\0"+ + "\1\36\17\0\4\36\3\0\1\36\12\0\1\36\203\0"+ + "\1\36\222\0\4\36\152\0\2\36\25\0\4\36\55\0"+ + "\1\36\15\0\2\36\10\0\2\36\1\0\1\36\1\0"+ + "\1\36\11\0\1\36\11\0\2\36\6\0\1\36\2\0"+ + "\4\36\3\0\1\36\2\0\2\36\1\0\3\36\1\0"+ + "\2\36\1\0\1\36\10\0\1\36\1\0\2\36\2\0"+ + "\2\36\1\0\4\36\23\0\1\36\27\0\1\30\4\0"+ + "\1\30\11\0\1\30\22\0\1\30\3\0\1\30\13\0"+ + "\1\37\2\0\1\37\10\0\1\30\22\0\4\37\35\0"+ + "\1\30\26\0\1\30\26\0\2\30\23\0\1\62\1\30"+ + "\40\0\1\62\13\0\1\37\65\0\1\62\11\0\1\37"+ + "\15\0\4\30\2\0\2\30\14\0\3\30\1\37\1\0"+ + "\2\37\11\0\3\30\3\0\1\30\1\0\1\37\4\0"+ + "\1\37\2\30\1\0\4\62\1\0\2\30\5\0\4\37"+ + "\2\0\1\30\1\37\12\0\1\37\7\0\1\30\30\0"+ + "\1\30\4\0\1\30\6\0\1\30\3\0\1\30\6\0"+ + "\1\30\5\0\1\30\2\0\2\30\1\0\17\30\2\0"+ + "\1\30\13\0\7\30\2\0\1\30\1\0\1\30\1\0"+ + "\2\30\2\0\1\30\1\0\3\30\2\0\1\30\1\0"+ + "\1\30\1\0\1\30\1\0\1\30\4\0\1\37\1\0"+ + "\2\30\6\0\1\30\7\0\1\30\1\0\1\30\33\0"+ + "\1\30\6\0\1\30\3\0\1\30\3\0\1\30\7\0"+ + "\1\30\31\0\20\30\5\0\3\30\4\0\1\30\6\0"+ + "\1\30\3\0\2\30\2\0\2\30\4\0\1\30\4\37"+ + "\1\0\1\30\2\0\1\30\4\0\1\30\1\0\1\30"+ + "\1\0\1\30\134\0\2\37\25\0\4\37\55\0\1\37"+ + "\15\0\2\37\10\0\2\37\1\0\1\37\1\0\1\37"+ + "\11\0\1\37\11\0\2\37\6\0\1\37\2\0\4\37"+ + "\3\0\1\37\2\0\2\37\1\0\3\37\1\0\2\37"+ + "\1\0\1\37\10\0\1\37\1\0\2\37\2\0\2\37"+ + "\1\0\4\37\23\0\1\37\26\0\1\257\21\0\1\260"+ + "\2\0\1\261\10\0\1\262\22\0\1\263\21\0\1\264"+ + "\2\0\1\265\62\0\1\150\7\0\1\150\4\0\1\266"+ + "\10\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ + "\3\0\1\30\13\0\1\40\2\0\1\40\10\0\1\30"+ + "\22\0\4\40\35\0\1\30\26\0\1\30\26\0\2\30"+ + "\23\0\1\62\1\30\40\0\1\62\13\0\1\40\65\0"+ + "\1\62\11\0\1\40\15\0\4\30\2\0\2\30\14\0"+ + "\3\30\1\40\1\0\2\40\11\0\3\30\3\0\1\30"+ + "\1\0\1\40\4\0\1\40\2\30\1\0\4\62\1\0"+ + "\2\30\5\0\4\40\2\0\1\30\1\40\12\0\1\40"+ + "\7\0\1\30\30\0\1\30\4\0\1\30\6\0\1\30"+ + 
"\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ + "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ + "\1\0\1\30\1\0\2\30\2\0\1\30\1\0\3\30"+ + "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ + "\4\0\1\40\1\0\2\30\6\0\1\30\7\0\1\30"+ + "\1\0\1\30\33\0\1\30\6\0\1\30\3\0\1\30"+ + "\3\0\1\30\7\0\1\30\31\0\20\30\5\0\3\30"+ + "\4\0\1\30\6\0\1\30\3\0\2\30\2\0\2\30"+ + "\4\0\1\30\4\40\1\0\1\30\2\0\1\30\4\0"+ + "\1\30\1\0\1\30\1\0\1\30\134\0\2\40\25\0"+ + "\4\40\55\0\1\40\15\0\2\40\10\0\2\40\1\0"+ + "\1\40\1\0\1\40\11\0\1\40\11\0\2\40\6\0"+ + "\1\40\2\0\4\40\3\0\1\40\2\0\2\40\1\0"+ + "\3\40\1\0\2\40\1\0\1\40\10\0\1\40\1\0"+ + "\2\40\2\0\2\40\1\0\4\40\23\0\1\40\111\0"+ + "\1\41\2\0\1\41\33\0\4\41\216\0\1\41\77\0"+ + "\1\41\44\0\1\41\1\0\2\41\21\0\1\41\4\0"+ + "\1\41\17\0\4\41\3\0\1\41\12\0\1\41\203\0"+ + "\1\41\222\0\4\41\152\0\2\41\25\0\4\41\55\0"+ + "\1\41\15\0\2\41\10\0\2\41\1\0\1\41\1\0"+ + "\1\41\11\0\1\41\11\0\2\41\6\0\1\41\2\0"+ + "\4\41\3\0\1\41\2\0\2\41\1\0\3\41\1\0"+ + "\2\41\1\0\1\41\10\0\1\41\1\0\2\41\2\0"+ + "\2\41\1\0\4\41\23\0\1\41\165\0\1\267\26\0"+ + "\2\267\27\0\1\30\4\0\1\30\11\0\1\30\22\0"+ + "\1\30\3\0\1\30\13\0\1\62\2\0\1\62\10\0"+ + "\1\30\22\0\4\62\35\0\1\30\26\0\1\30\26\0"+ + "\2\30\23\0\1\62\1\30\40\0\1\62\13\0\1\62"+ + "\65\0\1\62\11\0\1\62\15\0\4\30\2\0\2\30"+ + "\14\0\3\30\1\62\1\0\2\62\11\0\3\30\3\0"+ + "\1\30\1\0\1\62\4\0\1\62\2\30\1\0\4\62"+ + "\1\0\2\30\5\0\4\62\2\0\1\30\1\62\12\0"+ + "\1\62\7\0\1\30\30\0\1\30\4\0\1\30\6\0"+ + "\1\30\3\0\1\30\6\0\1\30\5\0\1\30\2\0"+ + "\2\30\1\0\17\30\2\0\1\30\13\0\7\30\2\0"+ + "\1\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\3\30\2\0\1\30\1\0\1\30\1\0\1\30\1\0"+ + "\1\30\4\0\1\62\1\0\2\30\6\0\1\30\7\0"+ + "\1\30\1\0\1\30\33\0\1\30\6\0\1\30\3\0"+ + "\1\30\3\0\1\30\7\0\1\30\31\0\20\30\5\0"+ + "\3\30\4\0\1\30\6\0\1\30\3\0\2\30\2\0"+ + "\2\30\4\0\1\30\4\62\1\0\1\30\2\0\1\30"+ + "\4\0\1\30\1\0\1\30\1\0\1\30\134\0\2\62"+ + "\25\0\4\62\55\0\1\62\15\0\2\62\10\0\2\62"+ + "\1\0\1\62\1\0\1\62\11\0\1\62\11\0\2\62"+ + 
"\6\0\1\62\2\0\4\62\3\0\1\62\2\0\2\62"+ + "\1\0\3\62\1\0\2\62\1\0\1\62\10\0\1\62"+ + "\1\0\2\62\2\0\2\62\1\0\4\62\23\0\1\62"+ + "\26\0\1\270\1\0\1\271\17\0\1\272\2\0\1\273"+ + "\4\0\1\274\3\0\1\275\22\0\1\276\21\0\1\277"+ + "\2\0\1\300\62\0\1\177\1\62\2\0\3\177\1\0"+ + "\1\177\2\0\1\177\4\0\1\42\1\0\1\43\2\0"+ + "\1\301\1\0\1\45\4\0\1\46\1\0\1\47\1\0"+ + "\1\50\2\0\1\51\3\0\1\302\2\0\1\303\4\0"+ + "\1\54\3\0\1\304\17\0\1\56\2\0\1\305\21\0"+ + "\1\306\2\0\1\307\41\0\1\120\17\0\1\30\1\200"+ + "\1\62\1\122\1\0\2\177\1\200\1\0\1\200\2\0"+ + "\1\177\1\0\1\37\1\30\7\0\1\30\4\0\1\30"+ + "\11\0\1\30\22\0\1\30\3\0\1\30\13\0\1\63"+ + "\2\0\1\63\10\0\1\30\22\0\4\63\35\0\1\30"+ + "\31\0\1\30\3\0\4\30\1\0\1\30\4\0\1\30"+ + "\1\0\2\30\2\0\2\30\2\0\3\30\1\0\1\30"+ + "\1\0\1\30\2\0\4\30\1\0\3\30\1\0\1\30"+ + "\1\0\3\30\1\0\2\30\1\0\4\30\1\0\2\30"+ + "\2\0\10\30\1\0\2\30\1\0\10\30\2\0\7\30"+ + "\1\0\10\30\1\0\6\30\1\0\1\30\1\0\2\30"+ + "\2\0\1\30\1\0\1\30\3\0\3\30\22\0\1\30"+ + "\26\0\2\30\24\0\1\30\54\0\1\63\77\0\1\63"+ + "\15\0\4\30\2\0\2\30\14\0\3\30\1\63\1\0"+ + "\2\63\11\0\3\30\3\0\1\30\1\0\1\63\4\0"+ + "\1\63\2\30\6\0\2\30\5\0\4\63\2\0\1\30"+ + "\1\63\12\0\1\63\7\0\1\30\44\0\1\30\3\0"+ + "\2\30\12\0\2\30\1\0\3\30\7\0\1\30\6\0"+ + "\2\30\1\0\2\30\6\0\1\30\4\0\2\30\2\0"+ + "\2\30\5\0\3\30\2\0\1\30\15\0\1\30\16\0"+ + "\1\30\7\0\1\30\30\0\1\30\4\0\1\30\6\0"+ + "\1\30\3\0\1\30\6\0\1\30\5\0\1\30\2\0"+ + "\2\30\1\0\17\30\2\0\1\30\13\0\7\30\2\0"+ + "\1\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\3\30\2\0\1\30\1\0\1\30\1\0\1\30\1\0"+ + "\1\30\4\0\1\63\1\0\2\30\6\0\1\30\7\0"+ + "\1\30\1\0\1\30\33\0\1\30\6\0\1\30\3\0"+ + "\1\30\3\0\1\30\7\0\1\30\31\0\20\30\5\0"+ + "\3\30\4\0\1\30\6\0\1\30\3\0\2\30\2\0"+ + "\2\30\4\0\1\30\4\63\1\0\1\30\2\0\1\30"+ + "\4\0\1\30\1\0\1\30\1\0\1\30\134\0\2\63"+ + "\25\0\4\63\55\0\1\63\15\0\2\63\10\0\2\63"+ + "\1\0\1\63\1\0\1\63\11\0\1\63\11\0\2\63"+ + "\6\0\1\63\2\0\4\63\3\0\1\63\2\0\2\63"+ + "\1\0\3\63\1\0\2\63\1\0\1\63\10\0\1\63"+ + 
"\1\0\2\63\2\0\2\63\1\0\4\63\23\0\1\63"+ + "\27\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ + "\3\0\1\30\13\0\1\64\2\0\1\64\10\0\1\30"+ + "\22\0\4\64\35\0\1\30\26\0\1\30\26\0\2\30"+ + "\23\0\1\62\1\30\40\0\1\62\13\0\1\64\65\0"+ + "\1\62\11\0\1\64\15\0\4\30\2\0\2\30\14\0"+ + "\3\30\1\64\1\0\2\64\11\0\3\30\3\0\1\30"+ + "\1\0\1\64\4\0\1\64\2\30\1\0\4\62\1\0"+ + "\2\30\5\0\4\64\2\0\1\30\1\64\12\0\1\64"+ + "\7\0\1\30\30\0\1\30\4\0\1\30\6\0\1\30"+ + "\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ + "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ + "\1\0\1\30\1\0\2\30\2\0\1\30\1\0\3\30"+ + "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ + "\4\0\1\64\1\0\2\30\6\0\1\30\7\0\1\30"+ + "\1\0\1\30\33\0\1\30\6\0\1\30\3\0\1\30"+ + "\3\0\1\30\7\0\1\30\31\0\20\30\5\0\3\30"+ + "\4\0\1\30\6\0\1\30\3\0\2\30\2\0\2\30"+ + "\4\0\1\30\4\64\1\0\1\30\2\0\1\30\4\0"+ + "\1\30\1\0\1\30\1\0\1\30\134\0\2\64\25\0"+ + "\4\64\55\0\1\64\15\0\2\64\10\0\2\64\1\0"+ + "\1\64\1\0\1\64\11\0\1\64\11\0\2\64\6\0"+ + "\1\64\2\0\4\64\3\0\1\64\2\0\2\64\1\0"+ + "\3\64\1\0\2\64\1\0\1\64\10\0\1\64\1\0"+ + "\2\64\2\0\2\64\1\0\4\64\23\0\1\64\111\0"+ + "\1\76\2\0\1\76\33\0\4\76\102\0\1\31\104\0"+ + "\1\31\146\0\1\31\41\0\1\31\13\0\1\76\65\0"+ + "\1\31\11\0\1\76\44\0\1\76\1\0\2\76\21\0"+ + "\1\76\4\0\1\76\3\0\4\31\10\0\4\76\3\0"+ + "\1\76\12\0\1\76\164\0\2\31\233\0\1\76\222\0"+ + "\4\76\152\0\2\76\25\0\4\76\55\0\1\76\15\0"+ + "\2\76\10\0\2\76\1\0\1\76\1\0\1\76\11\0"+ + "\1\76\11\0\2\76\6\0\1\76\2\0\4\76\3\0"+ + "\1\76\2\0\2\76\1\0\3\76\1\0\2\76\1\0"+ + "\1\76\10\0\1\76\1\0\2\76\2\0\2\76\1\0"+ + "\4\76\23\0\1\76\27\0\1\30\4\0\1\30\11\0"+ + "\1\30\22\0\1\30\3\0\1\30\13\0\1\77\2\0"+ + "\1\77\10\0\1\30\22\0\4\77\35\0\1\30\26\0"+ + "\1\30\26\0\2\30\23\0\1\31\1\30\40\0\1\31"+ + "\13\0\1\77\65\0\1\31\11\0\1\77\15\0\4\30"+ + "\2\0\2\30\14\0\3\30\1\77\1\0\2\77\11\0"+ + "\3\30\3\0\1\30\1\0\1\77\4\0\1\77\2\30"+ + "\1\0\4\31\1\0\2\30\5\0\4\77\2\0\1\30"+ + "\1\77\12\0\1\77\7\0\1\30\30\0\1\30\4\0"+ "\1\30\6\0\1\30\3\0\1\30\6\0\1\30\5\0"+ 
"\1\30\2\0\2\30\1\0\17\30\2\0\1\30\13\0"+ - "\7\30\2\0\1\30\1\0\1\30\1\0\1\30\2\0"+ - "\1\30\1\0\1\30\1\0\1\30\1\0\1\30\4\0"+ - "\1\62\1\0\2\30\5\0\1\30\1\0\1\30\2\0"+ - "\3\30\1\0\1\30\7\0\1\30\1\0\1\30\26\0"+ - "\1\30\6\0\1\30\3\0\1\30\3\0\1\30\7\0"+ - "\1\30\31\0\20\30\5\0\3\30\3\0\1\30\3\0"+ - "\2\30\2\0\2\30\4\0\1\30\4\62\4\0\1\30"+ - "\4\0\1\30\2\0\1\30\4\0\1\30\1\0\1\30"+ - "\1\0\1\30\127\0\2\62\15\0\4\62\60\0\1\62"+ - "\15\0\2\62\10\0\2\62\1\0\1\62\1\0\1\62"+ - "\11\0\1\62\11\0\2\62\6\0\1\62\2\0\4\62"+ - "\3\0\1\62\2\0\2\62\1\0\3\62\5\0\1\62"+ - "\1\0\2\62\2\0\2\62\1\0\4\62\5\0\1\62"+ - "\1\0\2\62\127\0\1\74\2\0\1\74\23\0\4\74"+ - "\105\0\1\31\132\0\1\31\113\0\1\31\45\0\1\74"+ - "\21\0\1\31\46\0\1\31\11\0\1\74\44\0\1\74"+ - "\1\0\2\74\21\0\1\74\4\0\1\74\7\0\4\74"+ - "\3\0\1\74\12\0\4\31\4\0\1\74\205\0\2\31"+ - "\170\0\1\74\215\0\4\74\155\0\2\74\15\0\4\74"+ - "\60\0\1\74\15\0\2\74\10\0\2\74\1\0\1\74"+ - "\1\0\1\74\11\0\1\74\11\0\2\74\6\0\1\74"+ - "\2\0\4\74\3\0\1\74\2\0\2\74\1\0\3\74"+ - "\5\0\1\74\1\0\2\74\2\0\2\74\1\0\4\74"+ - "\5\0\1\74\1\0\2\74\45\0\1\30\4\0\1\30"+ - "\11\0\1\30\22\0\1\30\3\0\1\30\13\0\1\75"+ - "\2\0\1\75\10\0\1\30\12\0\4\75\45\0\1\30"+ - "\21\0\1\30\26\0\2\30\23\0\1\31\1\30\44\0"+ - "\1\75\21\0\1\31\46\0\1\31\11\0\1\75\15\0"+ - "\4\30\2\0\2\30\14\0\3\30\1\75\1\0\2\75"+ - "\11\0\3\30\3\0\1\30\1\0\1\75\4\0\1\75"+ - "\2\30\5\0\4\75\2\0\1\30\1\75\12\0\4\31"+ - "\1\0\2\30\1\0\1\75\7\0\1\30\23\0\1\30"+ - "\4\0\1\30\6\0\1\30\3\0\1\30\6\0\1\30"+ - "\5\0\1\30\2\0\2\30\1\0\17\30\2\0\1\30"+ - "\13\0\7\30\2\0\1\30\1\0\1\30\1\0\1\30"+ + "\7\30\2\0\1\30\1\0\1\30\1\0\2\30\2\0"+ + "\1\30\1\0\3\30\2\0\1\30\1\0\1\30\1\0"+ + "\1\30\1\0\1\30\4\0\1\77\1\0\2\30\6\0"+ + "\1\30\7\0\1\30\1\0\1\30\33\0\1\30\6\0"+ + "\1\30\3\0\1\30\3\0\1\30\7\0\1\30\31\0"+ + "\20\30\5\0\3\30\4\0\1\30\6\0\1\30\3\0"+ + "\2\30\2\0\2\30\4\0\1\30\4\77\1\0\1\30"+ + "\2\0\1\30\4\0\1\30\1\0\1\30\1\0\1\30"+ + "\134\0\2\77\25\0\4\77\55\0\1\77\15\0\2\77"+ + 
"\10\0\2\77\1\0\1\77\1\0\1\77\11\0\1\77"+ + "\11\0\2\77\6\0\1\77\2\0\4\77\3\0\1\77"+ + "\2\0\2\77\1\0\3\77\1\0\2\77\1\0\1\77"+ + "\10\0\1\77\1\0\2\77\2\0\2\77\1\0\4\77"+ + "\23\0\1\77\27\0\1\30\4\0\1\30\11\0\1\30"+ + "\22\0\1\30\3\0\1\30\13\0\1\110\2\0\1\110"+ + "\10\0\1\30\22\0\4\110\35\0\1\30\26\0\1\30"+ + "\26\0\2\30\23\0\1\62\1\30\40\0\1\62\13\0"+ + "\1\110\65\0\1\62\11\0\1\110\15\0\4\30\2\0"+ + "\2\30\14\0\3\30\1\110\1\0\2\110\11\0\3\30"+ + "\3\0\1\30\1\0\1\110\4\0\1\110\2\30\1\0"+ + "\4\62\1\0\2\30\5\0\4\110\2\0\1\30\1\110"+ + "\12\0\1\110\7\0\1\30\30\0\1\30\4\0\1\30"+ + "\6\0\1\30\3\0\1\30\6\0\1\30\5\0\1\30"+ + "\2\0\2\30\1\0\17\30\2\0\1\30\13\0\7\30"+ + "\2\0\1\30\1\0\1\30\1\0\2\30\2\0\1\30"+ + "\1\0\3\30\2\0\1\30\1\0\1\30\1\0\1\30"+ + "\1\0\1\30\4\0\1\110\1\0\2\30\6\0\1\30"+ + "\7\0\1\30\1\0\1\30\33\0\1\30\6\0\1\30"+ + "\3\0\1\30\3\0\1\30\7\0\1\30\31\0\20\30"+ + "\5\0\3\30\4\0\1\30\6\0\1\30\3\0\2\30"+ + "\2\0\2\30\4\0\1\30\4\110\1\0\1\30\2\0"+ + "\1\30\4\0\1\30\1\0\1\30\1\0\1\30\134\0"+ + "\2\110\25\0\4\110\55\0\1\110\15\0\2\110\10\0"+ + "\2\110\1\0\1\110\1\0\1\110\11\0\1\110\11\0"+ + "\2\110\6\0\1\110\2\0\4\110\3\0\1\110\2\0"+ + "\2\110\1\0\3\110\1\0\2\110\1\0\1\110\10\0"+ + "\1\110\1\0\2\110\2\0\2\110\1\0\4\110\23\0"+ + "\1\110\111\0\1\122\2\0\1\122\33\0\4\122\216\0"+ + "\1\122\77\0\1\122\44\0\1\122\1\0\2\122\21\0"+ + "\1\122\4\0\1\122\17\0\4\122\3\0\1\122\12\0"+ + "\1\122\203\0\1\122\222\0\4\122\152\0\2\122\25\0"+ + "\4\122\55\0\1\122\15\0\2\122\10\0\2\122\1\0"+ + "\1\122\1\0\1\122\11\0\1\122\11\0\2\122\6\0"+ + "\1\122\2\0\4\122\3\0\1\122\2\0\2\122\1\0"+ + "\3\122\1\0\2\122\1\0\1\122\10\0\1\122\1\0"+ + "\2\122\2\0\2\122\1\0\4\122\23\0\1\122\111\0"+ + "\1\150\2\0\1\150\33\0\4\150\216\0\1\150\77\0"+ + "\1\150\44\0\1\150\1\0\2\150\21\0\1\150\4\0"+ + "\1\150\17\0\4\150\3\0\1\150\12\0\1\150\203\0"+ + "\1\150\222\0\4\150\152\0\2\150\25\0\4\150\55\0"+ + "\1\150\15\0\2\150\10\0\2\150\1\0\1\150\1\0"+ + "\1\150\11\0\1\150\11\0\2\150\6\0\1\150\2\0"+ + 
"\4\150\3\0\1\150\2\0\2\150\1\0\3\150\1\0"+ + "\2\150\1\0\1\150\10\0\1\150\1\0\2\150\2\0"+ + "\2\150\1\0\4\150\23\0\1\150\21\0\1\42\1\0"+ + "\1\43\2\0\1\310\1\0\1\45\4\0\1\46\1\0"+ + "\1\47\1\0\1\50\2\0\1\51\3\0\1\311\2\0"+ + "\1\312\4\0\1\54\3\0\1\313\17\0\1\56\2\0"+ + "\1\314\21\0\1\315\2\0\1\316\61\0\1\30\1\266"+ + "\1\62\4\0\1\110\1\0\1\266\4\0\1\37\1\30"+ + "\6\0\1\317\21\0\1\320\2\0\1\321\10\0\1\322"+ + "\22\0\1\323\21\0\1\324\2\0\1\325\55\0\1\167"+ + "\4\0\1\267\7\0\1\267\77\0\1\177\2\0\1\177"+ + "\33\0\4\177\102\0\1\62\104\0\1\62\146\0\1\62"+ + "\41\0\1\62\13\0\1\177\65\0\1\62\11\0\1\177"+ + "\44\0\1\177\1\0\2\177\21\0\1\177\4\0\1\177"+ + "\3\0\4\62\10\0\4\177\3\0\1\177\12\0\1\177"+ + "\164\0\2\62\233\0\1\177\222\0\4\177\152\0\2\177"+ + "\25\0\4\177\55\0\1\177\15\0\2\177\10\0\2\177"+ + "\1\0\1\177\1\0\1\177\11\0\1\177\11\0\2\177"+ + "\6\0\1\177\2\0\4\177\3\0\1\177\2\0\2\177"+ + "\1\0\3\177\1\0\2\177\1\0\1\177\10\0\1\177"+ + "\1\0\2\177\2\0\2\177\1\0\4\177\23\0\1\177"+ + "\27\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ + "\3\0\1\30\13\0\1\200\2\0\1\200\10\0\1\30"+ + "\22\0\4\200\35\0\1\30\26\0\1\30\26\0\2\30"+ + "\23\0\1\62\1\30\40\0\1\62\13\0\1\200\65\0"+ + "\1\62\11\0\1\200\15\0\4\30\2\0\2\30\14\0"+ + "\3\30\1\200\1\0\2\200\11\0\3\30\3\0\1\30"+ + "\1\0\1\200\4\0\1\200\2\30\1\0\4\62\1\0"+ + "\2\30\5\0\4\200\2\0\1\30\1\200\12\0\1\200"+ + "\7\0\1\30\30\0\1\30\4\0\1\30\6\0\1\30"+ + "\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ + "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ + "\1\0\1\30\1\0\2\30\2\0\1\30\1\0\3\30"+ "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ - "\4\0\1\75\1\0\2\30\5\0\1\30\1\0\1\30"+ - "\2\0\3\30\1\0\1\30\7\0\1\30\1\0\1\30"+ - "\26\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ - "\7\0\1\30\31\0\20\30\5\0\3\30\3\0\1\30"+ - "\3\0\2\30\2\0\2\30\4\0\1\30\4\75\4\0"+ - "\1\30\4\0\1\30\2\0\1\30\4\0\1\30\1\0"+ - "\1\30\1\0\1\30\127\0\2\75\15\0\4\75\60\0"+ - "\1\75\15\0\2\75\10\0\2\75\1\0\1\75\1\0"+ - "\1\75\11\0\1\75\11\0\2\75\6\0\1\75\2\0"+ - 
"\4\75\3\0\1\75\2\0\2\75\1\0\3\75\5\0"+ - "\1\75\1\0\2\75\2\0\2\75\1\0\4\75\5\0"+ - "\1\75\1\0\2\75\127\0\1\117\2\0\1\117\23\0"+ - "\4\117\211\0\1\117\102\0\1\117\44\0\1\117\1\0"+ - "\2\117\21\0\1\117\4\0\1\117\7\0\4\117\3\0"+ - "\1\117\22\0\1\117\166\0\1\117\215\0\4\117\155\0"+ - "\2\117\15\0\4\117\60\0\1\117\15\0\2\117\10\0"+ - "\2\117\1\0\1\117\1\0\1\117\11\0\1\117\11\0"+ - "\2\117\6\0\1\117\2\0\4\117\3\0\1\117\2\0"+ - "\2\117\1\0\3\117\5\0\1\117\1\0\2\117\2\0"+ - "\2\117\1\0\4\117\5\0\1\117\1\0\2\117\127\0"+ - "\1\154\2\0\1\154\23\0\4\154\105\0\1\60\132\0"+ - "\1\60\113\0\1\60\45\0\1\154\21\0\1\60\46\0"+ - "\1\60\11\0\1\154\44\0\1\154\1\0\2\154\21\0"+ - "\1\154\4\0\1\154\7\0\4\154\3\0\1\154\12\0"+ - "\4\60\4\0\1\154\205\0\2\60\170\0\1\154\215\0"+ - "\4\154\155\0\2\154\15\0\4\154\60\0\1\154\15\0"+ - "\2\154\10\0\2\154\1\0\1\154\1\0\1\154\11\0"+ - "\1\154\11\0\2\154\6\0\1\154\2\0\4\154\3\0"+ - "\1\154\2\0\2\154\1\0\3\154\5\0\1\154\1\0"+ - "\2\154\2\0\2\154\1\0\4\154\5\0\1\154\1\0"+ - "\2\154\36\0"; + "\4\0\1\200\1\0\2\30\6\0\1\30\7\0\1\30"+ + "\1\0\1\30\33\0\1\30\6\0\1\30\3\0\1\30"+ + "\3\0\1\30\7\0\1\30\31\0\20\30\5\0\3\30"+ + "\4\0\1\30\6\0\1\30\3\0\2\30\2\0\2\30"+ + "\4\0\1\30\4\200\1\0\1\30\2\0\1\30\4\0"+ + "\1\30\1\0\1\30\1\0\1\30\134\0\2\200\25\0"+ + "\4\200\55\0\1\200\15\0\2\200\10\0\2\200\1\0"+ + "\1\200\1\0\1\200\11\0\1\200\11\0\2\200\6\0"+ + "\1\200\2\0\4\200\3\0\1\200\2\0\2\200\1\0"+ + "\3\200\1\0\2\200\1\0\1\200\10\0\1\200\1\0"+ + "\2\200\2\0\2\200\1\0\4\200\23\0\1\200\27\0"+ + "\1\30\4\0\1\30\11\0\1\30\22\0\1\30\3\0"+ + "\1\30\13\0\1\266\2\0\1\266\10\0\1\30\22\0"+ + "\4\266\35\0\1\30\26\0\1\30\26\0\2\30\23\0"+ + "\1\62\1\30\40\0\1\62\13\0\1\266\65\0\1\62"+ + "\11\0\1\266\15\0\4\30\2\0\2\30\14\0\3\30"+ + "\1\266\1\0\2\266\11\0\3\30\3\0\1\30\1\0"+ + "\1\266\4\0\1\266\2\30\1\0\4\62\1\0\2\30"+ + "\5\0\4\266\2\0\1\30\1\266\12\0\1\266\7\0"+ + "\1\30\30\0\1\30\4\0\1\30\6\0\1\30\3\0"+ + "\1\30\6\0\1\30\5\0\1\30\2\0\2\30\1\0"+ + 
"\17\30\2\0\1\30\13\0\7\30\2\0\1\30\1\0"+ + "\1\30\1\0\2\30\2\0\1\30\1\0\3\30\2\0"+ + "\1\30\1\0\1\30\1\0\1\30\1\0\1\30\4\0"+ + "\1\266\1\0\2\30\6\0\1\30\7\0\1\30\1\0"+ + "\1\30\33\0\1\30\6\0\1\30\3\0\1\30\3\0"+ + "\1\30\7\0\1\30\31\0\20\30\5\0\3\30\4\0"+ + "\1\30\6\0\1\30\3\0\2\30\2\0\2\30\4\0"+ + "\1\30\4\266\1\0\1\30\2\0\1\30\4\0\1\30"+ + "\1\0\1\30\1\0\1\30\134\0\2\266\25\0\4\266"+ + "\55\0\1\266\15\0\2\266\10\0\2\266\1\0\1\266"+ + "\1\0\1\266\11\0\1\266\11\0\2\266\6\0\1\266"+ + "\2\0\4\266\3\0\1\266\2\0\2\266\1\0\3\266"+ + "\1\0\2\266\1\0\1\266\10\0\1\266\1\0\2\266"+ + "\2\0\2\266\1\0\4\266\23\0\1\266\111\0\1\267"+ + "\2\0\1\267\33\0\4\267\216\0\1\267\77\0\1\267"+ + "\44\0\1\267\1\0\2\267\21\0\1\267\4\0\1\267"+ + "\17\0\4\267\3\0\1\267\12\0\1\267\203\0\1\267"+ + "\222\0\4\267\152\0\2\267\25\0\4\267\55\0\1\267"+ + "\15\0\2\267\10\0\2\267\1\0\1\267\1\0\1\267"+ + "\11\0\1\267\11\0\2\267\6\0\1\267\2\0\4\267"+ + "\3\0\1\267\2\0\2\267\1\0\3\267\1\0\2\267"+ + "\1\0\1\267\10\0\1\267\1\0\2\267\2\0\2\267"+ + "\1\0\4\267\23\0\1\267\20\0"; private static int [] zzUnpackTrans() { - int [] result = new int[20002]; + int [] result = new int[28826]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -747,11 +916,12 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\1\0\1\11\35\1\20\0\1\1\1\0\1\1\12\0"+ - "\1\1\21\0\1\1\115\0"; + "\1\0\1\11\36\1\21\0\1\1\1\0\1\1\12\0"+ + "\1\1\10\0\1\1\11\0\1\1\55\0\1\1\65\0"+ + "\2\1\36\0"; private static int [] zzUnpackAttribute() { - int [] result = new int[156]; + int [] result = new int[213]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -858,7 +1028,6 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { /** * Creates a new scanner - * There is also a 
java.io.InputStream version of this constructor. * * @param in the java.io.Reader to read input from. */ @@ -866,7 +1035,6 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { this.zzReader = in; } - /** * Unpacks the compressed character translation table. @@ -878,7 +1046,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 2848) { + while (i < 2860) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex index 8ca3b8054f5..a0ad6ad95fe 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex @@ -32,11 +32,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * Asian languages, including Thai, Lao, Myanmar, and Khmer *
  • <IDEOGRAPHIC>: A single CJKV ideographic character
  • *
  • <HIRAGANA>: A single hiragana character
  • + *
  • <KATAKANA>: A sequence of katakana characters
  • + *
  • <HANGUL>: A sequence of Hangul characters
  • * */ %% -%unicode 6.1 +%unicode 6.3 %integer %final %public @@ -47,33 +49,40 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %buffer 4096 %include SUPPLEMENTARY.jflex-macro -ALetter = ([\p{WB:ALetter}] | {ALetterSupp}) -Format = ([\p{WB:Format}] | {FormatSupp}) -Numeric = ([\p{WB:Numeric}] | {NumericSupp}) -Extend = ([\p{WB:Extend}] | {ExtendSupp}) -Katakana = ([\p{WB:Katakana}] | {KatakanaSupp}) -MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp}) -MidNum = ([\p{WB:MidNum}] | {MidNumSupp}) -MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp}) -ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp}) -ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp}) -Han = ([\p{Script:Han}] | {HanSupp}) -Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp}) +ALetter = (\p{WB:ALetter} | {ALetterSupp}) +Format = (\p{WB:Format} | {FormatSupp}) +Numeric = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp}) +Extend = (\p{WB:Extend} | {ExtendSupp}) +Katakana = (\p{WB:Katakana} | {KatakanaSupp}) +MidLetter = (\p{WB:MidLetter} | {MidLetterSupp}) +MidNum = (\p{WB:MidNum} | {MidNumSupp}) +MidNumLet = (\p{WB:MidNumLet} | {MidNumLetSupp}) +ExtendNumLet = (\p{WB:ExtendNumLet} | {ExtendNumLetSupp}) +ComplexContext = (\p{LB:Complex_Context} | {ComplexContextSupp}) +Han = (\p{Script:Han} | {HanSupp}) +Hiragana = (\p{Script:Hiragana} | {HiraganaSupp}) +SingleQuote = (\p{WB:Single_Quote} | {SingleQuoteSupp}) +DoubleQuote = (\p{WB:Double_Quote} | {DoubleQuoteSupp}) +HebrewLetter = (\p{WB:Hebrew_Letter} | {HebrewLetterSupp}) +RegionalIndicator = (\p{WB:Regional_Indicator} | {RegionalIndicatorSupp}) +HebrewOrALetter = ({HebrewLetter} | {ALetter}) -// Script=Hangul & Aletter -HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})* // UAX#29 WB4. 
X (Extend | Format)* --> X // -ALetterEx = {ALetter} ({Format} | {Extend})* -// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it -NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})* -KatakanaEx = {Katakana} ({Format} | {Extend})* -MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})* -MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})* -ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* +HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})* +HebrewOrALetterEx = {HebrewOrALetter} ({Format} | {Extend})* +NumericEx = {Numeric} ({Format} | {Extend})* +KatakanaEx = {Katakana} ({Format} | {Extend})* +MidLetterEx = ({MidLetter} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})* +MidNumericEx = ({MidNum} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})* +ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* +HanEx = {Han} ({Format} | {Extend})* +HiraganaEx = {Hiragana} ({Format} | {Extend})* +SingleQuoteEx = {SingleQuote} ({Format} | {Extend})* +DoubleQuoteEx = {DoubleQuote} ({Format} | {Extend})* +HebrewLetterEx = {HebrewLetter} ({Format} | {Extend})* +RegionalIndicatorEx = {RegionalIndicator} ({Format} | {Extend})* -HanEx = {Han} ({Format} | {Extend})* -HiraganaEx = {Hiragana} ({Format} | {Extend})* %{ /** Alphanumeric sequences */ @@ -121,15 +130,12 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})* <> { return StandardTokenizerInterface.YYEOF; } // UAX#29 WB8. Numeric × Numeric -// WB11. Numeric (MidNum | MidNumLet) × Numeric -// WB12. Numeric × (MidNum | MidNumLet) Numeric -// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet -// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) +// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric +// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric +// WB13a. 
(ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet +// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) // -{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx} - | {MidNumericEx} {NumericEx} - | {NumericEx})* -{ExtendNumLetEx}* +{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}* { return NUMERIC_TYPE; } // subset of the below for typing purposes only! @@ -139,22 +145,32 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})* {KatakanaEx}+ { return KATAKANA_TYPE; } -// UAX#29 WB5. ALetter × ALetter -// WB6. ALetter × (MidLetter | MidNumLet) ALetter -// WB7. ALetter (MidLetter | MidNumLet) × ALetter -// WB9. ALetter × Numeric -// WB10. Numeric × ALetter +// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter) +// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter) +// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter) +// WB7a. Hebrew_Letter × Single_Quote +// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter +// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter +// WB9. (ALetter | Hebrew_Letter) × Numeric +// WB10. Numeric × (ALetter | Hebrew_Letter) // WB13. Katakana × Katakana -// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet -// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) +// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet +// WB13b. 
ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) // -{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* - | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* - | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) -({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* - | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* - | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )* -{ExtendNumLetEx}* +{ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )* + | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} ) + | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )* + | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )* + )+ + ) +({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )* + | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} ) + | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )* + | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )* + )+ + ) +)* +{ExtendNumLetEx}* { return WORD_TYPE; } @@ -166,7 +182,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})* // annex. That means that satisfactory treatment of languages like Chinese // or Thai requires special handling. // -// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break} +// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break} // property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER. // // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context} @@ -188,6 +204,8 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})* // UAX#29 WB3. CR × LF // WB3a. (Newline | CR | LF) ÷ // WB3b. ÷ (Newline | CR | LF) +// WB13c. Regional_Indicator × Regional_Indicator // WB14. 
Any ÷ Any // -[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } +{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^] + { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java index 1619e45e392..0ab1b0ad18e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex. */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT */ package org.apache.lucene.analysis.standard; @@ -37,6 +37,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * Asian languages, including Thai, Lao, Myanmar, and Khmer *
  • <IDEOGRAPHIC>: A single CJKV ideographic character
  • *
  • <HIRAGANA>: A single hiragana character
  • + *
  • <KATAKANA>: A sequence of katakana characters
  • + *
  • <HANGUL>: A sequence of Hangul characters
  • * */ @@ -65,157 +67,157 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\1\301\10\277\2\301\2\277\1\301\23\277\1\302\1\276\1\271\1\302"+ - "\1\262\1\260\1\265\2\263\2\302\1\264\1\244\1\211\1\270\1\245"+ - "\1\250\1\256\1\251\1\254\1\246\1\247\1\253\1\255\1\252\1\257"+ - "\1\274\1\277\1\275\1\277\1\267\1\266\1\212\1\236\1\213\1\214"+ - "\1\215\1\220\1\221\1\237\1\222\1\240\1\243\1\223\1\224\1\225"+ - "\1\217\1\227\1\226\1\216\1\230\1\231\1\232\1\241\1\233\1\234"+ - "\1\242\1\235\1\272\1\300\1\273\1\303\1\261\1\303\1\212\1\236"+ - "\1\213\1\214\1\215\1\220\1\221\1\237\1\222\1\240\1\243\1\223"+ - "\1\224\1\225\1\217\1\227\1\226\1\216\1\230\1\231\1\232\1\241"+ - "\1\233\1\234\1\242\1\235\3\303\1\260\1\304\52\0\1\174\2\0"+ - "\1\175\7\0\1\174\1\0\1\200\2\0\1\174\5\0\27\174\1\0"+ - "\37\174\1\0\u01ca\174\4\0\14\174\16\0\5\174\7\0\1\174\1\0"+ - "\1\174\21\0\160\175\5\174\1\0\2\174\2\0\4\174\1\201\7\0"+ - "\1\174\1\200\3\174\1\0\1\174\1\0\24\174\1\0\123\174\1\0"+ - "\213\174\1\0\7\175\236\174\11\0\46\174\2\0\1\174\7\0\47\174"+ - "\1\0\1\201\7\0\55\175\1\0\1\175\1\0\2\175\1\0\2\175"+ - "\1\0\1\175\10\0\33\174\5\0\4\174\1\200\13\0\5\175\7\0"+ - "\2\201\2\0\13\175\5\0\53\174\25\175\12\176\1\0\1\176\1\201"+ - "\1\0\2\174\1\175\143\174\1\0\1\174\7\175\1\175\1\0\6\175"+ - "\2\174\2\175\1\0\4\175\2\174\12\176\3\174\2\0\1\174\17\0"+ - "\1\175\1\174\1\175\36\174\33\175\2\0\131\174\13\175\1\174\16\0"+ - "\12\176\41\174\11\175\2\174\2\0\1\201\1\0\1\174\5\0\26\174"+ - "\4\175\1\174\11\175\1\174\3\175\1\174\5\175\22\0\31\174\3\175"+ - "\104\0\1\174\1\0\13\174\67\0\33\175\1\0\4\175\66\174\3\175"+ - "\1\174\22\175\1\174\7\175\12\174\2\175\2\0\12\176\1\0\7\174"+ - "\1\0\7\174\1\0\3\175\1\0\10\174\2\0\2\174\2\0\26\174"+ - "\1\0\7\174\1\0\1\174\3\0\4\174\2\0\1\175\1\174\7\175"+ - "\2\0\2\175\2\0\3\175\1\174\10\0\1\175\4\0\2\174\1\0"+ - 
"\3\174\2\175\2\0\12\176\2\174\17\0\3\175\1\0\6\174\4\0"+ - "\2\174\2\0\26\174\1\0\7\174\1\0\2\174\1\0\2\174\1\0"+ - "\2\174\2\0\1\175\1\0\5\175\4\0\2\175\2\0\3\175\3\0"+ - "\1\175\7\0\4\174\1\0\1\174\7\0\12\176\2\175\3\174\1\175"+ - "\13\0\3\175\1\0\11\174\1\0\3\174\1\0\26\174\1\0\7\174"+ - "\1\0\2\174\1\0\5\174\2\0\1\175\1\174\10\175\1\0\3\175"+ - "\1\0\3\175\2\0\1\174\17\0\2\174\2\175\2\0\12\176\21\0"+ - "\3\175\1\0\10\174\2\0\2\174\2\0\26\174\1\0\7\174\1\0"+ - "\2\174\1\0\5\174\2\0\1\175\1\174\7\175\2\0\2\175\2\0"+ - "\3\175\10\0\2\175\4\0\2\174\1\0\3\174\2\175\2\0\12\176"+ - "\1\0\1\174\20\0\1\175\1\174\1\0\6\174\3\0\3\174\1\0"+ - "\4\174\3\0\2\174\1\0\1\174\1\0\2\174\3\0\2\174\3\0"+ - "\3\174\3\0\14\174\4\0\5\175\3\0\3\175\1\0\4\175\2\0"+ - "\1\174\6\0\1\175\16\0\12\176\21\0\3\175\1\0\10\174\1\0"+ - "\3\174\1\0\27\174\1\0\12\174\1\0\5\174\3\0\1\174\7\175"+ - "\1\0\3\175\1\0\4\175\7\0\2\175\1\0\2\174\6\0\2\174"+ - "\2\175\2\0\12\176\22\0\2\175\1\0\10\174\1\0\3\174\1\0"+ - "\27\174\1\0\12\174\1\0\5\174\2\0\1\175\1\174\7\175\1\0"+ - "\3\175\1\0\4\175\7\0\2\175\7\0\1\174\1\0\2\174\2\175"+ - "\2\0\12\176\1\0\2\174\17\0\2\175\1\0\10\174\1\0\3\174"+ - "\1\0\51\174\2\0\1\174\7\175\1\0\3\175\1\0\4\175\1\174"+ - "\10\0\1\175\10\0\2\174\2\175\2\0\12\176\12\0\6\174\2\0"+ - "\2\175\1\0\22\174\3\0\30\174\1\0\11\174\1\0\1\174\2\0"+ - "\7\174\3\0\1\175\4\0\6\175\1\0\1\175\1\0\10\175\22\0"+ - "\2\175\15\0\60\204\1\205\2\204\7\205\5\0\7\204\10\205\1\0"+ - "\12\176\47\0\2\204\1\0\1\204\2\0\2\204\1\0\1\204\2\0"+ - "\1\204\6\0\4\204\1\0\7\204\1\0\3\204\1\0\1\204\1\0"+ - "\1\204\2\0\2\204\1\0\4\204\1\205\2\204\6\205\1\0\2\205"+ - "\1\204\2\0\5\204\1\0\1\204\1\0\6\205\2\0\12\176\2\0"+ - "\4\204\40\0\1\174\27\0\2\175\6\0\12\176\13\0\1\175\1\0"+ - "\1\175\1\0\1\175\4\0\2\175\10\174\1\0\44\174\4\0\24\175"+ - "\1\0\2\175\5\174\13\175\1\0\44\175\11\0\1\175\71\0\53\204"+ - "\24\205\1\204\12\176\6\0\6\204\4\205\4\204\3\205\1\204\3\205"+ - 
"\2\204\7\205\3\204\4\205\15\204\14\205\1\204\1\205\12\176\4\205"+ - "\2\204\46\174\1\0\1\174\5\0\1\174\2\0\53\174\1\0\4\174"+ - "\u0100\210\111\174\1\0\4\174\2\0\7\174\1\0\1\174\1\0\4\174"+ - "\2\0\51\174\1\0\4\174\2\0\41\174\1\0\4\174\2\0\7\174"+ - "\1\0\1\174\1\0\4\174\2\0\17\174\1\0\71\174\1\0\4\174"+ - "\2\0\103\174\2\0\3\175\40\0\20\174\20\0\125\174\14\0\u026c\174"+ - "\2\0\21\174\1\0\32\174\5\0\113\174\3\0\3\174\17\0\15\174"+ - "\1\0\4\174\3\175\13\0\22\174\3\175\13\0\22\174\2\175\14\0"+ - "\15\174\1\0\3\174\1\0\2\175\14\0\64\204\40\205\3\0\1\204"+ - "\4\0\1\204\1\205\2\0\12\176\41\0\3\175\2\0\12\176\6\0"+ - "\130\174\10\0\51\174\1\175\1\174\5\0\106\174\12\0\35\174\3\0"+ - "\14\175\4\0\14\175\12\0\12\176\36\204\2\0\5\204\13\0\54\204"+ - "\4\0\21\205\7\204\2\205\6\0\12\176\1\204\3\0\2\204\40\0"+ - "\27\174\5\175\4\0\65\204\12\205\1\0\35\205\2\0\1\175\12\176"+ - "\6\0\12\176\6\0\16\204\122\0\5\175\57\174\21\175\7\174\4\0"+ - "\12\176\21\0\11\175\14\0\3\175\36\174\15\175\2\174\12\176\54\174"+ - "\16\175\14\0\44\174\24\175\10\0\12\176\3\0\3\174\12\176\44\174"+ - "\122\0\3\175\1\0\25\175\4\174\1\175\4\174\3\175\2\174\11\0"+ - "\300\174\47\175\25\0\4\175\u0116\174\2\0\6\174\2\0\46\174\2\0"+ - "\6\174\2\0\10\174\1\0\1\174\1\0\1\174\1\0\1\174\1\0"+ - "\37\174\2\0\65\174\1\0\7\174\1\0\1\174\3\0\3\174\1\0"+ - "\7\174\3\0\4\174\2\0\6\174\4\0\15\174\5\0\3\174\1\0"+ - "\7\174\17\0\2\175\2\175\10\0\2\202\12\0\1\202\2\0\1\200"+ - "\2\0\5\175\20\0\2\203\3\0\1\201\17\0\1\203\13\0\5\175"+ - "\5\0\6\175\1\0\1\174\15\0\1\174\20\0\15\174\63\0\41\175"+ - "\21\0\1\174\4\0\1\174\2\0\12\174\1\0\1\174\3\0\5\174"+ - "\6\0\1\174\1\0\1\174\1\0\1\174\1\0\4\174\1\0\13\174"+ - "\2\0\4\174\5\0\5\174\4\0\1\174\21\0\51\174\u032d\0\64\174"+ - "\u0716\0\57\174\1\0\57\174\1\0\205\174\6\0\4\174\3\175\2\174"+ - "\14\0\46\174\1\0\1\174\5\0\1\174\2\0\70\174\7\0\1\174"+ - "\17\0\1\175\27\174\11\0\7\174\1\0\7\174\1\0\7\174\1\0"+ - "\7\174\1\0\7\174\1\0\7\174\1\0\7\174\1\0\7\174\1\0"+ - 
"\40\175\57\0\1\174\120\0\32\206\1\0\131\206\14\0\326\206\57\0"+ - "\1\174\1\0\1\206\31\0\11\206\4\175\2\175\1\0\5\177\2\0"+ - "\3\206\1\174\1\174\4\0\126\207\2\0\2\175\2\177\3\207\133\177"+ - "\1\0\4\177\5\0\51\174\3\0\136\210\21\0\33\174\65\0\20\177"+ - "\37\0\101\0\37\0\121\0\57\177\1\0\130\177\250\0\u19b6\206\112\0"+ - "\u51cd\206\63\0\u048d\174\103\0\56\174\2\0\u010d\174\3\0\20\174\12\176"+ - "\2\174\24\0\57\174\4\175\1\0\12\175\1\0\31\174\7\0\1\175"+ - "\120\174\2\175\45\0\11\174\2\0\147\174\2\0\4\174\1\0\4\174"+ - "\14\0\13\174\115\0\12\174\1\175\3\174\1\175\4\174\1\175\27\174"+ - "\5\175\30\0\64\174\14\0\2\175\62\174\21\175\13\0\12\176\6\0"+ - "\22\175\6\174\3\0\1\174\4\0\12\176\34\174\10\175\2\0\27\174"+ - "\15\175\14\0\35\210\3\0\4\175\57\174\16\175\16\0\1\174\12\176"+ - "\46\0\51\174\16\175\11\0\3\174\1\175\10\174\2\175\2\0\12\176"+ - "\6\0\33\204\1\205\4\0\60\204\1\205\1\204\3\205\2\204\2\205"+ - "\5\204\2\205\1\204\1\205\1\204\30\0\5\204\13\174\5\175\2\0"+ - "\3\174\2\175\12\0\6\174\2\0\6\174\2\0\6\174\11\0\7\174"+ - "\1\0\7\174\221\0\43\174\10\175\1\0\2\175\2\0\12\176\6\0"+ - "\u2ba4\210\14\0\27\210\4\0\61\210\4\0\1\44\1\40\1\67\1\64"+ - "\1\33\1\30\2\0\1\24\1\21\2\0\1\17\1\15\14\0\1\3"+ - "\1\6\20\0\1\156\7\0\1\111\1\10\5\0\1\1\1\172\3\0"+ + "\1\304\10\302\2\304\2\302\1\304\23\302\1\305\1\213\1\275\1\305"+ + "\1\267\1\265\1\212\2\270\2\305\1\271\1\251\1\216\1\274\1\253"+ + "\1\262\1\261\1\252\1\255\1\256\1\263\1\254\1\260\1\257\1\264"+ + "\1\300\1\302\1\301\1\302\1\273\1\272\1\217\1\243\1\220\1\221"+ + "\1\222\1\225\1\226\1\244\1\227\1\246\1\245\1\230\1\231\1\232"+ + "\1\224\1\234\1\233\1\223\1\235\1\236\1\237\1\247\1\240\1\241"+ + "\1\250\1\242\1\276\1\303\1\277\1\306\1\266\1\306\1\217\1\243"+ + "\1\220\1\221\1\222\1\225\1\226\1\244\1\227\1\246\1\245\1\230"+ + "\1\231\1\232\1\224\1\234\1\233\1\223\1\235\1\236\1\237\1\247"+ + "\1\240\1\241\1\250\1\242\3\306\1\265\1\307\52\0\1\176\2\0"+ + "\1\177\7\0\1\176\1\0\1\202\2\0\1\176\5\0\27\176\1\0"+ + 
"\37\176\1\0\u01ca\176\4\0\14\176\5\0\1\202\10\0\5\176\7\0"+ + "\1\176\1\0\1\176\21\0\160\177\5\176\1\0\2\176\2\0\4\176"+ + "\1\203\7\0\1\176\1\202\3\176\1\0\1\176\1\0\24\176\1\0"+ + "\123\176\1\0\213\176\1\0\7\177\236\176\11\0\46\176\2\0\1\176"+ + "\7\0\47\176\1\0\1\203\7\0\55\177\1\0\1\177\1\0\2\177"+ + "\1\0\2\177\1\0\1\177\10\0\33\214\5\0\3\214\1\176\1\202"+ + "\13\0\5\177\7\0\2\203\2\0\13\177\1\0\1\177\3\0\53\176"+ + "\25\177\12\200\1\0\1\200\1\203\1\0\2\176\1\177\143\176\1\0"+ + "\1\176\7\177\1\177\1\0\6\177\2\176\2\177\1\0\4\177\2\176"+ + "\12\200\3\176\2\0\1\176\17\0\1\177\1\176\1\177\36\176\33\177"+ + "\2\0\131\176\13\177\1\176\16\0\12\200\41\176\11\177\2\176\2\0"+ + "\1\203\1\0\1\176\5\0\26\176\4\177\1\176\11\177\1\176\3\177"+ + "\1\176\5\177\22\0\31\176\3\177\104\0\1\176\1\0\13\176\67\0"+ + "\33\177\1\0\4\177\66\176\3\177\1\176\22\177\1\176\7\177\12\176"+ + "\2\177\2\0\12\200\1\0\7\176\1\0\7\176\1\0\3\177\1\0"+ + "\10\176\2\0\2\176\2\0\26\176\1\0\7\176\1\0\1\176\3\0"+ + "\4\176\2\0\1\177\1\176\7\177\2\0\2\177\2\0\3\177\1\176"+ + "\10\0\1\177\4\0\2\176\1\0\3\176\2\177\2\0\12\200\2\176"+ + "\17\0\3\177\1\0\6\176\4\0\2\176\2\0\26\176\1\0\7\176"+ + "\1\0\2\176\1\0\2\176\1\0\2\176\2\0\1\177\1\0\5\177"+ + "\4\0\2\177\2\0\3\177\3\0\1\177\7\0\4\176\1\0\1\176"+ + "\7\0\12\200\2\177\3\176\1\177\13\0\3\177\1\0\11\176\1\0"+ + "\3\176\1\0\26\176\1\0\7\176\1\0\2\176\1\0\5\176\2\0"+ + "\1\177\1\176\10\177\1\0\3\177\1\0\3\177\2\0\1\176\17\0"+ + "\2\176\2\177\2\0\12\200\21\0\3\177\1\0\10\176\2\0\2\176"+ + "\2\0\26\176\1\0\7\176\1\0\2\176\1\0\5\176\2\0\1\177"+ + "\1\176\7\177\2\0\2\177\2\0\3\177\10\0\2\177\4\0\2\176"+ + "\1\0\3\176\2\177\2\0\12\200\1\0\1\176\20\0\1\177\1\176"+ + "\1\0\6\176\3\0\3\176\1\0\4\176\3\0\2\176\1\0\1\176"+ + "\1\0\2\176\3\0\2\176\3\0\3\176\3\0\14\176\4\0\5\177"+ + "\3\0\3\177\1\0\4\177\2\0\1\176\6\0\1\177\16\0\12\200"+ + "\21\0\3\177\1\0\10\176\1\0\3\176\1\0\27\176\1\0\12\176"+ + "\1\0\5\176\3\0\1\176\7\177\1\0\3\177\1\0\4\177\7\0"+ + 
"\2\177\1\0\2\176\6\0\2\176\2\177\2\0\12\200\22\0\2\177"+ + "\1\0\10\176\1\0\3\176\1\0\27\176\1\0\12\176\1\0\5\176"+ + "\2\0\1\177\1\176\7\177\1\0\3\177\1\0\4\177\7\0\2\177"+ + "\7\0\1\176\1\0\2\176\2\177\2\0\12\200\1\0\2\176\17\0"+ + "\2\177\1\0\10\176\1\0\3\176\1\0\51\176\2\0\1\176\7\177"+ + "\1\0\3\177\1\0\4\177\1\176\10\0\1\177\10\0\2\176\2\177"+ + "\2\0\12\200\12\0\6\176\2\0\2\177\1\0\22\176\3\0\30\176"+ + "\1\0\11\176\1\0\1\176\2\0\7\176\3\0\1\177\4\0\6\177"+ + "\1\0\1\177\1\0\10\177\22\0\2\177\15\0\60\206\1\207\2\206"+ + "\7\207\5\0\7\206\10\207\1\0\12\200\47\0\2\206\1\0\1\206"+ + "\2\0\2\206\1\0\1\206\2\0\1\206\6\0\4\206\1\0\7\206"+ + "\1\0\3\206\1\0\1\206\1\0\1\206\2\0\2\206\1\0\4\206"+ + "\1\207\2\206\6\207\1\0\2\207\1\206\2\0\5\206\1\0\1\206"+ + "\1\0\6\207\2\0\12\200\2\0\4\206\40\0\1\176\27\0\2\177"+ + "\6\0\12\200\13\0\1\177\1\0\1\177\1\0\1\177\4\0\2\177"+ + "\10\176\1\0\44\176\4\0\24\177\1\0\2\177\5\176\13\177\1\0"+ + "\44\177\11\0\1\177\71\0\53\206\24\207\1\206\12\200\6\0\6\206"+ + "\4\207\4\206\3\207\1\206\3\207\2\206\7\207\3\206\4\207\15\206"+ + "\14\207\1\206\1\207\12\200\4\207\2\206\46\176\1\0\1\176\5\0"+ + "\1\176\2\0\53\176\1\0\4\176\u0100\215\111\176\1\0\4\176\2\0"+ + "\7\176\1\0\1\176\1\0\4\176\2\0\51\176\1\0\4\176\2\0"+ + "\41\176\1\0\4\176\2\0\7\176\1\0\1\176\1\0\4\176\2\0"+ + "\17\176\1\0\71\176\1\0\4\176\2\0\103\176\2\0\3\177\40\0"+ + "\20\176\20\0\125\176\14\0\u026c\176\2\0\21\176\1\0\32\176\5\0"+ + "\113\176\3\0\3\176\17\0\15\176\1\0\4\176\3\177\13\0\22\176"+ + "\3\177\13\0\22\176\2\177\14\0\15\176\1\0\3\176\1\0\2\177"+ + "\14\0\64\206\40\207\3\0\1\206\4\0\1\206\1\207\2\0\12\200"+ + "\41\0\3\177\1\177\1\0\12\200\6\0\130\176\10\0\51\176\1\177"+ + "\1\176\5\0\106\176\12\0\35\176\3\0\14\177\4\0\14\177\12\0"+ + "\12\200\36\206\2\0\5\206\13\0\54\206\4\0\21\207\7\206\2\207"+ + "\6\0\12\200\1\206\3\0\2\206\40\0\27\176\5\177\4\0\65\206"+ + "\12\207\1\0\35\207\2\0\1\177\12\200\6\0\12\200\6\0\16\206"+ + 
"\122\0\5\177\57\176\21\177\7\176\4\0\12\200\21\0\11\177\14\0"+ + "\3\177\36\176\15\177\2\176\12\200\54\176\16\177\14\0\44\176\24\177"+ + "\10\0\12\200\3\0\3\176\12\200\44\176\122\0\3\177\1\0\25\177"+ + "\4\176\1\177\4\176\3\177\2\176\11\0\300\176\47\177\25\0\4\177"+ + "\u0116\176\2\0\6\176\2\0\46\176\2\0\6\176\2\0\10\176\1\0"+ + "\1\176\1\0\1\176\1\0\1\176\1\0\37\176\2\0\65\176\1\0"+ + "\7\176\1\0\1\176\3\0\3\176\1\0\7\176\3\0\4\176\2\0"+ + "\6\176\4\0\15\176\5\0\3\176\1\0\7\176\17\0\2\177\2\177"+ + "\10\0\2\204\12\0\1\204\2\0\1\202\2\0\5\177\20\0\2\205"+ + "\3\0\1\203\17\0\1\205\13\0\5\177\1\0\12\177\1\0\1\176"+ + "\15\0\1\176\20\0\15\176\63\0\41\177\21\0\1\176\4\0\1\176"+ + "\2\0\12\176\1\0\1\176\3\0\5\176\6\0\1\176\1\0\1\176"+ + "\1\0\1\176\1\0\4\176\1\0\13\176\2\0\4\176\5\0\5\176"+ + "\4\0\1\176\21\0\51\176\u032d\0\64\176\u0716\0\57\176\1\0\57\176"+ + "\1\0\205\176\6\0\4\176\3\177\2\176\14\0\46\176\1\0\1\176"+ + "\5\0\1\176\2\0\70\176\7\0\1\176\17\0\1\177\27\176\11\0"+ + "\7\176\1\0\7\176\1\0\7\176\1\0\7\176\1\0\7\176\1\0"+ + "\7\176\1\0\7\176\1\0\7\176\1\0\40\177\57\0\1\176\120\0"+ + "\32\210\1\0\131\210\14\0\326\210\57\0\1\176\1\0\1\210\31\0"+ + "\11\210\6\177\1\0\5\201\2\0\3\210\1\176\1\176\4\0\126\211"+ + "\2\0\2\177\2\201\3\211\133\201\1\0\4\201\5\0\51\176\3\0"+ + "\136\215\21\0\33\176\65\0\20\201\320\0\57\201\1\0\130\201\250\0"+ + "\u19b6\210\112\0\u51cd\210\63\0\u048d\176\103\0\56\176\2\0\u010d\176\3\0"+ + "\20\176\12\200\2\176\24\0\57\176\4\177\1\0\12\177\1\0\31\176"+ + "\7\0\1\177\120\176\2\177\45\0\11\176\2\0\147\176\2\0\4\176"+ + "\1\0\4\176\14\0\13\176\115\0\12\176\1\177\3\176\1\177\4\176"+ + "\1\177\27\176\5\177\30\0\64\176\14\0\2\177\62\176\21\177\13\0"+ + "\12\200\6\0\22\177\6\176\3\0\1\176\4\0\12\200\34\176\10\177"+ + "\2\0\27\176\15\177\14\0\35\215\3\0\4\177\57\176\16\177\16\0"+ + "\1\176\12\200\46\0\51\176\16\177\11\0\3\176\1\177\10\176\2\177"+ + "\2\0\12\200\6\0\33\206\1\207\4\0\60\206\1\207\1\206\3\207"+ + 
"\2\206\2\207\5\206\2\207\1\206\1\207\1\206\30\0\5\206\13\176"+ + "\5\177\2\0\3\176\2\177\12\0\6\176\2\0\6\176\2\0\6\176"+ + "\11\0\7\176\1\0\7\176\221\0\43\176\10\177\1\0\2\177\2\0"+ + "\12\200\6\0\u2ba4\215\14\0\27\215\4\0\61\215\4\0\1\44\1\40"+ + "\1\67\1\64\1\33\1\30\2\0\1\24\1\21\2\0\1\17\1\15"+ + "\14\0\1\3\1\6\20\0\1\156\7\0\1\111\1\10\5\0\1\1"+ + "\1\172\3\0\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ - "\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163\1\163"+ - "\1\163\1\164\1\163\1\163\1\163\1\170\1\166\17\0\1\160\u02c1\0"+ - "\1\114\277\0\1\157\1\115\1\16\3\167\2\62\1\167\1\62\2\167"+ - "\1\36\21\167\2\106\7\117\1\116\7\117\7\102\1\37\1\102\1\130"+ - "\2\66\1\65\1\130\1\66\1\65\10\130\2\107\5\103\2\75\5\103"+ - "\1\22\10\53\5\23\3\41\12\147\20\41\3\63\32\43\1\42\2\61"+ - "\2\154\1\155\2\154\2\155\2\154\1\155\3\61\1\60\2\61\12\110"+ - "\1\126\1\50\1\45\1\110\6\50\1\45\13\50\31\61\7\50\12\150"+ - "\1\50\5\13\3\127\3\101\1\100\4\101\2\100\10\101\1\100\7\35"+ - "\1\34\2\35\7\101\16\127\1\141\4\152\1\4\4\151\1\4\5\140"+ - "\1\137\1\140\3\137\7\140\1\137\23\140\5\113\3\140\6\113\2\113"+ - "\6\112\5\112\3\134\2\101\7\133\36\101\4\133\5\101\5\127\6\125"+ - "\2\127\1\125\4\35\13\136\12\151\26\136\15\13\1\135\2\13\1\173"+ - "\3\142\1\13\2\142\5\161\4\142\4\162\1\161\3\162\1\161\5\162"+ - "\2\70\1\73\2\70\1\73\1\70\2\73\1\70\1\73\12\70\1\73"+ - "\4\5\1\144\1\143\1\145\1\12\3\165\1\145\2\165\1\131\2\132"+ - "\2\165\1\12\1\165\1\12\1\165\1\12\1\165\3\12\1\165\2\12"+ - "\1\165\1\12\2\165\1\12\1\165\1\12\1\165\1\12\1\165\1\12"+ - "\1\165\1\12\1\76\2\72\1\76\1\72\2\76\4\72\1\76\7\72"+ - "\1\76\4\72\1\76\4\72\1\165\1\12\1\165\12\31\1\57\21\31"+ - "\1\57\3\32\1\57\3\31\1\57\1\31\2\2\2\31\1\57\15\124"+ - "\4\47\4\54\1\146\1\56\10\146\7\54\6\165\4\25\1\27\37\25"+ - 
"\1\27\4\25\25\105\1\171\11\105\21\26\5\105\1\7\12\55\5\105"+ - "\6\104\4\76\1\77\1\26\5\123\12\121\17\123\1\74\3\71\14\120"+ - "\1\11\11\46\1\52\5\46\4\122\13\51\2\14\11\46\1\52\31\46"+ - "\1\52\4\11\4\46\2\52\2\153\1\20\5\153\52\20\u1900\0\u016e\206"+ - "\2\0\152\206\46\0\7\174\14\0\5\174\5\0\1\174\1\175\12\174"+ - "\1\0\15\174\1\0\5\174\1\0\1\174\1\0\2\174\1\0\2\174"+ - "\1\0\154\174\41\0\u016b\174\22\0\100\174\2\0\66\174\50\0\14\174"+ - "\4\0\20\175\1\201\2\0\1\200\1\201\13\0\7\175\14\0\2\203"+ - "\30\0\3\203\1\201\1\0\1\202\1\0\1\201\1\200\32\0\5\174"+ - "\1\0\207\174\2\0\1\175\7\0\1\202\4\0\1\201\1\0\1\202"+ - "\1\0\12\176\1\200\1\201\5\0\32\174\4\0\1\203\1\0\32\174"+ - "\13\0\70\177\2\175\37\210\3\0\6\210\2\0\6\210\2\0\6\210"+ - "\2\0\3\210\34\0\3\175\4\0"; + "\1\163\1\163\1\163\1\164\1\163\1\163\1\163\1\170\1\166\17\0"+ + "\1\160\u02c1\0\1\114\277\0\1\157\1\115\1\16\3\167\2\62\1\167"+ + "\1\62\2\167\1\36\21\167\2\106\7\117\1\116\7\117\7\102\1\37"+ + "\1\102\1\140\2\66\1\65\1\140\1\66\1\65\10\140\2\107\5\103"+ + "\2\75\5\103\1\22\10\53\5\23\3\41\12\122\20\41\3\63\32\43"+ + "\1\42\2\61\2\126\1\127\2\126\2\127\2\126\1\127\3\61\1\60"+ + "\2\61\12\110\1\136\1\50\1\45\1\110\6\50\1\45\13\50\31\61"+ + "\7\50\12\123\1\50\5\13\3\137\3\101\1\100\4\101\2\100\10\101"+ + "\1\100\7\35\1\34\2\35\7\101\16\137\1\151\4\124\1\4\4\121"+ + "\1\4\5\150\1\147\1\150\3\147\7\150\1\147\23\150\5\113\3\150"+ + "\6\113\2\113\6\112\5\112\3\144\2\101\7\143\36\101\4\143\5\101"+ + "\5\137\6\135\2\137\1\135\4\35\13\146\12\121\14\146\12\175\15\174"+ + "\1\145\2\174\1\173\3\152\1\13\2\152\5\161\4\152\4\162\1\161"+ + "\3\162\1\161\5\162\2\70\1\73\2\70\1\73\1\70\2\73\1\70"+ + "\1\73\12\70\1\73\4\5\1\154\1\153\1\155\1\12\3\165\1\155"+ + "\2\165\1\141\2\142\2\165\1\12\1\165\1\12\1\165\1\12\1\165"+ + "\3\12\1\165\2\12\1\165\1\12\2\165\1\12\1\165\1\12\1\165"+ + "\1\12\1\165\1\12\1\165\1\12\1\76\2\72\1\76\1\72\2\76"+ + "\4\72\1\76\7\72\1\76\4\72\1\76\4\72\1\165\1\12\1\165"+ + 
"\12\31\1\57\21\31\1\57\3\32\1\57\3\31\1\57\1\31\2\2"+ + "\2\31\1\57\15\134\4\47\4\54\1\120\1\56\10\120\7\54\6\165"+ + "\4\25\1\27\37\25\1\27\4\25\25\105\1\171\11\105\21\26\5\105"+ + "\1\7\12\55\5\105\6\104\4\76\1\77\1\26\5\133\12\131\17\133"+ + "\1\74\3\71\14\130\1\11\11\46\1\52\5\46\4\132\13\51\2\14"+ + "\11\46\1\52\31\46\1\52\4\11\4\46\2\52\2\125\1\20\5\125"+ + "\52\20\u1900\0\u016e\210\2\0\152\210\46\0\7\176\14\0\5\176\5\0"+ + "\1\214\1\177\12\214\1\0\15\214\1\0\5\214\1\0\1\214\1\0"+ + "\2\214\1\0\2\214\1\0\12\214\142\176\41\0\u016b\176\22\0\100\176"+ + "\2\0\66\176\50\0\14\176\4\0\20\177\1\203\2\0\1\202\1\203"+ + "\13\0\7\177\14\0\2\205\30\0\3\205\1\203\1\0\1\204\1\0"+ + "\1\203\1\202\32\0\5\176\1\0\207\176\2\0\1\177\7\0\1\204"+ + "\4\0\1\203\1\0\1\204\1\0\12\200\1\202\1\203\5\0\32\176"+ + "\4\0\1\205\1\0\32\176\13\0\70\201\2\177\37\215\3\0\6\215"+ + "\2\0\6\215\2\0\6\215\2\0\3\215\34\0\3\177\4\0"; /** * Translates characters to character classes @@ -229,27 +231,35 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf private static final String ZZ_ACTION_PACKED_0 = "\1\0\26\1\1\2\1\3\1\4\1\1\1\5\1\6"+ - "\1\7\1\10\1\1\4\2\3\3\3\1\20\0\1\2"+ - "\1\0\1\2\12\0\1\3\21\0\1\2\32\0\2\2"+ - "\1\0\4\2\1\0\1\3\1\0\2\3\1\2\1\3"+ - "\67\0\32\2\3\0\5\2\32\0\4\3\21\0\1\11"+ - "\1\0\6\12\3\2\2\12\1\2\4\12\2\2\2\12"+ - "\2\0\1\2\1\0\2\2\6\12\3\0\2\12\1\0"+ - "\4\12\2\0\2\12\1\0\2\3\10\0\1\12\32\0"+ - "\1\12\1\0\3\12\6\2\1\0\1\2\2\0\2\2"+ - "\1\0\1\12\10\0\3\3\15\0\3\12\6\11\3\0"+ - "\2\11\1\0\4\11\2\0\2\11\2\12\1\0\2\12"+ - "\1\0\2\12\1\0\1\12\2\2\3\0\1\2\4\0"+ - "\2\3\20\0\1\11\10\0\1\12\3\0\1\2\40\0"+ - "\3\12\23\0\1\12\40\0\1\12\4\0\1\12\6\0"+ - "\1\2\2\0\1\12\4\0\2\12\43\0\1\12\57\0"+ - "\2\2\10\0\1\12\53\0\1\12\72\0\1\12\150\0"+ - "\1\13\1\0\1\12\177\0\1\12\132\0\6\13\3\0"+ - "\2\13\1\0\4\13\2\0\2\13\1\12\112\0\1\13"+ - "\10\0\1\12\64\0\1\12\u01eb\0"; + "\1\7\2\1\1\2\1\10\4\2\3\3\2\1\21\0"+ + "\1\2\1\0\1\2\12\0\1\3\10\0\1\2\11\0"+ + 
"\1\2\46\0\6\2\2\0\3\3\1\2\1\3\23\0"+ + "\1\2\70\0\1\2\1\0\32\2\3\0\6\2\33\0"+ + "\4\3\4\0\1\1\22\0\1\11\10\0\7\12\4\2"+ + "\1\12\1\2\2\12\1\2\6\12\1\2\4\12\1\2"+ + "\4\12\2\2\2\12\4\2\1\12\1\2\3\12\1\2"+ + "\1\0\1\2\1\0\2\2\7\12\4\0\1\12\1\0"+ + "\2\12\1\0\6\12\1\0\4\12\1\0\4\12\2\0"+ + "\2\12\4\0\1\12\1\0\3\12\2\0\2\3\10\0"+ + "\1\12\41\0\1\12\1\0\3\12\32\2\1\0\4\2"+ + "\2\0\2\2\1\0\1\12\37\0\3\3\15\0\3\12"+ + "\7\11\4\0\1\11\1\0\2\11\1\0\6\11\1\0"+ + "\4\11\1\0\4\11\2\0\2\11\4\0\1\11\1\0"+ + "\3\11\1\0\2\12\1\0\2\12\1\0\2\12\1\0"+ + "\1\12\23\2\1\0\4\2\2\0\1\2\31\0\2\3"+ + "\20\0\1\11\37\0\1\12\3\0\15\2\25\0\3\2"+ + "\31\0\3\12\50\0\1\12\12\2\32\0\2\2\1\0"+ + "\1\12\4\0\1\12\7\0\1\2\2\0\1\12\20\0"+ + "\2\12\61\0\1\12\6\2\53\0\2\2\6\0\1\12"+ + "\70\0\1\12\5\2\66\0\1\2\1\12\62\0\3\2"+ + "\65\0\1\13\1\0\1\12\64\0\1\2\113\0\1\12"+ + "\125\0\7\13\4\0\1\13\1\0\2\13\1\0\6\13"+ + "\1\0\4\13\1\0\4\13\2\0\2\13\4\0\1\13"+ + "\1\0\3\13\1\0\1\12\104\0\1\13\37\0\1\12"+ + "\56\0\1\12\u01e5\0"; private static int [] zzUnpackAction() { - int [] result = new int[1750]; + int [] result = new int[2125]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -274,228 +284,275 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\305\0\u018a\0\u024f\0\u0314\0\u03d9\0\u049e\0\u0563"+ - "\0\u0628\0\u06ed\0\u07b2\0\u0877\0\u093c\0\u0a01\0\u0ac6\0\u0b8b"+ - "\0\u0c50\0\u0d15\0\u0dda\0\u0e9f\0\u0f64\0\u1029\0\u10ee\0\u11b3"+ - "\0\u1278\0\u133d\0\u1402\0\u14c7\0\u158c\0\u1651\0\u1716\0\u17db"+ - "\0\u18a0\0\u1965\0\u1a2a\0\u1aef\0\u1bb4\0\u1c79\0\u1d3e\0\u1e03"+ - "\0\u1ec8\0\u1f8d\0\u018a\0\u024f\0\u2052\0\u2117\0\u049e\0\u0563"+ - "\0\u0628\0\u06ed\0\u21dc\0\u22a1\0\u2366\0\u242b\0\u0ac6\0\u24f0"+ - "\0\u25b5\0\u267a\0\u273f\0\u2804\0\u28c9\0\u298e\0\u03d9\0\u2a53"+ - 
"\0\u2b18\0\u093c\0\u2bdd\0\u2ca2\0\u2d67\0\u2e2c\0\u2ef1\0\u2fb6"+ - "\0\u307b\0\u3140\0\u3205\0\u32ca\0\u338f\0\u3454\0\u3519\0\u35de"+ - "\0\u36a3\0\u3768\0\u382d\0\u38f2\0\u39b7\0\u3a7c\0\u3b41\0\u3c06"+ - "\0\u1402\0\u3ccb\0\u3d90\0\u3e55\0\u3f1a\0\u3fdf\0\u40a4\0\u4169"+ - "\0\u422e\0\u42f3\0\u43b8\0\u447d\0\u4542\0\u4607\0\u46cc\0\u4791"+ - "\0\u4856\0\u491b\0\u49e0\0\u4aa5\0\u4b6a\0\u4c2f\0\u4cf4\0\u4db9"+ - "\0\u17db\0\u4e7e\0\u4f43\0\u5008\0\u50cd\0\u5192\0\u5257\0\u531c"+ - "\0\u53e1\0\u54a6\0\u556b\0\u5630\0\u56f5\0\u57ba\0\u587f\0\u5944"+ - "\0\u5a09\0\u5ace\0\u1e03\0\u5b93\0\u5c58\0\u1f8d\0\u5d1d\0\u5de2"+ - "\0\u5ea7\0\u5f6c\0\u6031\0\u60f6\0\u61bb\0\u6280\0\u6345\0\u640a"+ - "\0\u64cf\0\u6594\0\u6659\0\u671e\0\u67e3\0\u68a8\0\u696d\0\u6a32"+ - "\0\u6af7\0\u6bbc\0\u6c81\0\u6d46\0\u6e0b\0\u6ed0\0\u6f95\0\u705a"+ - "\0\u711f\0\u71e4\0\u72a9\0\u736e\0\u7433\0\u74f8\0\u75bd\0\u7682"+ - "\0\u7747\0\u780c\0\u78d1\0\u7996\0\u7a5b\0\u7b20\0\u7be5\0\u7caa"+ - "\0\u7d6f\0\u7e34\0\u7ef9\0\u7fbe\0\u8083\0\u8148\0\u820d\0\u82d2"+ - "\0\u8397\0\u845c\0\u8521\0\u85e6\0\u86ab\0\u8770\0\u8835\0\u88fa"+ - "\0\u89bf\0\u8a84\0\u8b49\0\u8c0e\0\u8cd3\0\u8d98\0\u8e5d\0\u8f22"+ - "\0\u8fe7\0\u90ac\0\u9171\0\u9236\0\u92fb\0\u93c0\0\u9485\0\u954a"+ - "\0\u960f\0\u96d4\0\u9799\0\u985e\0\u9923\0\u99e8\0\u9aad\0\u9b72"+ - "\0\u9c37\0\u9cfc\0\u9dc1\0\u9e86\0\u9f4b\0\ua010\0\ua0d5\0\ua19a"+ - "\0\ua25f\0\ua324\0\ua3e9\0\ua4ae\0\ua573\0\ua638\0\ua6fd\0\ua7c2"+ - "\0\ua887\0\ua94c\0\uaa11\0\uaad6\0\uab9b\0\uac60\0\uad25\0\uadea"+ - "\0\uaeaf\0\uaf74\0\ub039\0\ub0fe\0\ub1c3\0\ub288\0\ub34d\0\ub412"+ - "\0\ub4d7\0\ub59c\0\ub661\0\ub726\0\ub7eb\0\ub8b0\0\ub975\0\uba3a"+ - "\0\ubaff\0\ubbc4\0\ubc89\0\ubd4e\0\ube13\0\ubed8\0\ubf9d\0\uc062"+ - "\0\uc127\0\uc1ec\0\305\0\uc2b1\0\uc376\0\uc43b\0\uc500\0\uc5c5"+ - "\0\uc68a\0\uc74f\0\uc814\0\uc8d9\0\uc99e\0\uca63\0\ucb28\0\ucbed"+ - "\0\uccb2\0\ucd77\0\uce3c\0\ucf01\0\ucfc6\0\ud08b\0\ud150\0\ud215"+ - 
"\0\ud2da\0\ud39f\0\ud464\0\ud529\0\ud5ee\0\ud6b3\0\ud778\0\ud83d"+ - "\0\ud902\0\ud9c7\0\uda8c\0\udb51\0\udc16\0\udcdb\0\udda0\0\ude65"+ - "\0\udf2a\0\udfef\0\ue0b4\0\ue179\0\ue23e\0\ue303\0\ue3c8\0\ue48d"+ - "\0\ue552\0\ue617\0\ue6dc\0\ue7a1\0\ue866\0\ue92b\0\ue9f0\0\ueab5"+ - "\0\ueb7a\0\uec3f\0\ued04\0\uedc9\0\uee8e\0\uef53\0\uf018\0\uf0dd"+ - "\0\uf1a2\0\uf267\0\uf32c\0\uf3f1\0\uf4b6\0\uf57b\0\uf640\0\uf705"+ - "\0\uf7ca\0\uf88f\0\uf954\0\ufa19\0\ufade\0\ufba3\0\ufc68\0\ufd2d"+ - "\0\ufdf2\0\ufeb7\0\uff7c\1\101\1\u0106\1\u01cb\1\u0290\1\u0355"+ - "\1\u041a\1\u04df\1\u05a4\1\u0669\1\u072e\1\u07f3\1\u08b8\1\u097d"+ - "\1\u0a42\1\u0b07\1\u0bcc\1\u0c91\1\u0d56\1\u0e1b\1\u0ee0\1\u0fa5"+ - "\1\u106a\1\u112f\1\u11f4\1\u12b9\1\u137e\1\u1443\1\u1508\1\u15cd"+ - "\1\u1692\1\u1757\1\u181c\1\u18e1\1\u19a6\1\u1a6b\1\u1b30\1\u1bf5"+ - "\1\u1cba\1\u1d7f\1\u1e44\1\u1f09\1\u1fce\1\u2093\1\u2158\1\u221d"+ - "\1\u22e2\1\u23a7\1\u246c\1\u2531\1\u25f6\1\u26bb\1\u2780\1\u2845"+ - "\1\u290a\1\u29cf\1\u2a94\1\u2b59\1\u2c1e\1\u2ce3\1\u2da8\1\u2e6d"+ - "\1\u2f32\1\u2ff7\1\u30bc\1\u3181\1\u3246\1\u330b\1\u33d0\1\u3495"+ - "\1\u355a\1\u361f\1\u36e4\1\u37a9\1\u386e\1\u3933\1\u39f8\1\u3abd"+ - "\1\u3b82\1\u3c47\1\u3d0c\1\u3dd1\1\u3e96\1\u3f5b\1\u4020\1\u40e5"+ - "\1\u41aa\1\u426f\1\u4334\1\u43f9\1\u44be\1\u4583\1\u4648\1\u470d"+ - "\1\u47d2\1\u4897\1\u495c\1\u4a21\1\u4ae6\1\u4bab\1\u4c70\1\u4d35"+ - "\1\u4dfa\1\u4ebf\1\u4f84\1\u5049\1\u510e\1\u51d3\1\u5298\1\u535d"+ - "\0\uc127\1\u5422\1\u54e7\1\u55ac\1\u5671\1\u5736\1\u57fb\1\u58c0"+ - "\1\u5985\1\u5a4a\1\u5b0f\1\u5bd4\1\u5c99\1\u5d5e\1\u5e23\1\u5ee8"+ - "\1\u5fad\1\u6072\1\u6137\1\u61fc\1\u62c1\1\u6386\1\u644b\1\u6510"+ - "\1\u65d5\1\u669a\1\u675f\1\u6824\1\u68e9\1\u69ae\1\u6a73\1\u6b38"+ - "\1\u6bfd\1\u6cc2\1\u6d87\1\u6e4c\1\u6f11\1\u6fd6\1\u709b\1\u7160"+ - "\1\u7225\1\u72ea\1\u73af\1\u7474\1\u7539\1\u75fe\1\u76c3\1\u7788"+ - "\1\u784d\1\u7912\1\u79d7\1\u7a9c\1\u7b61\1\u7c26\1\u7ceb\1\u7db0"+ - 
"\1\u7e75\1\u7f3a\1\u7fff\1\u80c4\1\u8189\1\u824e\1\u8313\1\u83d8"+ - "\1\u849d\1\u8562\1\u8627\1\u86ec\1\u87b1\1\u8876\1\u893b\1\u8a00"+ - "\1\u8ac5\1\u8b8a\1\u8c4f\1\u8d14\1\u8dd9\1\u8e9e\1\u8f63\1\u9028"+ - "\1\u90ed\1\u91b2\1\u9277\1\u933c\1\u9401\1\u94c6\1\u958b\1\u9650"+ - "\1\u9715\1\u97da\1\u989f\1\u9964\1\u9a29\1\u9aee\1\u9bb3\1\u9c78"+ - "\1\u9d3d\1\u9e02\1\u9ec7\1\u9f8c\1\ua051\1\ua116\1\ua1db\1\ua2a0"+ - "\1\ua365\1\ua42a\1\ua4ef\1\ua5b4\1\ua679\1\ua73e\1\ua803\1\ua8c8"+ - "\1\ua98d\1\uaa52\1\uab17\1\uabdc\1\uaca1\1\uad66\1\uae2b\1\uaef0"+ - "\0\u1ec8\1\uafb5\1\ub07a\1\ub13f\1\ub204\1\ub2c9\1\ub38e\1\ub453"+ - "\1\ub518\1\ub5dd\1\ub6a2\1\ub767\1\ub82c\1\ub8f1\1\ub9b6\1\uba7b"+ - "\1\ubb40\1\ubc05\1\ubcca\1\ubd8f\1\ube54\1\ubf19\1\ubfde\1\uc0a3"+ - "\1\uc168\1\uc22d\1\uc2f2\1\uc3b7\1\uc47c\1\uc541\1\uc606\1\uc6cb"+ - "\1\uc790\1\uc855\1\uc91a\1\uc9df\1\ucaa4\1\ucb69\1\ucc2e\1\uccf3"+ - "\1\ucdb8\1\uce7d\1\ucf42\1\ud007\1\ud0cc\1\ud191\1\ud256\1\ud31b"+ - "\1\ud3e0\1\ud4a5\1\ud56a\1\ud62f\1\ud6f4\1\ud7b9\1\ud87e\1\ud943"+ - "\1\uda08\1\udacd\1\udb92\1\udc57\1\udd1c\1\udde1\1\udea6\1\udf6b"+ - "\1\ue030\1\ue0f5\1\ue1ba\1\ue27f\1\ue344\1\ue409\1\ue4ce\1\ue593"+ - "\1\ue658\1\ue71d\1\ue7e2\1\ue8a7\1\ue96c\1\uea31\1\ueaf6\1\uebbb"+ - "\1\uec80\1\ued45\1\uee0a\1\ueecf\1\uef94\1\uf059\1\uf11e\1\uf1e3"+ - "\1\uf2a8\1\uf36d\1\uf432\1\uf4f7\1\uf5bc\1\uf681\1\uf746\1\uf80b"+ - "\1\uf8d0\1\uf995\1\ufa5a\1\ufb1f\1\ufbe4\1\ufca9\1\ufd6e\1\ufe33"+ - "\1\ufef8\1\uffbd\2\202\2\u0147\2\u020c\2\u02d1\2\u0396\2\u045b"+ - "\2\u0520\2\u05e5\2\u06aa\2\u076f\2\u0834\2\u08f9\2\u09be\2\u0a83"+ - "\2\u0b48\2\u0c0d\2\u0cd2\2\u0d97\2\u0e5c\2\u0f21\2\u0fe6\2\u10ab"+ - "\2\u1170\2\u1235\2\u12fa\2\u13bf\2\u1484\2\u1549\2\u160e\2\u16d3"+ - "\2\u1798\2\u185d\2\u1922\2\u19e7\2\u1aac\2\u1b71\2\u1c36\2\u1cfb"+ - "\2\u1dc0\2\u1e85\2\u1f4a\2\u200f\2\u20d4\2\u2199\2\u225e\2\u2323"+ - "\2\u23e8\2\u24ad\2\u2572\2\u2637\2\u26fc\2\u27c1\2\u2886\2\u294b"+ - 
"\2\u2a10\2\u2ad5\2\u2b9a\2\u2c5f\2\u2d24\2\u2de9\2\u2eae\2\u2f73"+ - "\2\u3038\2\u30fd\2\u31c2\2\u3287\2\u334c\2\u3411\2\u34d6\2\u359b"+ - "\2\u3660\2\u3725\2\u37ea\2\u38af\2\u3974\2\u3a39\2\u3afe\2\u3bc3"+ - "\2\u3c88\2\u3d4d\2\u3e12\2\u3ed7\2\u3f9c\2\u4061\2\u4126\2\u41eb"+ - "\2\u42b0\2\u4375\2\u443a\2\u44ff\2\u45c4\2\u4689\2\u474e\2\u4813"+ - "\2\u48d8\2\u499d\2\u4a62\2\u4b27\2\u4bec\2\u4cb1\2\u4d76\2\u4e3b"+ - "\2\u4f00\2\u4fc5\2\u508a\2\u514f\2\u5214\2\u52d9\2\u539e\2\u5463"+ - "\2\u5528\2\u55ed\2\u56b2\2\u5777\2\u583c\2\u5901\2\u59c6\2\u5a8b"+ - "\2\u5b50\2\u5c15\2\u5cda\2\u5d9f\2\u5e64\2\u5f29\2\u5fee\2\u60b3"+ - "\2\u6178\2\u623d\2\u6302\2\u63c7\2\u648c\2\u6551\2\u6616\2\u66db"+ - "\2\u67a0\2\u6865\2\u692a\2\u69ef\2\u6ab4\2\u6b79\2\u6c3e\2\u6d03"+ - "\2\u6dc8\2\u6e8d\2\u6f52\2\u7017\2\u70dc\2\u71a1\2\u7266\2\u732b"+ - "\2\u73f0\2\u74b5\2\u757a\2\u763f\2\u7704\2\u77c9\2\u788e\2\u7953"+ - "\2\u7a18\2\u7add\2\u7ba2\2\u7c67\2\u7d2c\2\u7df1\2\u7eb6\2\u7f7b"+ - "\2\u8040\2\u8105\2\u81ca\2\u828f\2\u8354\2\u8419\2\u84de\2\u85a3"+ - "\2\u8668\2\u872d\2\u87f2\2\u88b7\2\u897c\2\u8a41\2\u8b06\2\u8bcb"+ - "\2\u8c90\2\u8d55\2\u8e1a\2\u8edf\2\u8fa4\2\u9069\2\u912e\2\u91f3"+ - "\2\u92b8\2\u937d\2\u9442\2\u9507\2\u95cc\2\u9691\2\u9756\2\u981b"+ - "\0\305\2\u98e0\2\u99a5\2\u9a6a\2\u9b2f\2\u9bf4\2\u9cb9\2\u9d7e"+ - "\2\u9e43\2\u9f08\2\u9fcd\2\ua092\2\ua157\2\ua21c\2\ua2e1\2\ua3a6"+ - "\2\ua46b\2\ua530\2\ua5f5\2\ua6ba\2\ua77f\2\ua844\2\ua909\2\ua9ce"+ - "\2\uaa93\2\uab58\2\uac1d\2\uace2\2\uada7\2\uae6c\2\uaf31\2\uaff6"+ - "\2\ub0bb\2\ub180\2\ub245\2\ub30a\2\ub3cf\2\ub494\2\ub559\2\ub61e"+ - "\2\ub6e3\2\ub7a8\2\ub86d\2\ub932\2\ub9f7\2\ubabc\2\ubb81\2\ubc46"+ - "\2\ubd0b\2\ubdd0\2\ube95\2\ubf5a\2\uc01f\2\uc0e4\2\uc1a9\2\uc26e"+ - "\2\uc333\2\uc3f8\2\uc4bd\2\uc582\2\uc647\2\uc70c\2\uc7d1\2\uc896"+ - "\2\uc95b\2\uca20\2\ucae5\2\ucbaa\2\ucc6f\2\ucd34\2\ucdf9\2\ucebe"+ - "\2\ucf83\2\ud048\2\ud10d\2\ud1d2\2\ud297\2\ud35c\2\ud421\2\ud4e6"+ - 
"\2\ud5ab\2\ud670\2\ud735\2\ud7fa\2\ud8bf\2\ud984\2\uda49\2\udb0e"+ - "\2\udbd3\2\udc98\2\udd5d\2\ude22\2\udee7\2\udfac\2\ue071\2\ue136"+ - "\2\ue1fb\2\ue2c0\2\ue385\2\ue44a\2\ue50f\2\ue5d4\2\ue699\2\ue75e"+ - "\2\ue823\2\ue8e8\2\ue9ad\2\uea72\2\ueb37\2\uebfc\2\uecc1\2\ued86"+ - "\2\uee4b\2\uef10\2\uefd5\2\uf09a\2\uf15f\2\uf224\2\uf2e9\2\uf3ae"+ - "\2\uf473\2\uf538\2\uf5fd\2\uf6c2\2\uf787\2\uf84c\2\uf911\2\uf9d6"+ - "\2\ufa9b\2\ufb60\2\ufc25\2\ufcea\2\ufdaf\2\ufe74\2\uff39\2\ufffe"+ - "\3\303\3\u0188\3\u024d\3\u0312\3\u03d7\3\u049c\3\u0561\3\u0626"+ - "\3\u06eb\3\u07b0\3\u0875\3\u093a\3\u09ff\3\u0ac4\3\u0b89\3\u0c4e"+ - "\3\u0d13\3\u0dd8\3\u0e9d\3\u0f62\3\u1027\3\u10ec\3\u11b1\3\u1276"+ - "\3\u133b\3\u1400\3\u14c5\3\u158a\3\u164f\3\u1714\3\u17d9\3\u189e"+ - "\3\u1963\3\u1a28\3\u1aed\3\u1bb2\3\u1c77\3\u1d3c\3\u1e01\3\u1ec6"+ - "\3\u1f8b\3\u2050\3\u2115\3\u21da\3\u229f\3\u2364\3\u2429\3\u24ee"+ - "\3\u25b3\3\u2678\3\u273d\3\u2802\3\u28c7\3\u298c\3\u2a51\3\u2b16"+ - "\3\u2bdb\3\u2ca0\3\u2d65\3\u2e2a\3\u2eef\3\u2fb4\3\u3079\3\u313e"+ - "\3\u3203\3\u32c8\3\u338d\3\u3452\3\u3517\3\u35dc\3\u36a1\3\u3766"+ - "\3\u382b\3\u38f0\3\u39b5\3\u3a7a\3\u3b3f\3\u3c04\3\u3cc9\3\u3d8e"+ - "\3\u3e53\3\u3f18\3\u3fdd\3\u40a2\3\u4167\3\u422c\3\u42f1\3\u43b6"+ - "\3\u447b\3\u4540\3\u4605\3\u46ca\3\u478f\3\u4854\3\u4919\3\u49de"+ - "\3\u4aa3\3\u4b68\3\u4c2d\3\u4cf2\3\u4db7\3\u4e7c\3\u4f41\3\u5006"+ - "\3\u50cb\3\u5190\3\u5255\3\u531a\3\u53df\3\u54a4\3\u5569\3\u562e"+ - "\3\u56f3\3\u57b8\3\u587d\3\u5942\3\u5a07\3\u5acc\3\u5b91\3\u5c56"+ - "\3\u5d1b\3\u5de0\3\u5ea5\3\u5f6a\3\u602f\3\u60f4\3\u61b9\3\u627e"+ - "\3\u6343\3\u6408\3\u64cd\3\u6592\3\u6657\3\u671c\3\u67e1\3\u68a6"+ - "\3\u696b\3\u6a30\3\u6af5\3\u6bba\3\u6c7f\3\u6d44\3\u6e09\3\u6ece"+ - "\3\u6f93\3\u7058\3\u711d\3\u71e2\3\u72a7\3\u736c\3\u7431\3\u74f6"+ - "\3\u75bb\3\u7680\3\u7745\3\u780a\3\u78cf\3\u7994\3\u7a59\3\u7b1e"+ - "\3\u7be3\3\u7ca8\3\u7d6d\3\u7e32\3\u7ef7\3\u7fbc\3\u8081\3\u8146"+ - 
"\3\u820b\3\u82d0\3\u8395\3\u845a\3\u851f\3\u85e4\3\u86a9\3\u876e"+ - "\3\u8833\3\u88f8\3\u89bd\3\u8a82\2\u9756\3\u8b47\3\u8c0c\3\u8cd1"+ - "\3\u8d96\3\u8e5b\3\u8f20\3\u8fe5\3\u90aa\3\u916f\3\u9234\3\u92f9"+ - "\3\u93be\3\u9483\3\u9548\3\u960d\3\u96d2\3\u9797\3\u985c\3\u9921"+ - "\3\u99e6\3\u9aab\3\u9b70\3\u9c35\3\u9cfa\3\u9dbf\3\u9e84\3\u9f49"+ - "\3\ua00e\3\ua0d3\3\ua198\3\ua25d\3\ua322\3\ua3e7\3\ua4ac\3\ua571"+ - "\3\ua636\3\ua6fb\3\ua7c0\3\ua885\3\ua94a\3\uaa0f\3\uaad4\3\uab99"+ - "\3\uac5e\3\uad23\3\uade8\3\uaead\3\uaf72\3\ub037\3\ub0fc\3\ub1c1"+ - "\3\ub286\3\ub34b\3\ub410\3\ub4d5\3\ub59a\3\ub65f\3\ub724\3\ub7e9"+ - "\3\ub8ae\3\ub973\3\uba38\3\ubafd\3\ubbc2\3\ubc87\3\ubd4c\3\ube11"+ - "\3\ubed6\3\ubf9b\3\uc060\3\uc125\3\uc1ea\3\uc2af\3\uc374\3\uc439"+ - "\3\uc4fe\3\uc5c3\3\uc688\3\uc74d\3\uc812\3\uc8d7\3\uc99c\3\uca61"+ - "\3\ucb26\3\ucbeb\3\uccb0\3\ucd75\3\uce3a\3\uceff\3\ucfc4\3\ud089"+ - "\3\ud14e\3\ud213\3\ud2d8\3\ud39d\3\ud462\3\ud527\3\ud5ec\3\ud6b1"+ - "\3\ud776\3\ud83b\3\ud900\3\ud9c5\3\uda8a\3\udb4f\3\udc14\3\udcd9"+ - "\3\udd9e\3\ude63\3\udf28\3\udfed\3\ue0b2\3\ue177\3\ue23c\3\ue301"+ - "\3\ue3c6\3\ue48b\3\ue550\3\ue615\3\ue6da\3\ue79f\3\ue864\3\ue929"+ - "\3\ue9ee\3\ueab3\3\ueb78\3\uec3d\3\ued02\3\uedc7\3\uee8c\3\uef51"+ - "\3\uf016\3\uf0db\3\uf1a0\3\uf265\3\uf32a\3\uf3ef\3\uf4b4\3\uf579"+ - "\3\uf63e\3\uf703\3\uf7c8\3\uf88d\3\uf952\3\ufa17\3\ufadc\3\ufba1"+ - "\3\ufc66\3\ufd2b\3\ufdf0\3\ufeb5\3\uff7a\4\77\4\u0104\4\u01c9"+ - "\4\u028e\4\u0353\4\u0418\4\u04dd\4\u05a2\4\u0667\4\u072c\4\u07f1"+ - "\4\u08b6\4\u097b\4\u0a40\4\u0b05\4\u0bca\4\u0c8f\4\u0d54\4\u0e19"+ - "\4\u0ede\4\u0fa3\4\u1068\4\u112d\4\u11f2\4\u12b7\4\u137c\4\u1441"+ - "\4\u1506\4\u15cb\4\u1690\4\u1755\4\u181a\4\u18df\4\u19a4\4\u1a69"+ - "\4\u1b2e\4\u1bf3\4\u1cb8\4\u1d7d\4\u1e42\4\u1f07\4\u1fcc\4\u2091"+ - "\4\u2156\4\u221b\4\u22e0\4\u23a5\4\u246a\4\u252f\4\u25f4\4\u26b9"+ - "\4\u277e\4\u2843\4\u2908\4\u29cd\4\u2a92\4\u2b57\4\u2c1c\4\u2ce1"+ - 
"\4\u2da6\4\u2e6b\4\u2f30\4\u2ff5\4\u30ba\4\u317f\4\u3244\4\u3309"+ - "\4\u33ce\4\u3493\4\u3558\4\u361d\4\u36e2\4\u37a7\4\u386c\4\u3931"+ - "\4\u39f6\4\u3abb\4\u3b80\4\u3c45\4\u3d0a\4\u3dcf\4\u3e94\4\u3f59"+ - "\4\u401e\4\u40e3\4\u41a8\4\u426d\4\u4332\4\u43f7\4\u44bc\4\u4581"+ - "\4\u4646\4\u470b\4\u47d0\4\u4895\4\u495a\4\u4a1f\4\u4ae4\4\u4ba9"+ - "\4\u4c6e\4\u4d33\4\u4df8\4\u4ebd\4\u4f82\4\u5047\4\u510c\4\u51d1"+ - "\4\u5296\4\u535b\4\u5420\4\u54e5\4\u55aa\4\u566f\4\u5734\4\u57f9"+ - "\4\u58be\4\u5983\4\u5a48\4\u5b0d\4\u5bd2\4\u5c97\4\u5d5c\4\u5e21"+ - "\4\u5ee6\4\u5fab\4\u6070\4\u6135\4\u61fa\4\u62bf\4\u6384\4\u6449"+ - "\4\u650e\4\u65d3\4\u6698\4\u675d\4\u6822\4\u68e7\4\u69ac\4\u6a71"+ - "\4\u6b36\4\u6bfb\4\u6cc0\4\u6d85\4\u6e4a\4\u6f0f\4\u6fd4\4\u7099"+ - "\4\u715e\4\u7223\4\u72e8\4\u73ad\4\u7472\4\u7537\4\u75fc\4\u76c1"+ - "\4\u7786\4\u784b\4\u7910\4\u79d5\4\u7a9a\4\u7b5f\4\u7c24\4\u7ce9"+ - "\4\u7dae\4\u7e73\4\u7f38\4\u7ffd\4\u80c2\4\u8187\4\u824c\4\u8311"+ - "\4\u83d6\4\u849b\4\u8560\4\u8625\4\u86ea\4\u87af\4\u8874\4\u8939"+ - "\4\u89fe\4\u8ac3\4\u8b88\4\u8c4d\4\u8d12\4\u8dd7\4\u8e9c\4\u8f61"+ - "\4\u9026\4\u90eb\4\u91b0\4\u9275\4\u933a\4\u93ff\4\u94c4\4\u9589"+ - "\4\u964e\4\u9713\4\u97d8\4\u989d\4\u9962\4\u9a27\4\u9aec\4\u9bb1"+ - "\4\u9c76\4\u9d3b\4\u9e00\4\u9ec5\4\u9f8a\4\ua04f\4\ua114\4\ua1d9"+ - "\4\ua29e\4\ua363\4\ua428\4\ua4ed\4\ua5b2\4\ua677\4\ua73c\4\ua801"+ - "\4\ua8c6\4\ua98b\4\uaa50\4\uab15\4\uabda\4\uac9f\4\uad64\4\uae29"+ - "\4\uaeee\4\uafb3\4\ub078\4\ub13d\4\ub202\4\ub2c7\4\ub38c\4\ub451"+ - "\4\ub516\4\ub5db\4\ub6a0\4\ub765\4\ub82a\4\ub8ef\4\ub9b4\4\uba79"+ - "\4\ubb3e\4\ubc03\4\ubcc8\4\ubd8d\4\ube52\4\ubf17\4\ubfdc\4\uc0a1"+ - "\4\uc166\4\uc22b\4\uc2f0\4\uc3b5\4\uc47a\4\uc53f\4\uc604\4\uc6c9"+ - "\4\uc78e\4\uc853\4\uc918\4\uc9dd\4\ucaa2\4\ucb67\4\ucc2c\4\uccf1"+ - "\4\ucdb6\4\uce7b\4\ucf40\4\ud005\4\ud0ca\4\ud18f\4\ud254\4\ud319"+ - "\4\ud3de\4\ud4a3\4\ud568\4\ud62d\4\ud6f2\4\ud7b7\4\ud87c\4\ud941"+ - 
"\4\uda06\4\udacb\4\udb90\4\udc55\4\udd1a\4\udddf\4\udea4\4\udf69"+ - "\4\ue02e\4\ue0f3\4\ue1b8\4\ue27d\4\ue342\4\ue407\4\ue4cc\4\ue591"+ - "\4\ue656\4\ue71b\4\ue7e0\4\ue8a5\4\ue96a\4\uea2f\4\ueaf4\4\uebb9"+ - "\4\uec7e\4\ued43\4\uee08\4\ueecd\4\uef92\4\uf057\4\uf11c\4\uf1e1"+ - "\4\uf2a6\4\uf36b\4\uf430\4\uf4f5\4\uf5ba\4\uf67f\4\uf744\4\uf809"+ - "\4\uf8ce\4\uf993\4\ufa58\4\ufb1d\4\ufbe2\4\ufca7\4\ufd6c\4\ufe31"+ - "\4\ufef6\4\uffbb\5\200\5\u0145\5\u020a\5\u02cf\5\u0394\5\u0459"+ - "\5\u051e\5\u05e3\5\u06a8\5\u076d\5\u0832\5\u08f7\5\u09bc\5\u0a81"+ - "\5\u0b46\5\u0c0b\5\u0cd0\5\u0d95\5\u0e5a\5\u0f1f\5\u0fe4\5\u10a9"+ - "\5\u116e\5\u1233\5\u12f8\5\u13bd\5\u1482\5\u1547\5\u160c\5\u16d1"+ - "\5\u1796\5\u185b\5\u1920\5\u19e5\5\u1aaa\5\u1b6f\5\u1c34\5\u1cf9"+ - "\5\u1dbe\5\u1e83\5\u1f48\5\u200d\5\u20d2\5\u2197\5\u225c\5\u2321"+ - "\5\u23e6\5\u24ab\5\u2570\5\u2635\5\u26fa\5\u27bf\5\u2884\5\u2949"+ - "\5\u2a0e\5\u2ad3\5\u2b98\5\u2c5d\5\u2d22\5\u2de7\5\u2eac\5\u2f71"+ - "\5\u3036\5\u30fb\5\u31c0\5\u3285\5\u334a\5\u340f"; + "\0\0\0\310\0\u0190\0\u0258\0\u0320\0\u03e8\0\u04b0\0\u0578"+ + "\0\u0640\0\u0708\0\u07d0\0\u0898\0\u0960\0\u0a28\0\u0af0\0\u0bb8"+ + "\0\u0c80\0\u0d48\0\u0e10\0\u0ed8\0\u0fa0\0\u1068\0\u1130\0\u11f8"+ + "\0\u12c0\0\u1388\0\u1450\0\u1518\0\u15e0\0\u16a8\0\u1770\0\u1838"+ + "\0\u1900\0\u19c8\0\u1a90\0\u1b58\0\u1c20\0\u1ce8\0\u1db0\0\u1e78"+ + "\0\u1f40\0\u2008\0\u20d0\0\u2198\0\u0190\0\u0258\0\u2260\0\u2328"+ + "\0\u04b0\0\u0578\0\u0640\0\u0708\0\u23f0\0\u24b8\0\u2580\0\u2648"+ + "\0\u0af0\0\u2710\0\u27d8\0\u28a0\0\u2968\0\u2a30\0\u2af8\0\u2bc0"+ + "\0\u03e8\0\u2c88\0\u2d50\0\u0960\0\u2e18\0\u2ee0\0\u2fa8\0\u3070"+ + "\0\u3138\0\u3200\0\u32c8\0\u3390\0\u3458\0\u3520\0\u35e8\0\u36b0"+ + "\0\u3778\0\u3840\0\u3908\0\u39d0\0\u3a98\0\u3b60\0\u3c28\0\u3cf0"+ + "\0\u3db8\0\u3e80\0\u3f48\0\u1450\0\u4010\0\u40d8\0\u41a0\0\u4268"+ + "\0\u4330\0\u43f8\0\u44c0\0\u4588\0\u4650\0\u4718\0\u47e0\0\u48a8"+ + 
"\0\u4970\0\u4a38\0\u4b00\0\u1770\0\u4bc8\0\u4c90\0\u1838\0\u4d58"+ + "\0\u4e20\0\u4ee8\0\u4fb0\0\u5078\0\u5140\0\u5208\0\u52d0\0\u5398"+ + "\0\u5460\0\u5528\0\u55f0\0\u56b8\0\u5780\0\u5848\0\u5910\0\u59d8"+ + "\0\u5aa0\0\u5b68\0\u5c30\0\u5cf8\0\u5dc0\0\u5e88\0\u5f50\0\u6018"+ + "\0\u60e0\0\u61a8\0\u6270\0\u6338\0\u6400\0\u64c8\0\u6590\0\u6658"+ + "\0\u2008\0\u6720\0\u67e8\0\u68b0\0\u6978\0\u6a40\0\u6b08\0\u6bd0"+ + "\0\u6c98\0\u6d60\0\u6e28\0\u6ef0\0\u6fb8\0\u7080\0\u7148\0\u7210"+ + "\0\u72d8\0\u73a0\0\u7468\0\u7530\0\u75f8\0\u76c0\0\u7788\0\u7850"+ + "\0\u7918\0\u79e0\0\u7aa8\0\u7b70\0\u7c38\0\u7d00\0\u7dc8\0\u7e90"+ + "\0\u7f58\0\u8020\0\u80e8\0\u81b0\0\u8278\0\u8340\0\u8408\0\u84d0"+ + "\0\u8598\0\u8660\0\u8728\0\u87f0\0\u88b8\0\u8980\0\u8a48\0\u8b10"+ + "\0\u8bd8\0\u8ca0\0\u8d68\0\u8e30\0\u8ef8\0\u8fc0\0\u9088\0\u9150"+ + "\0\u9218\0\u92e0\0\u93a8\0\u9470\0\u9538\0\u9600\0\u96c8\0\u9790"+ + "\0\u9858\0\u9920\0\u99e8\0\u9ab0\0\u9b78\0\u9c40\0\u9d08\0\u9dd0"+ + "\0\u9e98\0\u9f60\0\ua028\0\ua0f0\0\ua1b8\0\ua280\0\ua348\0\ua410"+ + "\0\ua4d8\0\ua5a0\0\ua668\0\ua730\0\ua7f8\0\ua8c0\0\ua988\0\uaa50"+ + "\0\uab18\0\uabe0\0\uaca8\0\uad70\0\uae38\0\uaf00\0\uafc8\0\ub090"+ + "\0\ub158\0\ub220\0\ub2e8\0\ub3b0\0\ub478\0\ub540\0\ub608\0\ub6d0"+ + "\0\ub798\0\ub860\0\ub928\0\ub9f0\0\ubab8\0\ubb80\0\ubc48\0\ubd10"+ + "\0\ubdd8\0\ubea0\0\ubf68\0\uc030\0\uc0f8\0\uc1c0\0\uc288\0\uc350"+ + "\0\uc418\0\uc4e0\0\uc5a8\0\uc670\0\uc738\0\uc800\0\uc8c8\0\uc990"+ + "\0\uca58\0\ucb20\0\ucbe8\0\uccb0\0\ucd78\0\uce40\0\ucf08\0\ucfd0"+ + "\0\ud098\0\ud160\0\ud228\0\ud2f0\0\ud3b8\0\ud480\0\ud548\0\ud610"+ + "\0\ud6d8\0\ud7a0\0\ud868\0\ud930\0\ud9f8\0\udac0\0\udb88\0\udc50"+ + "\0\udd18\0\udde0\0\udea8\0\udf70\0\ue038\0\ue100\0\ue1c8\0\ue290"+ + "\0\ue358\0\ue420\0\ue4e8\0\ue5b0\0\ue678\0\ue740\0\ue808\0\310"+ + "\0\ue8d0\0\ue998\0\uea60\0\ueb28\0\uebf0\0\uecb8\0\ued80\0\uee48"+ + "\0\uef10\0\uefd8\0\uf0a0\0\uf168\0\uf230\0\uf2f8\0\uf3c0\0\uf488"+ + 
"\0\uf550\0\uf618\0\uf6e0\0\uf7a8\0\uf870\0\uf938\0\ufa00\0\ufac8"+ + "\0\ufb90\0\ufc58\0\ufd20\0\ufde8\0\ufeb0\0\uff78\1\100\1\u0108"+ + "\1\u01d0\1\u0298\1\u0360\1\u0428\1\u04f0\1\u05b8\1\u0680\1\u0748"+ + "\1\u0810\1\u08d8\1\u09a0\1\u0a68\1\u0b30\1\u0bf8\1\u0cc0\1\u0d88"+ + "\1\u0e50\1\u0f18\1\u0fe0\1\u10a8\1\u1170\1\u1238\1\u1300\1\u13c8"+ + "\1\u1490\1\u1558\1\u1620\1\u16e8\1\u17b0\1\u1878\1\u1940\1\u1a08"+ + "\1\u1ad0\1\u1b98\1\u1c60\1\u1d28\1\u1df0\1\u1eb8\1\u1f80\1\u2048"+ + "\1\u2110\1\u21d8\1\u22a0\1\u2368\1\u2430\1\u24f8\1\u25c0\1\u2688"+ + "\1\u2750\1\u2818\1\u28e0\1\u29a8\1\u2a70\1\u2b38\1\u2c00\1\u2cc8"+ + "\1\u2d90\1\u2e58\1\u2f20\1\u2fe8\1\u30b0\1\u3178\1\u3240\1\u3308"+ + "\1\u33d0\1\u3498\1\u3560\1\u3628\1\u36f0\1\u37b8\1\u3880\1\u3948"+ + "\1\u3a10\1\u3ad8\1\u3ba0\1\u3c68\1\u3d30\1\u3df8\1\u3ec0\1\u3f88"+ + "\1\u4050\1\u4118\1\u41e0\1\u42a8\1\u4370\1\u4438\1\u4500\1\u45c8"+ + "\1\u4690\1\u4758\1\u4820\1\u48e8\1\u49b0\1\u4a78\1\u4b40\1\u4c08"+ + "\1\u4cd0\1\u4d98\1\u4e60\1\u4f28\1\u4ff0\1\u50b8\1\u5180\1\u5248"+ + "\1\u5310\1\u53d8\1\u54a0\1\u5568\1\u5630\1\u56f8\1\u57c0\1\u5888"+ + "\1\u5950\1\u5a18\1\u5ae0\1\u5ba8\1\u5c70\1\u5d38\1\u5e00\1\u5ec8"+ + "\1\u5f90\1\u6058\1\u6120\1\u61e8\1\u62b0\1\u6378\1\u6440\1\u6508"+ + "\1\u65d0\1\u6698\1\u6760\1\u6828\1\u68f0\1\u69b8\1\u6a80\1\u6b48"+ + "\1\u6c10\1\u6cd8\1\u6da0\1\u6e68\1\u6f30\1\u6ff8\1\u70c0\1\u7188"+ + "\1\u7250\1\u7318\1\u73e0\1\u74a8\1\u7570\1\u7638\1\u7700\1\u77c8"+ + "\1\u7890\1\u7958\1\u7a20\1\u7ae8\1\u7bb0\1\u7c78\1\u7d40\1\u7e08"+ + "\1\u7ed0\1\u7f98\1\u8060\1\u8128\1\u81f0\1\u82b8\1\u8380\1\u8448"+ + "\1\u8510\1\u85d8\1\u86a0\1\u8768\1\u8830\1\u88f8\1\u89c0\1\u8a88"+ + "\1\u8b50\1\u8c18\1\u8ce0\1\u8da8\1\u8e70\1\u8f38\1\u9000\1\u90c8"+ + "\1\u9190\1\u9258\1\u9320\1\u93e8\1\u94b0\1\u9578\1\u9640\1\u9708"+ + "\1\u97d0\1\u9898\1\u9960\1\u9a28\1\u9af0\1\u9bb8\1\u9c80\1\u9d48"+ + "\1\u9e10\1\u9ed8\1\u9fa0\1\ua068\1\ua130\1\ua1f8\1\ua2c0\1\ua388"+ + 
"\1\ua450\1\ua518\1\ua5e0\1\ua6a8\1\ua770\1\ua838\1\ua900\1\ua9c8"+ + "\1\uaa90\1\uab58\1\uac20\1\uace8\1\uadb0\1\uae78\1\uaf40\1\ub008"+ + "\1\ub0d0\1\ub198\1\ub260\1\ub328\1\ub3f0\1\ub4b8\1\ub580\1\ub648"+ + "\1\ub710\1\ub7d8\1\ub8a0\1\ub968\1\uba30\1\ubaf8\1\ubbc0\1\ubc88"+ + "\1\ubd50\1\ube18\1\ubee0\1\ubfa8\1\uc070\1\uc138\1\uc200\1\uc2c8"+ + "\1\uc390\1\uc458\1\uc520\1\uc5e8\1\uc6b0\1\uc778\1\uc840\1\uc908"+ + "\1\uc9d0\1\uca98\1\ucb60\1\ucc28\1\uccf0\1\ucdb8\1\uce80\1\ucf48"+ + "\1\ud010\1\ud0d8\1\ud1a0\1\ud268\1\ud330\1\ud3f8\1\ud4c0\1\ud588"+ + "\1\ud650\1\ud718\1\ud7e0\1\ud8a8\1\ud970\1\uda38\1\udb00\1\udbc8"+ + "\1\udc90\1\udd58\1\ude20\1\udee8\1\udfb0\1\ue078\1\ue140\1\ue208"+ + "\1\ue2d0\1\ue398\1\ue460\1\ue528\1\ue5f0\1\ue6b8\1\ue780\1\ue848"+ + "\1\ue910\1\ue9d8\1\ueaa0\1\ueb68\1\uec30\1\uecf8\1\uedc0\1\uee88"+ + "\1\uef50\1\uf018\1\uf0e0\1\uf1a8\1\uf270\1\uf338\1\uf400\1\uf4c8"+ + "\1\uf590\1\uf658\1\uf720\1\uf7e8\1\uf8b0\1\uf978\1\ufa40\1\ufb08"+ + "\1\ufbd0\1\ufc98\1\ufd60\1\ufe28\1\ufef0\1\uffb8\2\200\2\u0148"+ + "\2\u0210\2\u02d8\2\u03a0\2\u0468\2\u0530\2\u05f8\2\u06c0\2\u0788"+ + "\0\ue740\2\u0850\2\u0918\2\u09e0\2\u0aa8\2\u0b70\2\u0c38\2\u0d00"+ + "\2\u0dc8\2\u0e90\2\u0f58\2\u1020\2\u10e8\2\u11b0\2\u1278\2\u1340"+ + "\2\u1408\2\u14d0\2\u1598\2\u1660\2\u1728\2\u17f0\2\u18b8\2\u1980"+ + "\2\u1a48\2\u1b10\2\u1bd8\2\u1ca0\2\u1d68\2\u1e30\2\u1ef8\2\u1fc0"+ + "\2\u2088\2\u2150\2\u2218\2\u22e0\2\u23a8\2\u2470\2\u2538\2\u2600"+ + "\2\u26c8\2\u2790\2\u2858\2\u2920\2\u29e8\2\u2ab0\2\u2b78\2\u2c40"+ + "\2\u2d08\2\u2dd0\2\u2e98\2\u2f60\2\u3028\2\u30f0\2\u31b8\2\u3280"+ + "\2\u3348\2\u3410\2\u34d8\2\u35a0\2\u3668\2\u3730\2\u37f8\2\u38c0"+ + "\2\u3988\2\u3a50\2\u3b18\2\u3be0\2\u3ca8\2\u3d70\2\u3e38\2\u3f00"+ + "\2\u3fc8\2\u4090\2\u4158\2\u4220\2\u42e8\2\u43b0\2\u4478\2\u4540"+ + "\2\u4608\2\u46d0\2\u4798\2\u4860\2\u4928\2\u49f0\2\u4ab8\2\u4b80"+ + "\2\u4c48\2\u4d10\2\u4dd8\2\u4ea0\2\u4f68\2\u5030\2\u50f8\2\u51c0"+ + 
"\2\u5288\2\u5350\2\u5418\2\u54e0\2\u55a8\2\u5670\2\u5738\2\u5800"+ + "\2\u58c8\2\u5990\2\u5a58\2\u5b20\2\u5be8\2\u5cb0\2\u5d78\2\u5e40"+ + "\2\u5f08\2\u5fd0\2\u6098\2\u6160\2\u6228\2\u62f0\2\u63b8\2\u6480"+ + "\2\u6548\2\u6610\2\u66d8\2\u67a0\2\u6868\2\u6930\2\u69f8\2\u6ac0"+ + "\2\u6b88\2\u6c50\2\u6d18\2\u6de0\2\u6ea8\2\u6f70\2\u7038\2\u7100"+ + "\2\u71c8\2\u7290\2\u7358\2\u7420\2\u74e8\2\u75b0\2\u7678\2\u7740"+ + "\2\u7808\2\u78d0\2\u7998\2\u7a60\2\u7b28\2\u7bf0\2\u7cb8\2\u7d80"+ + "\2\u7e48\2\u7f10\2\u7fd8\2\u80a0\2\u8168\2\u8230\2\u82f8\2\u83c0"+ + "\2\u8488\2\u8550\2\u8618\2\u86e0\2\u87a8\2\u8870\2\u8938\2\u8a00"+ + "\2\u8ac8\2\u8b90\2\u8c58\2\u8d20\2\u8de8\2\u8eb0\2\u8f78\2\u9040"+ + "\2\u9108\2\u91d0\2\u9298\2\u9360\2\u9428\2\u94f0\2\u95b8\2\u9680"+ + "\2\u9748\2\u9810\2\u98d8\2\u99a0\2\u9a68\2\u9b30\2\u9bf8\2\u9cc0"+ + "\2\u9d88\2\u9e50\2\u9f18\2\u9fe0\2\ua0a8\2\ua170\2\ua238\2\ua300"+ + "\2\ua3c8\0\u20d0\2\ua490\2\ua558\2\ua620\2\ua6e8\2\ua7b0\2\ua878"+ + "\2\ua940\2\uaa08\2\uaad0\2\uab98\2\uac60\2\uad28\2\uadf0\2\uaeb8"+ + "\2\uaf80\2\ub048\2\ub110\2\ub1d8\2\ub2a0\2\ub368\2\ub430\2\ub4f8"+ + "\2\ub5c0\2\ub688\2\ub750\2\ub818\2\ub8e0\2\ub9a8\2\uba70\2\ubb38"+ + "\2\ubc00\2\ubcc8\2\ubd90\2\ube58\2\ubf20\2\ubfe8\2\uc0b0\2\uc178"+ + "\2\uc240\2\uc308\2\uc3d0\2\uc498\2\uc560\2\uc628\2\uc6f0\2\uc7b8"+ + "\2\uc880\2\uc948\2\uca10\2\ucad8\2\ucba0\2\ucc68\2\ucd30\2\ucdf8"+ + "\2\ucec0\2\ucf88\2\ud050\2\ud118\2\ud1e0\2\ud2a8\2\ud370\2\ud438"+ + "\2\ud500\2\ud5c8\2\ud690\2\ud758\2\ud820\2\ud8e8\2\ud9b0\2\uda78"+ + "\2\udb40\2\udc08\2\udcd0\2\udd98\2\ude60\2\udf28\2\udff0\2\ue0b8"+ + "\2\ue180\2\ue248\2\ue310\2\ue3d8\2\ue4a0\2\ue568\2\ue630\2\ue6f8"+ + "\2\ue7c0\2\ue888\2\ue950\2\uea18\2\ueae0\2\ueba8\2\uec70\2\ued38"+ + "\2\uee00\2\ueec8\2\uef90\2\uf058\2\uf120\2\uf1e8\2\uf2b0\2\uf378"+ + "\2\uf440\2\uf508\2\uf5d0\2\uf698\2\uf760\2\uf828\2\uf8f0\2\uf9b8"+ + "\2\ufa80\2\ufb48\2\ufc10\2\ufcd8\2\ufda0\2\ufe68\2\uff30\2\ufff8"+ + 
"\3\300\3\u0188\3\u0250\3\u0318\3\u03e0\3\u04a8\3\u0570\3\u0638"+ + "\3\u0700\3\u07c8\3\u0890\3\u0958\3\u0a20\3\u0ae8\3\u0bb0\3\u0c78"+ + "\3\u0d40\3\u0e08\3\u0ed0\3\u0f98\3\u1060\3\u1128\3\u11f0\3\u12b8"+ + "\3\u1380\3\u1448\3\u1510\3\u15d8\3\u16a0\3\u1768\3\u1830\3\u18f8"+ + "\3\u19c0\3\u1a88\3\u1b50\3\u1c18\3\u1ce0\3\u1da8\3\u1e70\3\u1f38"+ + "\3\u2000\3\u20c8\3\u2190\3\u2258\3\u2320\3\u23e8\3\u24b0\3\u2578"+ + "\3\u2640\3\u2708\3\u27d0\3\u2898\3\u2960\3\u2a28\3\u2af0\3\u2bb8"+ + "\3\u2c80\3\u2d48\3\u2e10\3\u2ed8\3\u2fa0\3\u3068\3\u3130\3\u31f8"+ + "\3\u32c0\3\u3388\3\u3450\3\u3518\3\u35e0\3\u36a8\3\u3770\3\u3838"+ + "\3\u3900\3\u39c8\3\u3a90\3\u3b58\3\u3c20\3\u3ce8\3\u3db0\3\u3e78"+ + "\3\u3f40\3\u4008\3\u40d0\3\u4198\3\u4260\3\u4328\3\u43f0\3\u44b8"+ + "\3\u4580\3\u4648\3\u4710\3\u47d8\3\u48a0\3\u4968\3\u4a30\3\u4af8"+ + "\3\u4bc0\3\u4c88\3\u4d50\3\u4e18\3\u4ee0\3\u4fa8\3\u5070\3\u5138"+ + "\3\u5200\3\u52c8\3\u5390\3\u5458\3\u5520\3\u55e8\3\u56b0\3\u5778"+ + "\3\u5840\3\u5908\3\u59d0\3\u5a98\3\u5b60\3\u5c28\3\u5cf0\3\u5db8"+ + "\3\u5e80\3\u5f48\3\u6010\3\u60d8\3\u61a0\3\u6268\3\u6330\3\u63f8"+ + "\3\u64c0\3\u6588\3\u6650\3\u6718\3\u67e0\3\u68a8\3\u6970\3\u6a38"+ + "\3\u6b00\3\u6bc8\3\u6c90\3\u6d58\3\u6e20\3\u6ee8\3\u6fb0\3\u7078"+ + "\3\u7140\3\u7208\3\u72d0\3\u7398\3\u7460\3\u7528\3\u75f0\3\u76b8"+ + "\3\u7780\3\u7848\3\u7910\3\u79d8\3\u7aa0\3\u7b68\3\u7c30\3\u7cf8"+ + "\3\u7dc0\3\u7e88\3\u7f50\3\u8018\3\u80e0\3\u81a8\3\u8270\3\u8338"+ + "\3\u8400\3\u84c8\3\u8590\3\u8658\3\u8720\3\u87e8\3\u88b0\3\u8978"+ + "\3\u8a40\3\u8b08\3\u8bd0\3\u8c98\3\u8d60\3\u8e28\3\u8ef0\3\u8fb8"+ + "\3\u9080\3\u9148\3\u9210\3\u92d8\3\u93a0\3\u9468\3\u9530\3\u95f8"+ + "\3\u96c0\3\u9788\3\u9850\3\u9918\3\u99e0\3\u9aa8\3\u9b70\3\u9c38"+ + "\3\u9d00\3\u9dc8\3\u9e90\3\u9f58\3\ua020\3\ua0e8\3\ua1b0\3\ua278"+ + "\3\ua340\3\ua408\3\ua4d0\3\ua598\3\ua660\3\ua728\3\ua7f0\3\ua8b8"+ + "\3\ua980\3\uaa48\3\uab10\3\uabd8\3\uaca0\3\uad68\3\uae30\3\uaef8"+ + 
"\3\uafc0\3\ub088\3\ub150\3\ub218\0\310\3\ub2e0\3\ub3a8\3\ub470"+ + "\3\ub538\3\ub600\3\ub6c8\3\ub790\3\ub858\3\ub920\3\ub9e8\3\ubab0"+ + "\3\ubb78\3\ubc40\3\ubd08\3\ubdd0\3\ube98\3\ubf60\3\uc028\3\uc0f0"+ + "\3\uc1b8\3\uc280\3\uc348\3\uc410\3\uc4d8\3\uc5a0\3\uc668\3\uc730"+ + "\3\uc7f8\3\uc8c0\3\uc988\3\uca50\3\ucb18\3\ucbe0\3\ucca8\3\ucd70"+ + "\3\uce38\3\ucf00\3\ucfc8\3\ud090\3\ud158\3\ud220\3\ud2e8\3\ud3b0"+ + "\3\ud478\3\ud540\3\ud608\3\ud6d0\3\ud798\3\ud860\3\ud928\3\ud9f0"+ + "\3\udab8\3\udb80\3\udc48\3\udd10\3\uddd8\3\udea0\3\udf68\3\ue030"+ + "\3\ue0f8\3\ue1c0\3\ue288\3\ue350\3\ue418\3\ue4e0\3\ue5a8\3\ue670"+ + "\3\ue738\3\ue800\3\ue8c8\3\ue990\3\uea58\3\ueb20\3\uebe8\3\uecb0"+ + "\3\ued78\3\uee40\3\uef08\3\uefd0\3\uf098\3\uf160\3\uf228\3\uf2f0"+ + "\3\uf3b8\3\uf480\3\uf548\3\uf610\3\uf6d8\3\uf7a0\3\uf868\3\uf930"+ + "\3\uf9f8\3\ufac0\3\ufb88\3\ufc50\3\ufd18\3\ufde0\3\ufea8\3\uff70"+ + "\4\70\4\u0100\4\u01c8\4\u0290\4\u0358\4\u0420\4\u04e8\4\u05b0"+ + "\4\u0678\4\u0740\4\u0808\4\u08d0\4\u0998\4\u0a60\4\u0b28\4\u0bf0"+ + "\4\u0cb8\4\u0d80\4\u0e48\4\u0f10\4\u0fd8\4\u10a0\4\u1168\4\u1230"+ + "\4\u12f8\4\u13c0\4\u1488\4\u1550\4\u1618\4\u16e0\4\u17a8\4\u1870"+ + "\4\u1938\4\u1a00\4\u1ac8\4\u1b90\4\u1c58\4\u1d20\4\u1de8\4\u1eb0"+ + "\4\u1f78\4\u2040\4\u2108\4\u21d0\4\u2298\4\u2360\4\u2428\4\u24f0"+ + "\4\u25b8\4\u2680\4\u2748\4\u2810\4\u28d8\4\u29a0\4\u2a68\4\u2b30"+ + "\4\u2bf8\4\u2cc0\4\u2d88\4\u2e50\4\u2f18\4\u2fe0\4\u30a8\4\u3170"+ + "\4\u3238\4\u3300\4\u33c8\4\u3490\4\u3558\4\u3620\4\u36e8\4\u37b0"+ + "\4\u3878\4\u3940\4\u3a08\4\u3ad0\4\u3b98\4\u3c60\4\u3d28\4\u3df0"+ + "\4\u3eb8\4\u3f80\4\u4048\4\u4110\4\u41d8\4\u42a0\4\u4368\4\u4430"+ + "\4\u44f8\4\u45c0\4\u4688\4\u4750\4\u4818\4\u48e0\4\u49a8\4\u4a70"+ + "\4\u4b38\4\u4c00\4\u4cc8\4\u4d90\4\u4e58\4\u4f20\4\u4fe8\4\u50b0"+ + "\4\u5178\4\u5240\4\u5308\4\u53d0\4\u5498\4\u5560\4\u5628\4\u56f0"+ + "\4\u57b8\4\u5880\4\u5948\4\u5a10\4\u5ad8\4\u5ba0\4\u5c68\4\u5d30"+ + 
"\4\u5df8\4\u5ec0\4\u5f88\4\u6050\4\u6118\4\u61e0\4\u62a8\4\u6370"+ + "\4\u6438\4\u6500\4\u65c8\4\u6690\4\u6758\4\u6820\4\u68e8\4\u69b0"+ + "\4\u6a78\4\u6b40\4\u6c08\4\u6cd0\4\u6d98\4\u6e60\4\u6f28\4\u6ff0"+ + "\4\u70b8\4\u7180\4\u7248\4\u7310\4\u73d8\4\u74a0\4\u7568\4\u7630"+ + "\4\u76f8\4\u77c0\4\u7888\4\u7950\4\u7a18\4\u7ae0\4\u7ba8\4\u7c70"+ + "\4\u7d38\4\u7e00\4\u7ec8\4\u7f90\4\u8058\4\u8120\4\u81e8\4\u82b0"+ + "\4\u8378\4\u8440\4\u8508\4\u85d0\4\u8698\4\u8760\4\u8828\4\u88f0"+ + "\4\u89b8\4\u8a80\4\u8b48\4\u8c10\4\u8cd8\4\u8da0\4\u8e68\4\u8f30"+ + "\4\u8ff8\4\u90c0\4\u9188\4\u9250\4\u9318\4\u93e0\4\u94a8\4\u9570"+ + "\4\u9638\4\u9700\4\u97c8\4\u9890\4\u9958\4\u9a20\4\u9ae8\4\u9bb0"+ + "\4\u9c78\4\u9d40\4\u9e08\4\u9ed0\4\u9f98\4\ua060\4\ua128\4\ua1f0"+ + "\4\ua2b8\4\ua380\4\ua448\4\ua510\4\ua5d8\4\ua6a0\4\ua768\4\ua830"+ + "\4\ua8f8\4\ua9c0\4\uaa88\4\uab50\4\uac18\4\uace0\4\uada8\4\uae70"+ + "\4\uaf38\4\ub000\4\ub0c8\4\ub190\4\ub258\4\ub320\4\ub3e8\4\ub4b0"+ + "\3\ub150\4\ub578\4\ub640\4\ub708\4\ub7d0\4\ub898\4\ub960\4\uba28"+ + "\4\ubaf0\4\ubbb8\4\ubc80\4\ubd48\4\ube10\4\ubed8\4\ubfa0\4\uc068"+ + "\4\uc130\4\uc1f8\4\uc2c0\4\uc388\4\uc450\4\uc518\4\uc5e0\4\uc6a8"+ + "\4\uc770\4\uc838\4\uc900\4\uc9c8\4\uca90\4\ucb58\4\ucc20\4\ucce8"+ + "\4\ucdb0\4\uce78\4\ucf40\4\ud008\4\ud0d0\4\ud198\4\ud260\4\ud328"+ + "\4\ud3f0\4\ud4b8\4\ud580\4\ud648\4\ud710\4\ud7d8\4\ud8a0\4\ud968"+ + "\4\uda30\4\udaf8\4\udbc0\4\udc88\4\udd50\4\ude18\4\udee0\4\udfa8"+ + "\4\ue070\4\ue138\4\ue200\4\ue2c8\4\ue390\4\ue458\4\ue520\4\ue5e8"+ + "\4\ue6b0\4\ue778\4\ue840\4\ue908\4\ue9d0\4\uea98\4\ueb60\4\uec28"+ + "\4\uecf0\4\uedb8\4\uee80\4\uef48\4\uf010\4\uf0d8\4\uf1a0\4\uf268"+ + "\4\uf330\4\uf3f8\4\uf4c0\4\uf588\4\uf650\4\uf718\4\uf7e0\4\uf8a8"+ + "\4\uf970\4\ufa38\4\ufb00\4\ufbc8\4\ufc90\4\ufd58\4\ufe20\4\ufee8"+ + "\4\uffb0\5\170\5\u0140\5\u0208\5\u02d0\5\u0398\5\u0460\5\u0528"+ + "\5\u05f0\5\u06b8\5\u0780\5\u0848\5\u0910\5\u09d8\5\u0aa0\5\u0b68"+ + 
"\5\u0c30\5\u0cf8\5\u0dc0\5\u0e88\5\u0f50\5\u1018\5\u10e0\5\u11a8"+ + "\5\u1270\5\u1338\5\u1400\5\u14c8\5\u1590\5\u1658\5\u1720\5\u17e8"+ + "\5\u18b0\5\u1978\5\u1a40\5\u1b08\5\u1bd0\5\u1c98\5\u1d60\5\u1e28"+ + "\5\u1ef0\5\u1fb8\5\u2080\5\u2148\5\u2210\5\u22d8\5\u23a0\5\u2468"+ + "\5\u2530\5\u25f8\5\u26c0\5\u2788\5\u2850\5\u2918\5\u29e0\5\u2aa8"+ + "\5\u2b70\5\u2c38\5\u2d00\5\u2dc8\5\u2e90\5\u2f58\5\u3020\5\u30e8"+ + "\5\u31b0\5\u3278\5\u3340\5\u3408\5\u34d0\5\u3598\5\u3660\5\u3728"+ + "\5\u37f0\5\u38b8\5\u3980\5\u3a48\5\u3b10\5\u3bd8\5\u3ca0\5\u3d68"+ + "\5\u3e30\5\u3ef8\5\u3fc0\5\u4088\5\u4150\5\u4218\5\u42e0\5\u43a8"+ + "\5\u4470\5\u4538\5\u4600\5\u46c8\5\u4790\5\u4858\5\u4920\5\u49e8"+ + "\5\u4ab0\5\u4b78\5\u4c40\5\u4d08\5\u4dd0\5\u4e98\5\u4f60\5\u5028"+ + "\5\u50f0\5\u51b8\5\u5280\5\u5348\5\u5410\5\u54d8\5\u55a0\5\u5668"+ + "\5\u5730\5\u57f8\5\u58c0\5\u5988\5\u5a50\5\u5b18\5\u5be0\5\u5ca8"+ + "\5\u5d70\5\u5e38\5\u5f00\5\u5fc8\5\u6090\5\u6158\5\u6220\5\u62e8"+ + "\5\u63b0\5\u6478\5\u6540\5\u6608\5\u66d0\5\u6798\5\u6860\5\u6928"+ + "\5\u69f0\5\u6ab8\5\u6b80\5\u6c48\5\u6d10\5\u6dd8\5\u6ea0\5\u6f68"+ + "\5\u7030\5\u70f8\5\u71c0\5\u7288\5\u7350\5\u7418\5\u74e0\5\u75a8"+ + "\5\u7670\5\u7738\5\u7800\5\u78c8\5\u7990\5\u7a58\5\u7b20\5\u7be8"+ + "\5\u7cb0\5\u7d78\5\u7e40\5\u7f08\5\u7fd0\5\u8098\5\u8160\5\u8228"+ + "\5\u82f0\5\u83b8\5\u8480\5\u8548\5\u8610\5\u86d8\5\u87a0\5\u8868"+ + "\5\u8930\5\u89f8\5\u8ac0\5\u8b88\5\u8c50\5\u8d18\5\u8de0\5\u8ea8"+ + "\5\u8f70\5\u9038\5\u9100\5\u91c8\5\u9290\5\u9358\5\u9420\5\u94e8"+ + "\5\u95b0\5\u9678\5\u9740\5\u9808\5\u98d0\5\u9998\5\u9a60\5\u9b28"+ + "\5\u9bf0\5\u9cb8\5\u9d80\5\u9e48\5\u9f10\5\u9fd8\5\ua0a0\5\ua168"+ + "\5\ua230\5\ua2f8\5\ua3c0\5\ua488\5\ua550\5\ua618\5\ua6e0\5\ua7a8"+ + "\5\ua870\5\ua938\5\uaa00\5\uaac8\5\uab90\5\uac58\5\uad20\5\uade8"+ + "\5\uaeb0\5\uaf78\5\ub040\5\ub108\5\ub1d0\5\ub298\5\ub360\5\ub428"+ + "\5\ub4f0\5\ub5b8\5\ub680\5\ub748\5\ub810\5\ub8d8\5\ub9a0\5\uba68"+ + 
"\5\ubb30\5\ubbf8\5\ubcc0\5\ubd88\5\ube50\5\ubf18\5\ubfe0\5\uc0a8"+ + "\5\uc170\5\uc238\5\uc300\5\uc3c8\5\uc490\5\uc558\5\uc620\5\uc6e8"+ + "\5\uc7b0\5\uc878\5\uc940\5\uca08\5\ucad0\5\ucb98\5\ucc60\5\ucd28"+ + "\5\ucdf0\5\uceb8\5\ucf80\5\ud048\5\ud110\5\ud1d8\5\ud2a0\5\ud368"+ + "\5\ud430\5\ud4f8\5\ud5c0\5\ud688\5\ud750\5\ud818\5\ud8e0\5\ud9a8"+ + "\5\uda70\5\udb38\5\udc00\5\udcc8\5\udd90\5\ude58\5\udf20\5\udfe8"+ + "\5\ue0b0\5\ue178\5\ue240\5\ue308\5\ue3d0\5\ue498\5\ue560\5\ue628"+ + "\5\ue6f0\5\ue7b8\5\ue880\5\ue948\5\uea10\5\uead8\5\ueba0\5\uec68"+ + "\5\ued30\5\uedf8\5\ueec0\5\uef88\5\uf050\5\uf118\5\uf1e0\5\uf2a8"+ + "\5\uf370\5\uf438\5\uf500\5\uf5c8\5\uf690\5\uf758\5\uf820\5\uf8e8"+ + "\5\uf9b0\5\ufa78\5\ufb40\5\ufc08\5\ufcd0\5\ufd98\5\ufe60\5\uff28"+ + "\5\ufff0\6\270\6\u0180\6\u0248\6\u0310\6\u03d8\6\u04a0\6\u0568"+ + "\6\u0630\6\u06f8\6\u07c0\6\u0888\6\u0950\6\u0a18\6\u0ae0\6\u0ba8"+ + "\6\u0c70\6\u0d38\6\u0e00\6\u0ec8\6\u0f90\6\u1058\6\u1120\6\u11e8"+ + "\6\u12b0\6\u1378\6\u1440\6\u1508\6\u15d0\6\u1698\6\u1760\6\u1828"+ + "\6\u18f0\6\u19b8\6\u1a80\6\u1b48\6\u1c10\6\u1cd8\6\u1da0\6\u1e68"+ + "\6\u1f30\6\u1ff8\6\u20c0\6\u2188\6\u2250\6\u2318\6\u23e0\6\u24a8"+ + "\6\u2570\6\u2638\6\u2700\6\u27c8\6\u2890\6\u2958\6\u2a20\6\u2ae8"+ + "\6\u2bb0\6\u2c78\6\u2d40\6\u2e08\6\u2ed0\6\u2f98\6\u3060\6\u3128"+ + "\6\u31f0\6\u32b8\6\u3380\6\u3448\6\u3510\6\u35d8\6\u36a0\6\u3768"+ + "\6\u3830\6\u38f8\6\u39c0\6\u3a88\6\u3b50\6\u3c18\6\u3ce0\6\u3da8"+ + "\6\u3e70\6\u3f38\6\u4000\6\u40c8\6\u4190\6\u4258\6\u4320\6\u43e8"+ + "\6\u44b0\6\u4578\6\u4640\6\u4708\6\u47d0\6\u4898\6\u4960\6\u4a28"+ + "\6\u4af0\6\u4bb8\6\u4c80\6\u4d48\6\u4e10\6\u4ed8\6\u4fa0\6\u5068"+ + "\6\u5130\6\u51f8\6\u52c0\6\u5388\6\u5450\6\u5518\6\u55e0\6\u56a8"+ + "\6\u5770\6\u5838\6\u5900\6\u59c8\6\u5a90\6\u5b58\6\u5c20\6\u5ce8"+ + "\6\u5db0\6\u5e78\6\u5f40\6\u6008\6\u60d0\6\u6198\6\u6260\6\u6328"+ + "\6\u63f0\6\u64b8\6\u6580\6\u6648\6\u6710\6\u67d8\6\u68a0\6\u6968"+ + 
"\6\u6a30\6\u6af8\6\u6bc0\6\u6c88\6\u6d50"; private static int [] zzUnpackRowMap() { - int [] result = new int[1750]; + int [] result = new int[2125]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -523,3364 +580,4900 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf "\3\2\1\13\2\2\1\14\4\2\1\15\3\2\1\16"+ "\17\2\1\17\2\2\1\20\66\2\1\21\1\2\1\22"+ "\2\2\1\23\1\24\1\2\1\25\1\2\1\26\1\2"+ - "\1\27\1\2\1\30\1\2\1\31\1\32\3\2\1\33"+ - "\2\34\1\35\1\36\1\37\1\40\6\41\1\42\3\41"+ - "\1\43\12\41\1\44\4\41\1\40\1\45\2\46\1\45"+ - "\5\46\1\47\1\2\1\40\1\50\1\40\1\2\2\40"+ - "\1\2\3\40\1\51\2\2\1\40\1\52\3\2\2\40"+ - "\1\2\307\0\1\30\2\0\1\30\4\0\1\30\16\0"+ - "\1\30\15\0\1\30\20\0\1\30\1\0\1\30\31\0"+ - "\1\30\4\0\1\30\10\0\2\30\15\0\2\30\10\0"+ - "\1\30\115\0\2\30\5\0\1\30\2\0\1\30\3\0"+ - "\2\30\10\0\4\30\1\0\3\30\1\0\1\30\2\0"+ - "\1\30\2\0\1\30\4\0\4\30\1\0\2\30\1\0"+ - "\1\30\2\0\1\30\1\0\1\30\2\0\4\30\2\0"+ - "\3\30\1\0\2\30\1\0\3\30\5\0\4\30\2\0"+ - "\10\30\1\0\1\30\2\0\4\30\1\0\2\30\1\0"+ - "\1\30\1\0\2\30\4\0\1\30\3\0\1\30\120\0"+ - "\1\30\4\0\1\30\11\0\1\30\22\0\1\30\3\0"+ - "\1\30\27\0\1\30\63\0\1\30\120\0\1\30\3\0"+ - "\4\30\1\0\1\30\1\0\1\31\2\0\1\30\1\0"+ - "\2\30\2\0\2\30\2\0\3\30\1\0\1\30\1\0"+ - "\1\30\2\0\4\30\1\0\3\30\1\0\1\30\1\0"+ - "\3\30\1\0\2\30\1\0\4\30\1\0\2\30\2\0"+ - "\10\30\1\0\2\30\1\0\11\30\1\0\10\30\1\0"+ - "\13\30\1\31\1\0\1\30\1\0\1\30\1\0\2\30"+ - "\2\0\1\30\1\0\1\30\3\0\1\30\127\0\1\30"+ - "\17\0\1\30\23\0\1\30\23\0\1\30\6\0\3\30"+ - "\37\0\1\30\7\0\1\30\117\0\1\30\1\0\2\30"+ - "\1\0\1\30\1\0\4\30\1\0\1\30\1\0\1\30"+ - "\1\0\2\30\1\0\3\30\1\0\2\30\1\0\4\30"+ - "\1\0\3\30\1\0\17\30\1\0\2\30\1\0\21\30"+ - "\1\0\2\30\1\0\41\30\1\0\1\30\1\0\2\30"+ - "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ - "\127\0\1\30\3\0\2\30\12\0\2\30\13\0\1\30"+ - "\6\0\1\30\2\0\2\30\6\0\1\30\4\0\2\30"+ - "\2\0\2\30\5\0\3\30\10\0\1\30\26\0\1\30"+ - "\7\0\1\30\117\0\1\30\1\0\2\30\1\0\1\30"+ 
- "\2\0\2\30\2\0\1\30\3\0\2\30\1\0\3\30"+ - "\1\0\2\30\1\0\4\30\1\0\3\30\1\0\1\30"+ - "\1\0\2\30\2\0\11\30\1\0\2\30\1\0\1\30"+ - "\1\0\2\30\1\0\14\30\1\0\2\30\1\0\3\30"+ - "\1\0\1\30\1\0\30\30\1\0\2\30\1\0\1\30"+ - "\1\0\2\30\2\0\1\30\1\0\1\30\1\0\1\30"+ - "\1\0\1\30\113\0\1\30\26\0\2\30\23\0\1\31"+ - "\1\30\66\0\1\31\142\0\1\31\27\0\4\30\2\0"+ - "\2\30\14\0\3\30\15\0\3\30\3\0\1\30\7\0"+ - "\2\30\13\0\1\30\13\0\4\31\1\0\2\30\11\0"+ - "\1\30\133\0\1\30\3\0\2\30\12\0\2\30\1\0"+ - "\3\30\7\0\1\30\6\0\2\30\1\0\2\30\6\0"+ - "\1\30\4\0\2\30\2\0\2\30\5\0\3\30\10\0"+ - "\1\30\16\0\1\30\4\0\2\31\1\0\1\30\7\0"+ - "\1\30\117\0\1\30\4\0\1\30\6\0\1\30\3\0"+ - "\1\30\6\0\1\30\5\0\1\30\2\0\2\30\1\0"+ - "\17\30\2\0\1\30\13\0\7\30\2\0\1\30\1\0"+ - "\1\30\1\0\1\30\2\0\1\30\1\0\1\30\1\0"+ - "\1\30\1\0\1\30\6\0\2\30\5\0\1\30\1\0"+ - "\1\30\2\0\3\30\1\0\1\30\7\0\1\30\1\0"+ - "\1\30\131\0\1\30\17\0\2\30\22\0\1\30\2\0"+ - "\2\30\13\0\1\30\3\0\2\30\5\0\3\30\10\0"+ - "\1\30\26\0\1\30\7\0\1\30\124\0\1\30\6\0"+ - "\1\30\3\0\1\30\3\0\1\30\7\0\1\30\31\0"+ - "\20\30\5\0\3\30\3\0\1\30\3\0\2\30\2\0"+ - "\2\30\4\0\1\30\10\0\1\30\4\0\1\30\2\0"+ - "\1\30\4\0\1\30\1\0\1\30\1\0\1\30\226\0"+ - "\1\36\41\0\1\32\131\0\1\35\6\0\1\35\2\0"+ - "\1\35\3\0\2\35\10\0\4\35\1\0\3\35\1\0"+ - "\1\35\2\0\1\35\2\0\1\35\4\0\4\35\1\0"+ - "\2\35\6\0\1\35\2\0\4\35\2\0\3\35\1\0"+ - "\2\35\1\0\3\35\5\0\4\35\2\0\10\35\4\0"+ - "\4\35\1\0\2\35\1\0\1\35\1\0\2\35\4\0"+ - "\1\35\3\0\1\35\113\0\1\35\1\0\2\35\1\0"+ - "\1\35\1\0\4\35\1\0\1\35\1\0\1\35\1\0"+ - "\2\35\1\0\3\35\1\0\2\35\1\0\4\35\1\0"+ - "\3\35\1\0\17\35\1\0\2\35\1\0\21\35\1\0"+ - "\2\35\1\0\41\35\1\0\1\35\1\0\2\35\2\0"+ - "\1\35\1\0\1\35\1\0\1\35\1\0\1\35\113\0"+ - "\1\35\1\0\2\35\1\0\1\35\1\0\4\35\1\0"+ - "\1\35\1\0\1\35\1\0\2\35\2\0\1\35\2\0"+ - "\2\35\1\0\4\35\1\0\3\35\1\0\17\35\1\0"+ - "\2\35\1\0\21\35\1\0\2\35\1\0\41\35\1\0"+ - "\1\35\1\0\2\35\2\0\1\35\1\0\1\35\1\0"+ - "\1\35\1\0\1\35\127\0\1\35\17\0\1\35\23\0"+ - "\1\35\32\0\1\35\41\0\1\35\7\0\1\35\117\0"+ - 
"\1\35\1\0\2\35\3\0\4\35\1\0\1\35\1\0"+ - "\1\35\1\0\2\35\1\0\3\35\1\0\2\35\1\0"+ - "\4\35\1\0\3\35\1\0\10\35\1\0\6\35\1\0"+ - "\2\35\1\0\21\35\1\0\2\35\1\0\41\35\1\0"+ - "\1\35\1\0\2\35\2\0\1\35\1\0\1\35\1\0"+ - "\1\35\1\0\1\35\304\0\1\36\112\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\74\32\30\1\0\12\73\1\74\1\0\1\75"+ - "\3\0\1\74\20\0\1\53\1\0\1\54\2\0\1\76"+ - "\1\0\1\77\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\100\2\0\1\101\4\0\1\102"+ - "\3\0\1\103\17\0\1\67\2\0\1\104\21\0\1\105"+ - "\2\0\1\106\57\0\1\30\2\31\2\0\2\107\1\110"+ - "\1\0\1\31\2\0\1\30\1\107\32\30\1\0\12\31"+ - "\2\0\1\110\2\0\2\107\6\0\1\107\16\0\1\111"+ - "\21\0\1\112\2\0\1\113\10\0\1\114\22\0\1\115"+ - "\21\0\1\116\2\0\1\117\41\0\1\120\16\0\1\32"+ - "\1\0\1\32\3\0\1\75\1\0\1\32\53\0\1\75"+ - "\24\0\1\53\1\0\1\54\2\0\1\121\1\0\1\77"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\122\2\0\1\123\4\0\1\102\3\0\1\124"+ - "\17\0\1\67\2\0\1\125\21\0\1\126\2\0\1\127"+ - "\41\0\1\130\15\0\1\30\1\131\1\31\1\132\3\0"+ - "\1\131\1\0\1\131\2\0\1\30\1\0\32\30\1\0"+ - "\12\31\2\0\1\131\227\0\2\34\105\0\1\133\21\0"+ - "\1\134\2\0\1\135\10\0\1\136\22\0\1\137\21\0"+ - "\1\140\2\0\1\141\60\0\1\35\7\0\1\35\105\0"+ - "\1\142\21\0\1\143\2\0\1\144\10\0\1\145\22\0"+ - "\1\146\21\0\1\147\2\0\1\150\60\0\1\36\7\0"+ - "\1\36\100\0\1\53\1\0\1\54\2\0\1\151\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\152\2\0\1\153\4\0\1\65\3\0"+ - "\1\154\17\0\1\67\2\0\1\155\21\0\1\156\2\0"+ - "\1\157\57\0\1\30\1\37\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\37\2\0\1\37\1\74\32\30"+ - "\1\0\12\73\1\74\1\0\1\75\3\0\1\74\230\0"+ - "\1\160\45\161\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\55\1\0\1\56\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\63\2\0\1\64"+ - 
"\4\0\1\65\3\0\1\66\17\0\1\67\2\0\1\70"+ - "\21\0\1\71\2\0\1\72\57\0\2\30\1\73\1\0"+ - "\1\74\1\0\1\74\1\75\1\0\1\30\2\0\1\30"+ - "\1\163\32\41\1\164\12\165\1\74\1\161\1\166\1\161"+ - "\1\0\1\161\1\167\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\163\10\41\1\170"+ - "\6\41\1\171\12\41\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\1\172"+ - "\31\41\1\164\12\165\1\74\1\161\1\166\1\161\1\0"+ - "\1\161\1\167\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\55\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\63\2\0\1\64\4\0\1\65\3\0\1\66"+ - "\17\0\1\67\2\0\1\70\21\0\1\71\2\0\1\72"+ - "\57\0\2\30\1\73\1\0\1\74\1\0\1\74\1\75"+ - "\1\0\1\30\2\0\1\30\1\163\17\41\1\173\12\41"+ - "\1\164\12\165\1\74\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\3\161\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\76\1\0\1\77\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\100\2\0\1\101\4\0\1\102\3\0\1\103\17\0"+ - "\1\67\2\0\1\104\21\0\1\105\2\0\1\106\57\0"+ - "\1\30\2\31\2\0\2\107\1\110\1\0\1\31\2\0"+ - "\1\30\1\174\32\41\1\164\12\46\1\0\1\161\1\175"+ - "\1\161\1\0\2\176\1\162\3\161\2\0\1\107\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\76"+ - "\1\0\1\77\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\100\2\0\1\101\4\0\1\102"+ - "\3\0\1\103\17\0\1\67\2\0\1\104\21\0\1\105"+ - "\2\0\1\106\57\0\1\30\2\31\2\0\2\107\1\110"+ - "\1\0\1\31\2\0\1\30\1\174\32\41\1\164\12\177"+ - "\1\0\1\161\1\175\1\161\1\0\2\176\1\162\3\161"+ - 
"\2\0\1\107\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\76\1\0\1\77\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\100\2\0"+ - "\1\101\4\0\1\102\3\0\1\103\17\0\1\67\2\0"+ - "\1\104\21\0\1\105\2\0\1\106\57\0\1\30\2\31"+ - "\2\0\2\107\1\110\1\0\1\31\2\0\1\30\1\174"+ - "\32\41\1\164\1\46\1\200\1\177\2\46\2\177\1\46"+ - "\1\177\1\46\1\0\1\161\1\175\1\161\1\0\2\176"+ - "\1\162\3\161\2\0\1\107\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\121\1\0\1\77\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\122\2\0\1\123\4\0\1\102\3\0\1\124\17\0"+ - "\1\67\2\0\1\125\21\0\1\126\2\0\1\127\41\0"+ - "\1\130\15\0\1\30\1\131\1\31\1\132\3\0\1\131"+ - "\1\0\1\131\2\0\1\30\1\160\32\201\1\161\12\202"+ - "\1\0\1\161\1\203\1\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\213\0\4\204\2\0\1\204"+ - "\15\0\1\204\6\0\12\204\1\205\236\0\65\206\1\207"+ - "\1\206\1\210\1\0\2\206\10\0\1\30\4\0\1\30"+ - "\11\0\1\30\22\0\1\30\3\0\1\30\13\0\1\30"+ - "\2\0\1\30\10\0\1\30\12\0\4\30\45\0\1\30"+ - "\120\0\1\30\3\0\4\30\1\0\1\30\1\0\1\73"+ + "\1\27\3\2\1\30\1\2\1\31\1\32\3\2\1\33"+ + "\2\34\1\35\1\36\1\37\1\40\1\41\1\42\1\37"+ + "\6\43\1\44\3\43\1\45\12\43\1\46\4\43\1\37"+ + "\1\47\1\50\5\47\1\51\1\50\1\47\1\2\1\37"+ + "\1\52\1\37\1\2\1\37\1\2\3\37\1\53\2\2"+ + "\1\37\3\2\2\37\1\2\312\0\1\30\2\0\1\30"+ + "\4\0\1\30\16\0\1\30\15\0\1\30\20\0\1\30"+ + "\1\0\1\30\41\0\1\30\4\0\1\30\10\0\2\30"+ + "\5\0\2\30\10\0\1\30\120\0\2\30\5\0\1\30"+ + "\2\0\1\30\3\0\2\30\10\0\4\30\1\0\3\30"+ + "\1\0\1\30\2\0\1\30\2\0\1\30\4\0\4\30"+ + "\1\0\2\30\1\0\1\30\2\0\1\30\1\0\1\30"+ + "\2\0\4\30\2\0\3\30\1\0\2\30\1\0\3\30"+ + "\1\0\4\30\1\0\2\30\5\0\4\30\2\0\10\30"+ + "\1\0\1\30\2\0\1\30\1\0\2\30\4\0\1\30"+ + "\3\0\3\30\121\0\1\30\4\0\1\30\11\0\1\30"+ + "\22\0\1\30\3\0\1\30\27\0\1\30\63\0\1\30"+ + "\123\0\1\30\3\0\4\30\1\0\1\30\1\0\1\31"+ "\2\0\1\30\1\0\2\30\2\0\2\30\2\0\3\30"+ "\1\0\1\30\1\0\1\30\2\0\4\30\1\0\3\30"+ "\1\0\1\30\1\0\3\30\1\0\2\30\1\0\4\30"+ - "\1\0\2\30\2\0\10\30\1\0\2\30\1\0\11\30"+ - 
"\1\0\10\30\1\0\13\30\1\73\1\0\1\30\1\0"+ + "\1\0\2\30\2\0\10\30\1\0\2\30\1\0\10\30"+ + "\1\31\1\0\7\30\1\0\10\30\1\0\6\30\1\0"+ "\1\30\1\0\2\30\2\0\1\30\1\0\1\30\3\0"+ - "\1\30\113\0\1\30\26\0\2\30\23\0\1\73\1\30"+ - "\44\0\1\30\21\0\1\73\142\0\1\73\11\0\1\30"+ - "\15\0\4\30\2\0\2\30\14\0\4\30\1\0\2\30"+ - "\11\0\3\30\3\0\1\30\1\0\1\30\4\0\3\30"+ - "\5\0\4\30\2\0\2\30\12\0\4\73\1\0\2\30"+ - "\1\0\1\30\7\0\1\30\133\0\1\30\3\0\2\30"+ + "\3\30\130\0\1\30\17\0\1\30\23\0\1\30\23\0"+ + "\1\30\6\0\3\30\37\0\1\30\7\0\1\30\122\0"+ + "\1\30\1\0\2\30\1\0\1\30\1\0\4\30\1\0"+ + "\1\30\1\0\1\30\1\0\2\30\1\0\3\30\1\0"+ + "\2\30\1\0\4\30\1\0\3\30\1\0\17\30\1\0"+ + "\2\30\1\0\21\30\1\0\2\30\1\0\41\30\1\0"+ + "\1\30\1\0\2\30\2\0\1\30\1\0\1\30\1\0"+ + "\1\30\1\0\3\30\130\0\1\30\3\0\2\30\12\0"+ + "\2\30\13\0\1\30\6\0\1\30\2\0\2\30\6\0"+ + "\1\30\4\0\2\30\2\0\2\30\5\0\3\30\20\0"+ + "\1\30\16\0\1\30\7\0\1\30\122\0\1\30\1\0"+ + "\2\30\1\0\1\30\2\0\2\30\2\0\1\30\3\0"+ + "\2\30\1\0\3\30\1\0\2\30\1\0\4\30\1\0"+ + "\3\30\1\0\1\30\1\0\2\30\2\0\11\30\1\0"+ + "\2\30\1\0\1\30\1\0\2\30\1\0\14\30\1\0"+ + "\2\30\1\0\10\30\1\0\2\30\1\0\1\30\1\0"+ + "\23\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\1\30\1\0\1\30\1\0\3\30\114\0\1\30\26\0"+ + "\2\30\23\0\1\31\1\30\40\0\1\31\173\0\1\31"+ + "\27\0\4\30\2\0\2\30\14\0\3\30\15\0\3\30"+ + "\3\0\1\30\7\0\2\30\1\0\4\31\1\0\2\30"+ + "\13\0\1\30\23\0\1\30\136\0\1\30\3\0\2\30"+ "\12\0\2\30\1\0\3\30\7\0\1\30\6\0\2\30"+ "\1\0\2\30\6\0\1\30\4\0\2\30\2\0\2\30"+ - "\5\0\3\30\10\0\1\30\16\0\1\30\4\0\2\73"+ - "\1\0\1\30\7\0\1\30\117\0\1\30\4\0\1\30"+ + "\5\0\3\30\2\0\1\30\3\0\2\31\10\0\1\30"+ + "\16\0\1\30\7\0\1\30\122\0\1\30\4\0\1\30"+ "\6\0\1\30\3\0\1\30\6\0\1\30\5\0\1\30"+ "\2\0\2\30\1\0\17\30\2\0\1\30\13\0\7\30"+ - "\2\0\1\30\1\0\1\30\1\0\1\30\2\0\1\30"+ - "\1\0\1\30\1\0\1\30\1\0\1\30\4\0\1\30"+ - "\1\0\2\30\5\0\1\30\1\0\1\30\2\0\3\30"+ - "\1\0\1\30\7\0\1\30\1\0\1\30\122\0\1\30"+ + "\2\0\1\30\1\0\1\30\1\0\2\30\2\0\1\30"+ + "\1\0\3\30\2\0\1\30\1\0\1\30\1\0\1\30"+ + 
"\1\0\1\30\6\0\2\30\6\0\1\30\7\0\1\30"+ + "\1\0\1\30\134\0\1\30\17\0\2\30\22\0\1\30"+ + "\2\0\2\30\13\0\1\30\3\0\2\30\5\0\3\30"+ + "\20\0\1\30\16\0\1\30\7\0\1\30\127\0\1\30"+ "\6\0\1\30\3\0\1\30\3\0\1\30\7\0\1\30"+ - "\31\0\20\30\5\0\3\30\3\0\1\30\3\0\2\30"+ - "\2\0\2\30\4\0\5\30\4\0\1\30\4\0\1\30"+ + "\31\0\20\30\5\0\3\30\4\0\1\30\6\0\1\30"+ + "\3\0\2\30\2\0\2\30\4\0\1\30\5\0\1\30"+ "\2\0\1\30\4\0\1\30\1\0\1\30\1\0\1\30"+ - "\223\0\2\30\15\0\4\30\154\0\1\30\15\0\2\30"+ - "\10\0\2\30\1\0\1\30\1\0\1\30\11\0\1\30"+ - "\11\0\2\30\6\0\1\30\2\0\4\30\3\0\1\30"+ - "\2\0\2\30\1\0\3\30\5\0\1\30\1\0\2\30"+ - "\2\0\2\30\1\0\4\30\5\0\1\30\1\0\2\30"+ - "\133\0\1\53\1\0\1\54\2\0\1\211\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\212\2\0\1\213\4\0\1\65\3\0\1\214"+ - "\17\0\1\67\2\0\1\215\21\0\1\216\2\0\1\217"+ - "\57\0\1\30\2\73\2\0\2\220\1\75\1\0\1\73"+ - "\2\0\1\30\1\220\32\30\1\0\12\73\2\0\1\75"+ - "\2\0\2\220\6\0\1\220\11\0\1\53\1\0\1\54"+ - "\2\0\1\221\1\0\1\222\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\223\2\0\1\224"+ - "\4\0\1\225\3\0\1\226\17\0\1\67\2\0\1\227"+ - "\21\0\1\230\2\0\1\231\57\0\1\30\1\74\7\0"+ - "\1\74\2\0\1\30\1\0\32\30\42\0\1\53\1\0"+ - "\1\54\2\0\1\232\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\233\2\0"+ - "\1\234\4\0\1\65\3\0\1\235\17\0\1\67\2\0"+ - "\1\236\21\0\1\237\2\0\1\240\41\0\1\130\15\0"+ - "\1\30\1\75\1\73\1\132\3\0\1\75\1\0\1\75"+ - "\2\0\1\30\1\0\32\30\1\0\12\73\2\0\1\75"+ - "\32\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ - "\3\0\1\30\13\0\1\31\2\0\1\31\10\0\1\30"+ - "\12\0\4\31\45\0\1\30\115\0\1\30\26\0\2\30"+ - "\23\0\1\31\1\30\44\0\1\31\21\0\1\31\142\0"+ - "\1\31\11\0\1\31\15\0\4\30\2\0\2\30\14\0"+ - "\3\30\1\31\1\0\2\31\11\0\3\30\3\0\1\30"+ - "\1\0\1\31\4\0\1\31\2\30\5\0\4\31\2\0"+ - "\1\30\1\31\12\0\4\31\1\0\2\30\1\0\1\31"+ - "\7\0\1\30\117\0\1\30\4\0\1\30\6\0\1\30"+ - "\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ - "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ - "\1\0\1\30\1\0\1\30\2\0\1\30\1\0\1\30"+ - 
"\1\0\1\30\1\0\1\30\4\0\1\31\1\0\2\30"+ - "\5\0\1\30\1\0\1\30\2\0\3\30\1\0\1\30"+ - "\7\0\1\30\1\0\1\30\122\0\1\30\6\0\1\30"+ - "\3\0\1\30\3\0\1\30\7\0\1\30\31\0\20\30"+ - "\5\0\3\30\3\0\1\30\3\0\2\30\2\0\2\30"+ - "\4\0\1\30\4\31\4\0\1\30\4\0\1\30\2\0"+ - "\1\30\4\0\1\30\1\0\1\30\1\0\1\30\223\0"+ - "\2\31\15\0\4\31\154\0\1\31\15\0\2\31\10\0"+ - "\2\31\1\0\1\31\1\0\1\31\11\0\1\31\11\0"+ - "\2\31\6\0\1\31\2\0\4\31\3\0\1\31\2\0"+ - "\2\31\1\0\3\31\5\0\1\31\1\0\2\31\2\0"+ - "\2\31\1\0\4\31\5\0\1\31\1\0\2\31\140\0"+ - "\1\241\1\0\1\242\17\0\1\243\2\0\1\244\4\0"+ - "\1\245\3\0\1\246\22\0\1\247\21\0\1\250\2\0"+ - "\1\251\60\0\1\107\1\31\6\0\1\107\37\0\12\31"+ - "\27\0\1\53\1\0\1\54\2\0\1\252\1\0\1\77"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\253\2\0\1\254\4\0\1\102\3\0\1\255"+ - "\17\0\1\67\2\0\1\256\21\0\1\257\2\0\1\260"+ - "\41\0\1\130\15\0\1\30\1\110\1\31\1\132\3\0"+ - "\1\110\1\0\1\110\2\0\1\30\1\0\32\30\1\0"+ - "\12\31\2\0\1\110\114\0\1\32\2\0\1\32\23\0"+ - "\4\32\305\0\1\32\176\0\1\32\44\0\1\32\1\0"+ - "\2\32\21\0\1\32\4\0\1\32\7\0\4\32\3\0"+ - "\1\32\22\0\1\32\262\0\1\32\311\0\4\32\251\0"+ - "\2\32\15\0\4\32\154\0\1\32\15\0\2\32\10\0"+ - "\2\32\1\0\1\32\1\0\1\32\11\0\1\32\11\0"+ - "\2\32\6\0\1\32\2\0\4\32\3\0\1\32\2\0"+ - "\2\32\1\0\3\32\5\0\1\32\1\0\2\32\2\0"+ - "\2\32\1\0\4\32\5\0\1\32\1\0\2\32\311\0"+ - "\1\32\134\0\1\30\4\0\1\30\11\0\1\30\22\0"+ - "\1\30\3\0\1\30\13\0\1\131\2\0\1\131\10\0"+ - "\1\30\12\0\4\131\45\0\1\30\115\0\1\30\26\0"+ - "\2\30\23\0\1\31\1\30\44\0\1\131\21\0\1\31"+ - "\142\0\1\31\11\0\1\131\15\0\4\30\2\0\2\30"+ - "\14\0\3\30\1\131\1\0\2\131\11\0\3\30\3\0"+ - "\1\30\1\0\1\131\4\0\1\131\2\30\5\0\4\131"+ - "\2\0\1\30\1\131\12\0\4\31\1\0\2\30\1\0"+ - "\1\131\7\0\1\30\117\0\1\30\4\0\1\30\6\0"+ + "\231\0\1\36\41\0\1\32\134\0\1\35\6\0\1\35"+ + "\2\0\1\35\3\0\2\35\10\0\4\35\1\0\3\35"+ + "\1\0\1\35\2\0\1\35\2\0\1\35\4\0\4\35"+ + "\1\0\2\35\6\0\1\35\2\0\4\35\2\0\3\35"+ + "\1\0\2\35\1\0\3\35\1\0\4\35\1\0\2\35"+ + 
"\5\0\4\35\2\0\10\35\4\0\1\35\1\0\2\35"+ + "\4\0\1\35\3\0\3\35\114\0\1\35\1\0\2\35"+ + "\1\0\1\35\1\0\4\35\1\0\1\35\1\0\1\35"+ + "\1\0\2\35\1\0\3\35\1\0\2\35\1\0\4\35"+ + "\1\0\3\35\1\0\17\35\1\0\2\35\1\0\21\35"+ + "\1\0\2\35\1\0\41\35\1\0\1\35\1\0\2\35"+ + "\2\0\1\35\1\0\1\35\1\0\1\35\1\0\3\35"+ + "\114\0\1\35\1\0\2\35\1\0\1\35\1\0\4\35"+ + "\1\0\1\35\1\0\1\35\1\0\2\35\2\0\1\35"+ + "\2\0\2\35\1\0\4\35\1\0\3\35\1\0\17\35"+ + "\1\0\2\35\1\0\21\35\1\0\2\35\1\0\41\35"+ + "\1\0\1\35\1\0\2\35\2\0\1\35\1\0\1\35"+ + "\1\0\1\35\1\0\3\35\130\0\1\35\17\0\1\35"+ + "\23\0\1\35\32\0\1\35\41\0\1\35\7\0\1\35"+ + "\122\0\1\35\1\0\2\35\3\0\4\35\1\0\1\35"+ + "\1\0\1\35\1\0\2\35\1\0\3\35\1\0\2\35"+ + "\1\0\4\35\1\0\3\35\1\0\10\35\1\0\6\35"+ + "\1\0\2\35\1\0\21\35\1\0\2\35\1\0\41\35"+ + "\1\0\1\35\1\0\2\35\2\0\1\35\1\0\1\35"+ + "\1\0\1\35\1\0\3\35\257\0\1\54\25\0\1\36"+ + "\2\54\113\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\76\1\0\1\41\1\30"+ + "\1\76\32\30\1\0\12\75\1\76\1\0\1\77\22\0"+ + "\1\55\1\0\1\56\2\0\1\100\1\0\1\101\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\102\2\0\1\103\4\0\1\104\3\0\1\105\17\0"+ + "\1\71\2\0\1\106\21\0\1\107\2\0\1\110\61\0"+ + "\1\30\2\31\2\0\2\111\1\112\1\0\1\31\2\0"+ + "\1\111\1\0\1\41\1\30\1\111\32\30\1\0\12\31"+ + "\2\0\1\112\2\0\1\111\6\0\1\111\15\0\1\113"+ + "\21\0\1\114\2\0\1\115\10\0\1\116\22\0\1\117"+ + "\21\0\1\120\2\0\1\121\41\0\1\122\20\0\1\32"+ + "\1\0\1\32\3\0\1\123\1\0\1\32\56\0\1\123"+ + "\22\0\1\55\1\0\1\56\2\0\1\124\1\0\1\101"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\125\2\0\1\126\4\0\1\104\3\0\1\127"+ + "\17\0\1\71\2\0\1\130\21\0\1\131\2\0\1\132"+ + "\41\0\1\133\17\0\1\30\1\134\1\31\1\135\3\0"+ + "\1\134\1\0\1\134\4\0\1\41\1\30\1\0\32\30"+ + "\1\0\12\31\2\0\1\134\227\0\2\34\106\0\1\136"+ + 
"\21\0\1\137\2\0\1\140\10\0\1\141\22\0\1\142"+ + "\21\0\1\143\2\0\1\144\62\0\1\35\7\0\1\35"+ + "\106\0\1\145\21\0\1\146\2\0\1\147\10\0\1\150"+ + "\22\0\1\151\21\0\1\152\2\0\1\153\62\0\1\36"+ + "\7\0\1\36\312\0\1\154\3\0\1\155\45\154\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\157\1\160\2\0\65\157\1\161\1\0"+ + "\2\157\2\0\1\55\1\0\1\56\2\0\1\162\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\163\2\0\1\164\4\0\1\67\3\0"+ + "\1\165\17\0\1\71\2\0\1\166\21\0\1\167\2\0"+ + "\1\170\61\0\1\30\1\41\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\41\2\0\1\30\1\171\1\41"+ + "\1\30\1\76\32\30\1\0\12\75\1\76\1\0\1\77"+ + "\22\0\1\55\1\0\1\56\2\0\1\172\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\173\2\0\1\174\4\0\1\67\3\0\1\175"+ + "\17\0\1\71\2\0\1\176\21\0\1\177\2\0\1\200"+ + "\61\0\1\30\1\42\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\42\2\0\1\76\1\0\1\41\1\42"+ + "\1\76\32\30\1\0\12\75\1\76\1\0\1\77\22\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\32\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\10\43\1\206"+ + "\6\43\1\207\12\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\1\210\31\43\1\203\12\204\1\76\1\154\1\205"+ + 
"\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\17\43\1\211\12\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\100"+ + "\1\0\1\101\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\102\2\0\1\103\4\0\1\104"+ + "\3\0\1\105\17\0\1\71\2\0\1\106\21\0\1\107"+ + "\2\0\1\110\61\0\1\30\2\31\2\0\2\111\1\112"+ + "\1\0\1\31\2\0\1\212\1\0\1\41\1\30\1\213"+ + "\32\43\1\203\12\214\1\0\1\154\1\215\1\154\1\0"+ + "\1\212\1\156\3\154\2\0\1\111\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\100\1\0\1\101"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\102\2\0\1\103\4\0\1\104\3\0\1\105"+ + "\17\0\1\71\2\0\1\106\21\0\1\107\2\0\1\110"+ + "\61\0\1\30\2\31\2\0\2\111\1\112\1\0\1\31"+ + "\2\0\1\212\1\0\1\41\1\30\1\213\32\43\1\203"+ + "\12\47\1\0\1\154\1\215\1\154\1\0\1\212\1\156"+ + "\3\154\2\0\1\111\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\100\1\0\1\101\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\102"+ + "\2\0\1\103\4\0\1\104\3\0\1\105\17\0\1\71"+ + "\2\0\1\106\21\0\1\107\2\0\1\110\61\0\1\30"+ + "\2\31\2\0\2\111\1\112\1\0\1\31\2\0\1\212"+ + "\1\0\1\41\1\30\1\213\32\43\1\203\2\47\1\214"+ + "\1\47\1\216\2\214\2\47\1\214\1\0\1\154\1\215"+ + "\1\154\1\0\1\212\1\156\3\154\2\0\1\111\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\124"+ + "\1\0\1\101\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\125\2\0\1\126\4\0\1\104"+ + "\3\0\1\127\17\0\1\71\2\0\1\130\21\0\1\131"+ + "\2\0\1\132\41\0\1\133\17\0\1\30\1\134\1\31"+ + "\1\135\3\0\1\134\1\0\1\134\2\0\1\154\1\0"+ + "\1\41\1\30\1\155\32\217\1\154\12\220\1\0\1\154"+ + "\1\221\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\220\0\4\222\2\0\1\222\15\0\1\222"+ + "\6\0\12\222\1\223\31\0\1\224\21\0\1\225\2\0"+ 
+ "\1\226\10\0\1\227\22\0\1\230\21\0\1\231\2\0"+ + "\1\232\55\0\1\233\4\0\1\54\7\0\1\54\107\0"+ + "\1\30\4\0\1\30\11\0\1\30\22\0\1\30\3\0"+ + "\1\30\13\0\1\30\2\0\1\30\10\0\1\30\22\0"+ + "\4\30\35\0\1\30\123\0\1\30\3\0\4\30\1\0"+ + "\1\30\1\0\1\75\2\0\1\30\1\0\2\30\2\0"+ + "\2\30\2\0\3\30\1\0\1\30\1\0\1\30\2\0"+ + "\4\30\1\0\3\30\1\0\1\30\1\0\3\30\1\0"+ + "\2\30\1\0\4\30\1\0\2\30\2\0\10\30\1\0"+ + "\2\30\1\0\10\30\1\75\1\0\7\30\1\0\10\30"+ + "\1\0\6\30\1\0\1\30\1\0\2\30\2\0\1\30"+ + "\1\0\1\30\3\0\3\30\114\0\1\30\26\0\2\30"+ + "\23\0\1\75\1\30\40\0\1\75\13\0\1\30\157\0"+ + "\1\75\11\0\1\30\15\0\4\30\2\0\2\30\14\0"+ + "\4\30\1\0\2\30\11\0\3\30\3\0\1\30\1\0"+ + "\1\30\4\0\3\30\1\0\4\75\1\0\2\30\5\0"+ + "\4\30\2\0\2\30\12\0\1\30\7\0\1\30\136\0"+ + "\1\30\3\0\2\30\12\0\2\30\1\0\3\30\7\0"+ + "\1\30\6\0\2\30\1\0\2\30\6\0\1\30\4\0"+ + "\2\30\2\0\2\30\5\0\3\30\2\0\1\30\3\0"+ + "\2\75\10\0\1\30\16\0\1\30\7\0\1\30\122\0"+ + "\1\30\4\0\1\30\6\0\1\30\3\0\1\30\6\0"+ + "\1\30\5\0\1\30\2\0\2\30\1\0\17\30\2\0"+ + "\1\30\13\0\7\30\2\0\1\30\1\0\1\30\1\0"+ + "\2\30\2\0\1\30\1\0\3\30\2\0\1\30\1\0"+ + "\1\30\1\0\1\30\1\0\1\30\4\0\1\30\1\0"+ + "\2\30\6\0\1\30\7\0\1\30\1\0\1\30\125\0"+ + "\1\30\6\0\1\30\3\0\1\30\3\0\1\30\7\0"+ + "\1\30\31\0\20\30\5\0\3\30\4\0\1\30\6\0"+ + "\1\30\3\0\2\30\2\0\2\30\4\0\5\30\1\0"+ + "\1\30\2\0\1\30\4\0\1\30\1\0\1\30\1\0"+ + "\1\30\226\0\2\30\25\0\4\30\147\0\1\30\15\0"+ + "\2\30\10\0\2\30\1\0\1\30\1\0\1\30\11\0"+ + "\1\30\11\0\2\30\6\0\1\30\2\0\4\30\3\0"+ + "\1\30\2\0\2\30\1\0\3\30\1\0\2\30\1\0"+ + "\1\30\10\0\1\30\1\0\2\30\2\0\2\30\1\0"+ + "\4\30\23\0\1\30\113\0\1\55\1\0\1\56\2\0"+ + "\1\234\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\235\2\0\1\236\4\0"+ + "\1\67\3\0\1\237\17\0\1\71\2\0\1\240\21\0"+ + "\1\241\2\0\1\242\61\0\1\30\2\75\2\0\2\243"+ + "\1\244\1\0\1\75\2\0\1\243\1\0\1\41\1\30"+ + "\1\243\32\30\1\0\12\75\2\0\1\244\2\0\1\243"+ + "\6\0\1\243\10\0\1\55\1\0\1\56\2\0\1\245"+ + "\1\0\1\246\4\0\1\61\1\0\1\62\1\0\1\63"+ + 
"\2\0\1\64\3\0\1\247\2\0\1\250\4\0\1\251"+ + "\3\0\1\252\17\0\1\71\2\0\1\253\21\0\1\254"+ + "\2\0\1\255\61\0\1\30\1\76\2\0\1\76\1\0"+ + "\2\76\1\0\1\76\2\0\1\76\1\0\2\30\1\76"+ + "\32\30\13\0\1\76\1\0\1\76\22\0\1\55\1\0"+ + "\1\56\2\0\1\256\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\257\2\0"+ + "\1\260\4\0\1\67\3\0\1\261\17\0\1\71\2\0"+ + "\1\262\21\0\1\263\2\0\1\264\41\0\1\133\17\0"+ + "\1\30\1\77\1\75\1\135\1\76\1\0\1\76\1\77"+ + "\1\0\1\77\2\0\1\76\1\0\1\41\1\30\1\76"+ + "\32\30\1\0\12\75\1\76\1\0\1\77\30\0\1\30"+ + "\4\0\1\30\11\0\1\30\22\0\1\30\3\0\1\30"+ + "\13\0\1\31\2\0\1\31\10\0\1\30\22\0\4\31"+ + "\35\0\1\30\120\0\1\30\26\0\2\30\23\0\1\31"+ + "\1\30\40\0\1\31\13\0\1\31\157\0\1\31\11\0"+ + "\1\31\15\0\4\30\2\0\2\30\14\0\3\30\1\31"+ + "\1\0\2\31\11\0\3\30\3\0\1\30\1\0\1\31"+ + "\4\0\1\31\2\30\1\0\4\31\1\0\2\30\5\0"+ + "\4\31\2\0\1\30\1\31\12\0\1\31\7\0\1\30"+ + "\122\0\1\30\4\0\1\30\6\0\1\30\3\0\1\30"+ + "\6\0\1\30\5\0\1\30\2\0\2\30\1\0\17\30"+ + "\2\0\1\30\13\0\7\30\2\0\1\30\1\0\1\30"+ + "\1\0\2\30\2\0\1\30\1\0\3\30\2\0\1\30"+ + "\1\0\1\30\1\0\1\30\1\0\1\30\4\0\1\31"+ + "\1\0\2\30\6\0\1\30\7\0\1\30\1\0\1\30"+ + "\125\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ + "\7\0\1\30\31\0\20\30\5\0\3\30\4\0\1\30"+ + "\6\0\1\30\3\0\2\30\2\0\2\30\4\0\1\30"+ + "\4\31\1\0\1\30\2\0\1\30\4\0\1\30\1\0"+ + "\1\30\1\0\1\30\226\0\2\31\25\0\4\31\147\0"+ + "\1\31\15\0\2\31\10\0\2\31\1\0\1\31\1\0"+ + "\1\31\11\0\1\31\11\0\2\31\6\0\1\31\2\0"+ + "\4\31\3\0\1\31\2\0\2\31\1\0\3\31\1\0"+ + "\2\31\1\0\1\31\10\0\1\31\1\0\2\31\2\0"+ + "\2\31\1\0\4\31\23\0\1\31\120\0\1\265\1\0"+ + "\1\266\17\0\1\267\2\0\1\270\4\0\1\271\3\0"+ + "\1\272\22\0\1\273\21\0\1\274\2\0\1\275\62\0"+ + "\1\111\1\31\2\0\3\243\1\0\1\111\2\0\1\243"+ + "\3\0\1\243\33\0\12\31\2\0\1\243\2\0\1\243"+ + "\6\0\1\243\10\0\1\55\1\0\1\56\2\0\1\276"+ + "\1\0\1\101\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\277\2\0\1\300\4\0\1\104"+ + "\3\0\1\301\17\0\1\71\2\0\1\302\21\0\1\303"+ + 
"\2\0\1\304\41\0\1\133\17\0\1\30\1\112\1\31"+ + "\1\135\1\0\2\243\1\112\1\0\1\112\2\0\1\243"+ + "\1\0\1\41\1\30\1\243\32\30\1\0\12\31\2\0"+ + "\1\112\2\0\1\243\6\0\1\243\100\0\1\32\2\0"+ + "\1\32\33\0\4\32\310\0\1\32\171\0\1\32\44\0"+ + "\1\32\1\0\2\32\21\0\1\32\4\0\1\32\17\0"+ + "\4\32\3\0\1\32\12\0\1\32\275\0\1\32\314\0"+ + "\4\32\244\0\2\32\25\0\4\32\147\0\1\32\15\0"+ + "\2\32\10\0\2\32\1\0\1\32\1\0\1\32\11\0"+ + "\1\32\11\0\2\32\6\0\1\32\2\0\4\32\3\0"+ + "\1\32\2\0\2\32\1\0\3\32\1\0\2\32\1\0"+ + "\1\32\10\0\1\32\1\0\2\32\2\0\2\32\1\0"+ + "\4\32\23\0\1\32\271\0\1\32\131\0\1\55\1\0"+ + "\1\56\2\0\1\305\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\306\2\0"+ + "\1\307\4\0\1\67\3\0\1\310\17\0\1\71\2\0"+ + "\1\311\21\0\1\312\2\0\1\313\41\0\1\133\17\0"+ + "\1\30\1\123\1\75\1\135\3\0\1\123\1\0\1\123"+ + "\4\0\1\41\1\30\1\0\32\30\1\0\12\75\2\0"+ + "\1\123\30\0\1\30\4\0\1\30\11\0\1\30\22\0"+ + "\1\30\3\0\1\30\13\0\1\134\2\0\1\134\10\0"+ + "\1\30\22\0\4\134\35\0\1\30\120\0\1\30\26\0"+ + "\2\30\23\0\1\31\1\30\40\0\1\31\13\0\1\134"+ + "\157\0\1\31\11\0\1\134\15\0\4\30\2\0\2\30"+ + "\14\0\3\30\1\134\1\0\2\134\11\0\3\30\3\0"+ + "\1\30\1\0\1\134\4\0\1\134\2\30\1\0\4\31"+ + "\1\0\2\30\5\0\4\134\2\0\1\30\1\134\12\0"+ + "\1\134\7\0\1\30\122\0\1\30\4\0\1\30\6\0"+ "\1\30\3\0\1\30\6\0\1\30\5\0\1\30\2\0"+ "\2\30\1\0\17\30\2\0\1\30\13\0\7\30\2\0"+ - "\1\30\1\0\1\30\1\0\1\30\2\0\1\30\1\0"+ - "\1\30\1\0\1\30\1\0\1\30\4\0\1\131\1\0"+ - "\2\30\5\0\1\30\1\0\1\30\2\0\3\30\1\0"+ - "\1\30\7\0\1\30\1\0\1\30\122\0\1\30\6\0"+ - "\1\30\3\0\1\30\3\0\1\30\7\0\1\30\31\0"+ - "\20\30\5\0\3\30\3\0\1\30\3\0\2\30\2\0"+ - "\2\30\4\0\1\30\4\131\4\0\1\30\4\0\1\30"+ - "\2\0\1\30\4\0\1\30\1\0\1\30\1\0\1\30"+ - "\223\0\2\131\15\0\4\131\154\0\1\131\15\0\2\131"+ - "\10\0\2\131\1\0\1\131\1\0\1\131\11\0\1\131"+ - "\11\0\2\131\6\0\1\131\2\0\4\131\3\0\1\131"+ - "\2\0\2\131\1\0\3\131\5\0\1\131\1\0\2\131"+ - "\2\0\2\131\1\0\4\131\5\0\1\131\1\0\2\131"+ - "\311\0\1\132\133\0\1\261\21\0\1\262\2\0\1\263"+ - 
"\10\0\1\264\22\0\1\265\21\0\1\266\2\0\1\267"+ - "\41\0\1\130\16\0\1\132\1\0\1\132\3\0\1\75"+ - "\1\0\1\132\53\0\1\75\114\0\1\35\2\0\1\35"+ - "\23\0\4\35\305\0\1\35\176\0\1\35\44\0\1\35"+ - "\1\0\2\35\21\0\1\35\4\0\1\35\7\0\4\35"+ - "\3\0\1\35\22\0\1\35\262\0\1\35\311\0\4\35"+ - "\251\0\2\35\15\0\4\35\154\0\1\35\15\0\2\35"+ + "\1\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\3\30\2\0\1\30\1\0\1\30\1\0\1\30\1\0"+ + "\1\30\4\0\1\134\1\0\2\30\6\0\1\30\7\0"+ + "\1\30\1\0\1\30\125\0\1\30\6\0\1\30\3\0"+ + "\1\30\3\0\1\30\7\0\1\30\31\0\20\30\5\0"+ + "\3\30\4\0\1\30\6\0\1\30\3\0\2\30\2\0"+ + "\2\30\4\0\1\30\4\134\1\0\1\30\2\0\1\30"+ + "\4\0\1\30\1\0\1\30\1\0\1\30\226\0\2\134"+ + "\25\0\4\134\147\0\1\134\15\0\2\134\10\0\2\134"+ + "\1\0\1\134\1\0\1\134\11\0\1\134\11\0\2\134"+ + "\6\0\1\134\2\0\4\134\3\0\1\134\2\0\2\134"+ + "\1\0\3\134\1\0\2\134\1\0\1\134\10\0\1\134"+ + "\1\0\2\134\2\0\2\134\1\0\4\134\23\0\1\134"+ + "\271\0\1\135\136\0\1\314\21\0\1\315\2\0\1\316"+ + "\10\0\1\317\22\0\1\320\21\0\1\321\2\0\1\322"+ + "\41\0\1\133\20\0\1\135\1\0\1\135\3\0\1\123"+ + "\1\0\1\135\56\0\1\123\112\0\1\35\2\0\1\35"+ + "\33\0\4\35\310\0\1\35\171\0\1\35\44\0\1\35"+ + "\1\0\2\35\21\0\1\35\4\0\1\35\17\0\4\35"+ + "\3\0\1\35\12\0\1\35\275\0\1\35\314\0\4\35"+ + "\244\0\2\35\25\0\4\35\147\0\1\35\15\0\2\35"+ "\10\0\2\35\1\0\1\35\1\0\1\35\11\0\1\35"+ "\11\0\2\35\6\0\1\35\2\0\4\35\3\0\1\35"+ - "\2\0\2\35\1\0\3\35\5\0\1\35\1\0\2\35"+ - "\2\0\2\35\1\0\4\35\5\0\1\35\1\0\2\35"+ - "\223\0\1\36\2\0\1\36\23\0\4\36\305\0\1\36"+ - "\176\0\1\36\44\0\1\36\1\0\2\36\21\0\1\36"+ - "\4\0\1\36\7\0\4\36\3\0\1\36\22\0\1\36"+ - "\262\0\1\36\311\0\4\36\251\0\2\36\15\0\4\36"+ - "\154\0\1\36\15\0\2\36\10\0\2\36\1\0\1\36"+ - "\1\0\1\36\11\0\1\36\11\0\2\36\6\0\1\36"+ - "\2\0\4\36\3\0\1\36\2\0\2\36\1\0\3\36"+ - "\5\0\1\36\1\0\2\36\2\0\2\36\1\0\4\36"+ - "\5\0\1\36\1\0\2\36\141\0\1\30\4\0\1\30"+ - "\11\0\1\30\22\0\1\30\3\0\1\30\13\0\1\37"+ - "\2\0\1\37\10\0\1\30\12\0\4\37\45\0\1\30"+ - "\115\0\1\30\26\0\2\30\23\0\1\73\1\30\44\0"+ 
- "\1\37\21\0\1\73\142\0\1\73\11\0\1\37\15\0"+ - "\4\30\2\0\2\30\14\0\3\30\1\37\1\0\2\37"+ - "\11\0\3\30\3\0\1\30\1\0\1\37\4\0\1\37"+ - "\2\30\5\0\4\37\2\0\1\30\1\37\12\0\4\73"+ - "\1\0\2\30\1\0\1\37\7\0\1\30\117\0\1\30"+ + "\2\0\2\35\1\0\3\35\1\0\2\35\1\0\1\35"+ + "\10\0\1\35\1\0\2\35\2\0\2\35\1\0\4\35"+ + "\23\0\1\35\203\0\1\36\2\0\1\36\33\0\4\36"+ + "\310\0\1\36\171\0\1\36\44\0\1\36\1\0\2\36"+ + "\21\0\1\36\4\0\1\36\17\0\4\36\3\0\1\36"+ + "\12\0\1\36\275\0\1\36\314\0\4\36\244\0\2\36"+ + "\25\0\4\36\147\0\1\36\15\0\2\36\10\0\2\36"+ + "\1\0\1\36\1\0\1\36\11\0\1\36\11\0\2\36"+ + "\6\0\1\36\2\0\4\36\3\0\1\36\2\0\2\36"+ + "\1\0\3\36\1\0\2\36\1\0\1\36\10\0\1\36"+ + "\1\0\2\36\2\0\2\36\1\0\4\36\23\0\1\36"+ + "\324\0\1\154\1\157\2\0\1\155\45\154\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\220\0\32\323\1\0\12\323\12\0\1\324\227\0\1\325"+ + "\53\0\1\156\227\0\2\157\2\0\72\157\7\0\1\30"+ + "\4\0\1\30\11\0\1\30\22\0\1\30\3\0\1\30"+ + "\13\0\1\41\2\0\1\41\10\0\1\30\22\0\4\41"+ + "\35\0\1\30\120\0\1\30\26\0\2\30\23\0\1\75"+ + "\1\30\40\0\1\75\13\0\1\41\157\0\1\75\11\0"+ + "\1\41\15\0\4\30\2\0\2\30\14\0\3\30\1\41"+ + "\1\0\2\41\11\0\3\30\3\0\1\30\1\0\1\41"+ + "\4\0\1\41\2\30\1\0\4\75\1\0\2\30\5\0"+ + "\4\41\2\0\1\30\1\41\12\0\1\41\7\0\1\30"+ + "\122\0\1\30\4\0\1\30\6\0\1\30\3\0\1\30"+ + "\6\0\1\30\5\0\1\30\2\0\2\30\1\0\17\30"+ + "\2\0\1\30\13\0\7\30\2\0\1\30\1\0\1\30"+ + "\1\0\2\30\2\0\1\30\1\0\3\30\2\0\1\30"+ + "\1\0\1\30\1\0\1\30\1\0\1\30\4\0\1\41"+ + "\1\0\2\30\6\0\1\30\7\0\1\30\1\0\1\30"+ + "\125\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ + "\7\0\1\30\31\0\20\30\5\0\3\30\4\0\1\30"+ + "\6\0\1\30\3\0\2\30\2\0\2\30\4\0\1\30"+ + "\4\41\1\0\1\30\2\0\1\30\4\0\1\30\1\0"+ + "\1\30\1\0\1\30\226\0\2\41\25\0\4\41\147\0"+ + "\1\41\15\0\2\41\10\0\2\41\1\0\1\41\1\0"+ + "\1\41\11\0\1\41\11\0\2\41\6\0\1\41\2\0"+ + "\4\41\3\0\1\41\2\0\2\41\1\0\3\41\1\0"+ + "\2\41\1\0\1\41\10\0\1\41\1\0\2\41\2\0"+ + "\2\41\1\0\4\41\23\0\1\41\120\0\1\326\21\0"+ + 
"\1\327\2\0\1\330\10\0\1\331\22\0\1\332\21\0"+ + "\1\333\2\0\1\334\62\0\1\171\7\0\1\171\4\0"+ + "\1\335\102\0\1\30\4\0\1\30\11\0\1\30\22\0"+ + "\1\30\3\0\1\30\13\0\1\42\2\0\1\42\10\0"+ + "\1\30\22\0\4\42\35\0\1\30\120\0\1\30\26\0"+ + "\2\30\23\0\1\75\1\30\40\0\1\75\13\0\1\42"+ + "\157\0\1\75\11\0\1\42\15\0\4\30\2\0\2\30"+ + "\14\0\3\30\1\42\1\0\2\42\11\0\3\30\3\0"+ + "\1\30\1\0\1\42\4\0\1\42\2\30\1\0\4\75"+ + "\1\0\2\30\5\0\4\42\2\0\1\30\1\42\12\0"+ + "\1\42\7\0\1\30\122\0\1\30\4\0\1\30\6\0"+ + "\1\30\3\0\1\30\6\0\1\30\5\0\1\30\2\0"+ + "\2\30\1\0\17\30\2\0\1\30\13\0\7\30\2\0"+ + "\1\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\3\30\2\0\1\30\1\0\1\30\1\0\1\30\1\0"+ + "\1\30\4\0\1\42\1\0\2\30\6\0\1\30\7\0"+ + "\1\30\1\0\1\30\125\0\1\30\6\0\1\30\3\0"+ + "\1\30\3\0\1\30\7\0\1\30\31\0\20\30\5\0"+ + "\3\30\4\0\1\30\6\0\1\30\3\0\2\30\2\0"+ + "\2\30\4\0\1\30\4\42\1\0\1\30\2\0\1\30"+ + "\4\0\1\30\1\0\1\30\1\0\1\30\226\0\2\42"+ + "\25\0\4\42\147\0\1\42\15\0\2\42\10\0\2\42"+ + "\1\0\1\42\1\0\1\42\11\0\1\42\11\0\2\42"+ + "\6\0\1\42\2\0\4\42\3\0\1\42\2\0\2\42"+ + "\1\0\3\42\1\0\2\42\1\0\1\42\10\0\1\42"+ + "\1\0\2\42\2\0\2\42\1\0\4\42\23\0\1\42"+ + "\113\0\1\55\1\0\1\56\2\0\1\245\1\0\1\246"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\247\2\0\1\250\4\0\1\251\3\0\1\252"+ + "\17\0\1\71\2\0\1\253\21\0\1\254\2\0\1\255"+ + "\61\0\1\30\1\76\2\0\1\76\1\0\2\76\1\0"+ + "\1\76\2\0\1\201\1\0\2\30\1\336\32\217\13\154"+ + "\1\76\1\154\1\201\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\245\1\0\1\246\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\247\2\0\1\250"+ + "\4\0\1\251\3\0\1\252\17\0\1\71\2\0\1\253"+ + "\21\0\1\254\2\0\1\255\61\0\1\30\1\76\2\0"+ + "\1\76\1\0\2\76\1\0\1\76\2\0\1\201\1\157"+ + "\2\30\1\336\1\337\1\340\1\341\1\342\1\343\1\344"+ + "\1\345\1\346\1\347\1\350\1\351\1\352\1\353\1\354"+ + "\1\355\1\356\1\357\1\360\1\361\1\362\1\363\1\364"+ + "\1\365\1\366\1\367\1\370\1\154\12\371\1\76\1\154"+ + 
"\1\201\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\155\32\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\234\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\235\2\0\1\236\4\0"+ + "\1\67\3\0\1\237\17\0\1\71\2\0\1\240\21\0"+ + "\1\241\2\0\1\242\61\0\1\30\2\75\2\0\2\243"+ + "\1\244\1\0\1\75\2\0\1\372\1\0\1\41\1\30"+ + "\1\373\32\43\1\203\12\204\1\0\1\154\1\374\1\154"+ + "\1\0\1\372\1\156\3\154\2\0\1\243\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\256\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\257\2\0\1\260\4\0\1\67\3\0"+ + "\1\261\17\0\1\71\2\0\1\262\21\0\1\263\2\0"+ + "\1\264\41\0\1\133\17\0\1\30\1\77\1\75\1\135"+ + "\1\76\1\0\1\76\1\77\1\0\1\77\2\0\1\201"+ + "\1\0\1\41\1\30\1\336\32\217\1\154\12\375\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\11\43\1\376\20\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\15\43\1\377\14\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\10\43\1\u0100\21\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + 
"\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\17\43\1\u0101"+ + "\12\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\7\0"+ + "\1\265\1\0\1\266\17\0\1\267\2\0\1\270\4\0"+ + "\1\271\3\0\1\272\22\0\1\273\21\0\1\274\2\0"+ + "\1\275\62\0\1\111\1\31\2\0\3\243\1\0\1\111"+ + "\2\0\1\372\3\0\1\u0102\33\154\12\220\1\0\1\154"+ + "\1\372\1\154\1\0\1\372\1\156\3\154\2\0\1\243"+ + "\1\154\3\0\2\154\7\0\1\265\1\0\1\266\17\0"+ + "\1\267\2\0\1\270\4\0\1\271\3\0\1\272\22\0"+ + "\1\273\21\0\1\274\2\0\1\275\62\0\1\111\1\31"+ + "\2\0\3\243\1\0\1\111\2\0\1\372\1\157\2\0"+ + "\1\u0102\1\u0103\1\u0104\1\u0105\1\u0106\1\u0107\1\u0108\1\u0109"+ + "\1\u010a\1\u010b\1\u010c\1\u010d\1\u010e\1\u010f\1\u0110\1\u0111"+ + "\1\u0112\1\u0113\1\u0114\1\u0115\1\u0116\1\u0117\1\u0118\1\u0119"+ + "\1\u011a\1\u011b\1\u011c\1\154\1\u011d\1\u011e\5\u011d\1\u011f"+ + "\1\u011e\1\u011d\1\0\1\154\1\372\1\154\1\0\1\372"+ + "\1\156\3\154\2\0\1\243\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\100\1\0\1\101\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\102\2\0\1\103\4\0\1\104\3\0\1\105\17\0"+ + "\1\71\2\0\1\106\21\0\1\107\2\0\1\110\61\0"+ + "\1\30\2\31\2\0\2\111\1\112\1\0\1\31\2\0"+ + "\1\212\1\0\1\41\1\30\1\213\32\43\1\203\12\u0120"+ + "\1\0\1\154\1\215\1\154\1\0\1\212\1\156\3\154"+ + "\2\0\1\111\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\276\1\0\1\101\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\277\2\0"+ + "\1\300\4\0\1\104\3\0\1\301\17\0\1\71\2\0"+ + "\1\302\21\0\1\303\2\0\1\304\41\0\1\133\17\0"+ + "\1\30\1\112\1\31\1\135\1\0\2\243\1\112\1\0"+ + "\1\112\2\0\1\372\1\0\1\41\1\30\1\u0102\32\217"+ + "\1\154\12\220\1\0\1\154\1\215\1\154\1\0\1\372"+ + "\1\156\3\154\2\0\1\243\1\154\3\0\2\154\2\0"+ + 
"\1\55\1\0\1\56\2\0\1\100\1\0\1\101\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\102\2\0\1\103\4\0\1\104\3\0\1\105\17\0"+ + "\1\71\2\0\1\106\21\0\1\107\2\0\1\110\61\0"+ + "\1\30\2\31\2\0\2\111\1\112\1\0\1\31\2\0"+ + "\1\212\1\0\1\41\1\30\1\213\32\43\1\203\2\214"+ + "\1\u0120\2\214\2\u0120\2\214\1\u0120\1\0\1\154\1\215"+ + "\1\154\1\0\1\212\1\156\3\154\2\0\1\111\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\336\32\217\1\154\12\375\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\100\1\0"+ + "\1\101\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\102\2\0\1\103\4\0\1\104\3\0"+ + "\1\105\17\0\1\71\2\0\1\106\21\0\1\107\2\0"+ + "\1\110\61\0\1\30\2\31\2\0\2\111\1\112\1\0"+ + "\1\31\2\0\1\212\1\0\1\41\1\30\1\u0121\32\217"+ + "\1\154\12\220\1\0\1\154\1\215\1\154\1\0\1\212"+ + "\1\156\3\154\2\0\1\111\1\154\3\0\2\154\220\0"+ + "\4\u0122\2\0\1\u0122\15\0\1\u0122\6\0\12\u0122\1\u0123"+ + "\307\0\1\u0124\114\0\1\54\2\0\1\54\33\0\4\54"+ + "\310\0\1\54\171\0\1\54\44\0\1\54\1\0\2\54"+ + "\21\0\1\54\4\0\1\54\17\0\4\54\3\0\1\54"+ + "\12\0\1\54\275\0\1\54\314\0\4\54\244\0\2\54"+ + "\25\0\4\54\147\0\1\54\15\0\2\54\10\0\2\54"+ + "\1\0\1\54\1\0\1\54\11\0\1\54\11\0\2\54"+ + "\6\0\1\54\2\0\4\54\3\0\1\54\2\0\2\54"+ + "\1\0\3\54\1\0\2\54\1\0\1\54\10\0\1\54"+ + "\1\0\2\54\2\0\2\54\1\0\4\54\23\0\1\54"+ + "\257\0\1\u0125\26\0\2\u0125\121\0\1\30\4\0\1\30"+ + "\11\0\1\30\22\0\1\30\3\0\1\30\13\0\1\75"+ + "\2\0\1\75\10\0\1\30\22\0\4\75\35\0\1\30"+ + "\120\0\1\30\26\0\2\30\23\0\1\75\1\30\40\0"+ + "\1\75\13\0\1\75\157\0\1\75\11\0\1\75\15\0"+ + "\4\30\2\0\2\30\14\0\3\30\1\75\1\0\2\75"+ + "\11\0\3\30\3\0\1\30\1\0\1\75\4\0\1\75"+ + "\2\30\1\0\4\75\1\0\2\30\5\0\4\75\2\0"+ + "\1\30\1\75\12\0\1\75\7\0\1\30\122\0\1\30"+ 
"\4\0\1\30\6\0\1\30\3\0\1\30\6\0\1\30"+ "\5\0\1\30\2\0\2\30\1\0\17\30\2\0\1\30"+ - "\13\0\7\30\2\0\1\30\1\0\1\30\1\0\1\30"+ + "\13\0\7\30\2\0\1\30\1\0\1\30\1\0\2\30"+ + "\2\0\1\30\1\0\3\30\2\0\1\30\1\0\1\30"+ + "\1\0\1\30\1\0\1\30\4\0\1\75\1\0\2\30"+ + "\6\0\1\30\7\0\1\30\1\0\1\30\125\0\1\30"+ + "\6\0\1\30\3\0\1\30\3\0\1\30\7\0\1\30"+ + "\31\0\20\30\5\0\3\30\4\0\1\30\6\0\1\30"+ + "\3\0\2\30\2\0\2\30\4\0\1\30\4\75\1\0"+ + "\1\30\2\0\1\30\4\0\1\30\1\0\1\30\1\0"+ + "\1\30\226\0\2\75\25\0\4\75\147\0\1\75\15\0"+ + "\2\75\10\0\2\75\1\0\1\75\1\0\1\75\11\0"+ + "\1\75\11\0\2\75\6\0\1\75\2\0\4\75\3\0"+ + "\1\75\2\0\2\75\1\0\3\75\1\0\2\75\1\0"+ + "\1\75\10\0\1\75\1\0\2\75\2\0\2\75\1\0"+ + "\4\75\23\0\1\75\120\0\1\u0126\1\0\1\u0127\17\0"+ + "\1\u0128\2\0\1\u0129\4\0\1\u012a\3\0\1\u012b\22\0"+ + "\1\u012c\21\0\1\u012d\2\0\1\u012e\62\0\1\243\1\75"+ + "\2\0\3\243\1\0\1\243\2\0\1\243\3\0\1\243"+ + "\33\0\12\75\2\0\1\243\2\0\1\243\6\0\1\243"+ + "\10\0\1\55\1\0\1\56\2\0\1\u012f\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\u0130\2\0\1\u0131\4\0\1\67\3\0\1\u0132"+ + "\17\0\1\71\2\0\1\u0133\21\0\1\u0134\2\0\1\u0135"+ + "\41\0\1\133\17\0\1\30\1\244\1\75\1\135\1\0"+ + "\2\243\1\244\1\0\1\244\2\0\1\243\1\0\1\41"+ + "\1\30\1\243\32\30\1\0\12\75\2\0\1\244\2\0"+ + "\1\243\6\0\1\243\16\0\1\30\4\0\1\30\11\0"+ + "\1\30\22\0\1\30\3\0\1\30\13\0\1\76\2\0"+ + "\1\76\10\0\1\30\22\0\4\76\35\0\1\30\123\0"+ + "\1\30\3\0\4\30\1\0\1\30\4\0\1\30\1\0"+ + "\2\30\2\0\2\30\2\0\3\30\1\0\1\30\1\0"+ + "\1\30\2\0\4\30\1\0\3\30\1\0\1\30\1\0"+ + "\3\30\1\0\2\30\1\0\4\30\1\0\2\30\2\0"+ + "\10\30\1\0\2\30\1\0\10\30\2\0\7\30\1\0"+ + "\10\30\1\0\6\30\1\0\1\30\1\0\2\30\2\0"+ + "\1\30\1\0\1\30\3\0\3\30\114\0\1\30\26\0"+ + "\2\30\24\0\1\30\54\0\1\76\171\0\1\76\15\0"+ + "\4\30\2\0\2\30\14\0\3\30\1\76\1\0\2\76"+ + "\11\0\3\30\3\0\1\30\1\0\1\76\4\0\1\76"+ + "\2\30\6\0\2\30\5\0\4\76\2\0\1\30\1\76"+ + "\12\0\1\76\7\0\1\30\136\0\1\30\3\0\2\30"+ + "\12\0\2\30\1\0\3\30\7\0\1\30\6\0\2\30"+ + 
"\1\0\2\30\6\0\1\30\4\0\2\30\2\0\2\30"+ + "\5\0\3\30\2\0\1\30\15\0\1\30\16\0\1\30"+ + "\7\0\1\30\122\0\1\30\4\0\1\30\6\0\1\30"+ + "\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ + "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ + "\1\0\1\30\1\0\2\30\2\0\1\30\1\0\3\30"+ "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ - "\4\0\1\37\1\0\2\30\5\0\1\30\1\0\1\30"+ - "\2\0\3\30\1\0\1\30\7\0\1\30\1\0\1\30"+ - "\122\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ - "\7\0\1\30\31\0\20\30\5\0\3\30\3\0\1\30"+ - "\3\0\2\30\2\0\2\30\4\0\1\30\4\37\4\0"+ - "\1\30\4\0\1\30\2\0\1\30\4\0\1\30\1\0"+ - "\1\30\1\0\1\30\223\0\2\37\15\0\4\37\154\0"+ - "\1\37\15\0\2\37\10\0\2\37\1\0\1\37\1\0"+ - "\1\37\11\0\1\37\11\0\2\37\6\0\1\37\2\0"+ - "\4\37\3\0\1\37\2\0\2\37\1\0\3\37\5\0"+ - "\1\37\1\0\2\37\2\0\2\37\1\0\4\37\5\0"+ - "\1\37\1\0\2\37\343\0\1\160\45\161\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\1\206\3\0"+ - "\2\161\213\0\32\270\1\0\12\270\13\0\1\271\13\0"+ - "\1\53\1\0\1\54\2\0\1\221\1\0\1\222\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\223\2\0\1\224\4\0\1\225\3\0\1\226\17\0"+ - "\1\67\2\0\1\227\21\0\1\230\2\0\1\231\57\0"+ - "\1\30\1\74\7\0\1\74\2\0\1\30\1\160\1\272"+ - "\1\273\1\274\1\275\1\276\1\277\1\300\1\301\1\302"+ - "\1\303\1\304\1\305\1\306\1\307\1\310\1\311\1\312"+ - "\1\313\1\314\1\315\1\316\1\317\1\320\1\321\1\322"+ - "\1\323\1\161\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\1\206\3\0\2\161\212\0\1\160"+ - "\32\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\211\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\212\2\0"+ - "\1\213\4\0\1\65\3\0\1\214\17\0\1\67\2\0"+ - "\1\215\21\0\1\216\2\0\1\217\57\0\1\30\2\73"+ - "\2\0\2\220\1\75\1\0\1\73\2\0\1\30\1\325"+ - "\32\41\1\164\12\165\1\0\1\161\1\166\1\161\1\0"+ - "\2\326\1\162\3\161\2\0\1\220\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\232\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\233\2\0\1\234\4\0\1\65\3\0\1\235"+ - 
"\17\0\1\67\2\0\1\236\21\0\1\237\2\0\1\240"+ - "\41\0\1\130\15\0\1\30\1\75\1\73\1\132\3\0"+ - "\1\75\1\0\1\75\2\0\1\30\1\160\32\201\1\161"+ - "\12\327\1\0\1\161\1\166\1\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\221\1\0\1\222\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\223\2\0"+ - "\1\224\4\0\1\225\3\0\1\226\17\0\1\67\2\0"+ - "\1\227\21\0\1\230\2\0\1\231\57\0\1\30\1\74"+ - "\7\0\1\74\2\0\1\30\1\160\32\201\13\161\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\163\11\41\1\330"+ - "\20\41\1\164\12\165\1\74\1\161\1\166\1\161\1\0"+ - "\1\161\1\167\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\55\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\63\2\0\1\64\4\0\1\65\3\0\1\66"+ - "\17\0\1\67\2\0\1\70\21\0\1\71\2\0\1\72"+ - "\57\0\2\30\1\73\1\0\1\74\1\0\1\74\1\75"+ - "\1\0\1\30\2\0\1\30\1\163\15\41\1\331\14\41"+ - "\1\164\12\165\1\74\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\3\161\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\10\41\1\332\21\41\1\164"+ - "\12\165\1\74\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\17\41\1\333\12\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\7\0\1\241\1\0"+ - 
"\1\242\17\0\1\243\2\0\1\244\4\0\1\245\3\0"+ - "\1\246\22\0\1\247\21\0\1\250\2\0\1\251\60\0"+ - "\1\107\1\31\6\0\1\107\3\0\1\160\1\334\1\335"+ + "\4\0\1\76\1\0\2\30\6\0\1\30\7\0\1\30"+ + "\1\0\1\30\125\0\1\30\6\0\1\30\3\0\1\30"+ + "\3\0\1\30\7\0\1\30\31\0\20\30\5\0\3\30"+ + "\4\0\1\30\6\0\1\30\3\0\2\30\2\0\2\30"+ + "\4\0\1\30\4\76\1\0\1\30\2\0\1\30\4\0"+ + "\1\30\1\0\1\30\1\0\1\30\226\0\2\76\25\0"+ + "\4\76\147\0\1\76\15\0\2\76\10\0\2\76\1\0"+ + "\1\76\1\0\1\76\11\0\1\76\11\0\2\76\6\0"+ + "\1\76\2\0\4\76\3\0\1\76\2\0\2\76\1\0"+ + "\3\76\1\0\2\76\1\0\1\76\10\0\1\76\1\0"+ + "\2\76\2\0\2\76\1\0\4\76\23\0\1\76\121\0"+ + "\1\30\4\0\1\30\11\0\1\30\22\0\1\30\3\0"+ + "\1\30\13\0\1\77\2\0\1\77\10\0\1\30\22\0"+ + "\4\77\35\0\1\30\120\0\1\30\26\0\2\30\23\0"+ + "\1\75\1\30\40\0\1\75\13\0\1\77\157\0\1\75"+ + "\11\0\1\77\15\0\4\30\2\0\2\30\14\0\3\30"+ + "\1\77\1\0\2\77\11\0\3\30\3\0\1\30\1\0"+ + "\1\77\4\0\1\77\2\30\1\0\4\75\1\0\2\30"+ + "\5\0\4\77\2\0\1\30\1\77\12\0\1\77\7\0"+ + "\1\30\122\0\1\30\4\0\1\30\6\0\1\30\3\0"+ + "\1\30\6\0\1\30\5\0\1\30\2\0\2\30\1\0"+ + "\17\30\2\0\1\30\13\0\7\30\2\0\1\30\1\0"+ + "\1\30\1\0\2\30\2\0\1\30\1\0\3\30\2\0"+ + "\1\30\1\0\1\30\1\0\1\30\1\0\1\30\4\0"+ + "\1\77\1\0\2\30\6\0\1\30\7\0\1\30\1\0"+ + "\1\30\125\0\1\30\6\0\1\30\3\0\1\30\3\0"+ + "\1\30\7\0\1\30\31\0\20\30\5\0\3\30\4\0"+ + "\1\30\6\0\1\30\3\0\2\30\2\0\2\30\4\0"+ + "\1\30\4\77\1\0\1\30\2\0\1\30\4\0\1\30"+ + "\1\0\1\30\1\0\1\30\226\0\2\77\25\0\4\77"+ + "\147\0\1\77\15\0\2\77\10\0\2\77\1\0\1\77"+ + "\1\0\1\77\11\0\1\77\11\0\2\77\6\0\1\77"+ + "\2\0\4\77\3\0\1\77\2\0\2\77\1\0\3\77"+ + "\1\0\2\77\1\0\1\77\10\0\1\77\1\0\2\77"+ + "\2\0\2\77\1\0\4\77\23\0\1\77\203\0\1\111"+ + "\2\0\1\111\33\0\4\111\174\0\1\31\104\0\1\31"+ + "\240\0\1\31\41\0\1\31\13\0\1\111\157\0\1\31"+ + "\11\0\1\111\44\0\1\111\1\0\2\111\21\0\1\111"+ + "\4\0\1\111\3\0\4\31\10\0\4\111\3\0\1\111"+ + "\12\0\1\111\256\0\2\31\325\0\1\111\314\0\4\111"+ + "\244\0\2\111\25\0\4\111\147\0\1\111\15\0\2\111"+ + 
"\10\0\2\111\1\0\1\111\1\0\1\111\11\0\1\111"+ + "\11\0\2\111\6\0\1\111\2\0\4\111\3\0\1\111"+ + "\2\0\2\111\1\0\3\111\1\0\2\111\1\0\1\111"+ + "\10\0\1\111\1\0\2\111\2\0\2\111\1\0\4\111"+ + "\23\0\1\111\121\0\1\30\4\0\1\30\11\0\1\30"+ + "\22\0\1\30\3\0\1\30\13\0\1\112\2\0\1\112"+ + "\10\0\1\30\22\0\4\112\35\0\1\30\120\0\1\30"+ + "\26\0\2\30\23\0\1\31\1\30\40\0\1\31\13\0"+ + "\1\112\157\0\1\31\11\0\1\112\15\0\4\30\2\0"+ + "\2\30\14\0\3\30\1\112\1\0\2\112\11\0\3\30"+ + "\3\0\1\30\1\0\1\112\4\0\1\112\2\30\1\0"+ + "\4\31\1\0\2\30\5\0\4\112\2\0\1\30\1\112"+ + "\12\0\1\112\7\0\1\30\122\0\1\30\4\0\1\30"+ + "\6\0\1\30\3\0\1\30\6\0\1\30\5\0\1\30"+ + "\2\0\2\30\1\0\17\30\2\0\1\30\13\0\7\30"+ + "\2\0\1\30\1\0\1\30\1\0\2\30\2\0\1\30"+ + "\1\0\3\30\2\0\1\30\1\0\1\30\1\0\1\30"+ + "\1\0\1\30\4\0\1\112\1\0\2\30\6\0\1\30"+ + "\7\0\1\30\1\0\1\30\125\0\1\30\6\0\1\30"+ + "\3\0\1\30\3\0\1\30\7\0\1\30\31\0\20\30"+ + "\5\0\3\30\4\0\1\30\6\0\1\30\3\0\2\30"+ + "\2\0\2\30\4\0\1\30\4\112\1\0\1\30\2\0"+ + "\1\30\4\0\1\30\1\0\1\30\1\0\1\30\226\0"+ + "\2\112\25\0\4\112\147\0\1\112\15\0\2\112\10\0"+ + "\2\112\1\0\1\112\1\0\1\112\11\0\1\112\11\0"+ + "\2\112\6\0\1\112\2\0\4\112\3\0\1\112\2\0"+ + "\2\112\1\0\3\112\1\0\2\112\1\0\1\112\10\0"+ + "\1\112\1\0\2\112\2\0\2\112\1\0\4\112\23\0"+ + "\1\112\121\0\1\30\4\0\1\30\11\0\1\30\22\0"+ + "\1\30\3\0\1\30\13\0\1\123\2\0\1\123\10\0"+ + "\1\30\22\0\4\123\35\0\1\30\120\0\1\30\26\0"+ + "\2\30\23\0\1\75\1\30\40\0\1\75\13\0\1\123"+ + "\157\0\1\75\11\0\1\123\15\0\4\30\2\0\2\30"+ + "\14\0\3\30\1\123\1\0\2\123\11\0\3\30\3\0"+ + "\1\30\1\0\1\123\4\0\1\123\2\30\1\0\4\75"+ + "\1\0\2\30\5\0\4\123\2\0\1\30\1\123\12\0"+ + "\1\123\7\0\1\30\122\0\1\30\4\0\1\30\6\0"+ + "\1\30\3\0\1\30\6\0\1\30\5\0\1\30\2\0"+ + "\2\30\1\0\17\30\2\0\1\30\13\0\7\30\2\0"+ + "\1\30\1\0\1\30\1\0\2\30\2\0\1\30\1\0"+ + "\3\30\2\0\1\30\1\0\1\30\1\0\1\30\1\0"+ + "\1\30\4\0\1\123\1\0\2\30\6\0\1\30\7\0"+ + "\1\30\1\0\1\30\125\0\1\30\6\0\1\30\3\0"+ + "\1\30\3\0\1\30\7\0\1\30\31\0\20\30\5\0"+ + 
"\3\30\4\0\1\30\6\0\1\30\3\0\2\30\2\0"+ + "\2\30\4\0\1\30\4\123\1\0\1\30\2\0\1\30"+ + "\4\0\1\30\1\0\1\30\1\0\1\30\226\0\2\123"+ + "\25\0\4\123\147\0\1\123\15\0\2\123\10\0\2\123"+ + "\1\0\1\123\1\0\1\123\11\0\1\123\11\0\2\123"+ + "\6\0\1\123\2\0\4\123\3\0\1\123\2\0\2\123"+ + "\1\0\3\123\1\0\2\123\1\0\1\123\10\0\1\123"+ + "\1\0\2\123\2\0\2\123\1\0\4\123\23\0\1\123"+ + "\203\0\1\135\2\0\1\135\33\0\4\135\310\0\1\135"+ + "\171\0\1\135\44\0\1\135\1\0\2\135\21\0\1\135"+ + "\4\0\1\135\17\0\4\135\3\0\1\135\12\0\1\135"+ + "\275\0\1\135\314\0\4\135\244\0\2\135\25\0\4\135"+ + "\147\0\1\135\15\0\2\135\10\0\2\135\1\0\1\135"+ + "\1\0\1\135\11\0\1\135\11\0\2\135\6\0\1\135"+ + "\2\0\4\135\3\0\1\135\2\0\2\135\1\0\3\135"+ + "\1\0\2\135\1\0\1\135\10\0\1\135\1\0\2\135"+ + "\2\0\2\135\1\0\4\135\23\0\1\135\330\0\1\u0136"+ + "\32\323\1\u0137\12\323\236\0\2\324\2\0\60\324\1\0"+ + "\1\u0138\3\324\1\u0139\1\0\3\324\212\0\1\154\1\157"+ + "\2\0\46\154\1\0\3\154\1\0\1\154\1\0\3\154"+ + "\3\0\1\154\3\0\2\154\72\0\1\171\2\0\1\171"+ + "\33\0\4\171\310\0\1\171\171\0\1\171\44\0\1\171"+ + "\1\0\2\171\21\0\1\171\4\0\1\171\17\0\4\171"+ + "\3\0\1\171\12\0\1\171\275\0\1\171\314\0\4\171"+ + "\244\0\2\171\25\0\4\171\147\0\1\171\15\0\2\171"+ + "\10\0\2\171\1\0\1\171\1\0\1\171\11\0\1\171"+ + "\11\0\2\171\6\0\1\171\2\0\4\171\3\0\1\171"+ + "\2\0\2\171\1\0\3\171\1\0\2\171\1\0\1\171"+ + "\10\0\1\171\1\0\2\171\2\0\2\171\1\0\4\171"+ + "\23\0\1\171\113\0\1\55\1\0\1\56\2\0\1\u013a"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\u013b\2\0\1\u013c\4\0\1\67"+ + "\3\0\1\u013d\17\0\1\71\2\0\1\u013e\21\0\1\u013f"+ + "\2\0\1\u0140\61\0\1\30\1\335\1\75\4\0\1\123"+ + "\1\0\1\335\4\0\1\41\1\30\1\0\32\30\1\0"+ + "\12\75\2\0\1\123\22\0\1\55\1\0\1\56\2\0"+ + "\1\245\1\0\1\246\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\247\2\0\1\250\4\0"+ + "\1\251\3\0\1\252\17\0\1\71\2\0\1\253\21\0"+ + "\1\254\2\0\1\255\61\0\1\30\1\76\2\0\1\76"+ + "\1\0\2\76\1\0\1\76\2\0\1\201\1\157\2\30"+ + 
"\1\336\32\217\13\154\1\76\1\154\1\201\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\1\43"+ + "\2\u0141\1\u0142\1\u0143\10\u0141\1\43\1\u0144\5\u0141\6\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\1\u0145\2\u0141"+ + "\1\43\1\u0141\1\u0146\3\u0141\1\u0147\2\u0141\4\43\4\u0141"+ + "\1\43\2\u0141\1\43\2\u0141\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\3\43\1\u0141\1\43\1\u0141\2\43\1\u0148"+ + "\1\43\1\u0141\10\43\1\u0141\2\43\2\u0141\2\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\1\43\1\u0141\1\u0149"+ + "\2\u0141\2\43\1\u0141\3\43\1\u014a\1\u014b\1\43\1\u014c"+ + "\2\u0141\11\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + 
"\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\3\43\1\u0141\1\43\1\u0141\10\43\1\u0141\1\43\2\u0141"+ + "\10\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\4\43"+ + "\1\u014d\5\43\1\u0141\17\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\4\43\2\u0141\2\43\1\u0141\1\43\1\u0141"+ + "\13\43\2\u0141\2\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\1\u014e\1\43\2\u0141\1\u014f\1\u0150\12\u0141\1\u0151"+ + "\1\u0141\2\43\2\u0141\3\43\1\u0141\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\2\43\4\u0141\3\43\2\u0141\1\u0152"+ + "\1\u0141\1\43\2\u0141\12\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + 
"\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\1\u0153\1\u0141\2\43\1\u0141\3\43\1\u0154"+ + "\5\43\3\u0141\3\43\1\u0141\1\43\1\u0141\1\43\2\u0141"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\3\u0141\1\u0155"+ + "\1\u0141\1\u0156\1\43\1\u0141\1\u0157\7\u0141\1\u0158\3\u0141"+ + "\1\43\2\u0141\1\43\2\u0141\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\1\u0159\1\u0141\1\43\1\u015a\6\u0141\3\43"+ + "\1\u0141\2\43\1\u0141\2\43\1\u0141\6\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\1\u0141\31\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\1\u0141\2\43\1\u0141\1\u015b"+ + 
"\1\u015c\2\u0141\1\43\1\u015d\2\u0141\2\43\2\u0141\1\43"+ + "\1\u0141\3\43\1\u015e\1\u0141\2\43\1\u0141\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\3\u0141\1\u015f\2\u0141\1\43"+ + "\1\u0141\1\u0160\3\u0141\3\43\2\u0141\1\43\10\u0141\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\1\u0161\2\u0141\1\u0162"+ + "\1\u0163\1\u0164\2\u0141\1\u0165\3\u0141\1\43\1\u0141\1\43"+ + "\1\u0141\1\43\1\u0141\1\43\1\u0141\1\43\4\u0141\1\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\1\u0141\6\43"+ + "\1\u0141\3\43\1\u0166\2\43\1\u0141\4\43\1\u0141\2\43"+ + "\1\u0141\2\43\1\u0141\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\6\43\1\u0141\7\43\1\u0141\13\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + 
"\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\13\43\1\u0167\6\43\1\u0168"+ + "\7\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\1\u0141"+ + "\11\43\1\u0141\6\43\1\u0141\10\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\1\u0141\1\43\6\u0141\1\u0169\1\43"+ + "\2\u0141\2\43\2\u0141\1\43\1\u0141\1\43\3\u0141\1\43"+ + "\3\u0141\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\4\43"+ + "\1\u0141\1\u016a\4\43\2\u0141\3\43\2\u0141\5\43\1\u0141"+ + "\3\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\3\43"+ + "\2\u0141\2\43\1\u0141\1\u016b\1\43\2\u0141\1\43\1\u0141"+ + 
"\3\43\1\u0141\1\43\1\u0141\1\43\1\u0141\3\43\1\u0141"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\3\43\1\u0141"+ + "\1\43\1\u016c\4\43\1\u0141\2\43\1\u0141\14\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\2\u0141\1\43\1\u016d"+ + "\1\43\1\u016e\1\43\2\u0141\2\43\1\u0141\4\43\1\u0141"+ + "\11\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\3\43"+ + "\1\u0141\13\43\1\u0141\12\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\7\0\1\u0126\1\0\1\u0127\17\0"+ + "\1\u0128\2\0\1\u0129\4\0\1\u012a\3\0\1\u012b\22\0"+ + "\1\u012c\21\0\1\u012d\2\0\1\u012e\62\0\1\243\1\75"+ + "\2\0\3\243\1\0\1\243\2\0\1\372\3\0\1\u0102"+ + "\33\154\12\375\1\0\1\154\1\372\1\154\1\0\1\372"+ + "\1\156\3\154\2\0\1\243\1\154\3\0\2\154\7\0"+ + "\1\u0126\1\0\1\u0127\17\0\1\u0128\2\0\1\u0129\4\0"+ + "\1\u012a\3\0\1\u012b\22\0\1\u012c\21\0\1\u012d\2\0"+ + "\1\u012e\62\0\1\243\1\75\2\0\3\243\1\0\1\243"+ + "\2\0\1\372\1\157\2\0\1\u0102\1\u0103\1\u0104\1\u0105"+ + 
"\1\u0106\1\u0107\1\u0108\1\u0109\1\u010a\1\u010b\1\u010c\1\u010d"+ + "\1\u010e\1\u010f\1\u0110\1\u0111\1\u0112\1\u0113\1\u0114\1\u0115"+ + "\1\u0116\1\u0117\1\u0118\1\u0119\1\u011a\1\u011b\1\u011c\1\154"+ + "\12\204\1\0\1\154\1\372\1\154\1\0\1\372\1\156"+ + "\3\154\2\0\1\243\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\u012f\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\u0130"+ + "\2\0\1\u0131\4\0\1\67\3\0\1\u0132\17\0\1\71"+ + "\2\0\1\u0133\21\0\1\u0134\2\0\1\u0135\41\0\1\133"+ + "\17\0\1\30\1\244\1\75\1\135\1\0\2\243\1\244"+ + "\1\0\1\244\2\0\1\372\1\0\1\41\1\30\1\u0102"+ + "\32\217\1\154\12\375\1\0\1\154\1\374\1\154\1\0"+ + "\1\372\1\156\3\154\2\0\1\243\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\234\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\235\2\0\1\236\4\0\1\67\3\0\1\237"+ + "\17\0\1\71\2\0\1\240\21\0\1\241\2\0\1\242"+ + "\61\0\1\30\2\75\2\0\2\243\1\244\1\0\1\75"+ + "\2\0\1\372\1\0\1\41\1\30\1\u0102\32\217\1\154"+ + "\12\375\1\0\1\154\1\374\1\154\1\0\1\372\1\156"+ + "\3\154\2\0\1\243\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\3\43\1\u0170"+ + "\26\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\32\43"+ + "\1\203\12\204\1\u0171\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + 
"\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\11\43\1\u0172"+ + "\20\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\15\43"+ + "\1\u0173\14\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\7\0\1\u0126\1\0\1\u0127\17\0\1\u0128\2\0\1\u0129"+ + "\4\0\1\u012a\3\0\1\u012b\22\0\1\u012c\21\0\1\u012d"+ + "\2\0\1\u012e\62\0\1\243\1\75\2\0\3\243\1\0"+ + "\1\243\2\0\1\372\1\157\2\0\1\u0102\33\154\12\375"+ + "\1\0\1\154\1\372\1\154\1\0\1\372\1\156\3\154"+ + "\2\0\1\243\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\1\371\2\u0174\1\u0175\1\u0176\10\u0174\1\371\1\u0177"+ + "\5\u0174\6\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\u0178\2\u0174\1\371\1\u0174\1\u0179\3\u0174"+ + "\1\u017a\2\u0174\4\371\4\u0174\1\371\2\u0174\1\371\2\u0174"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\3\371\1\u0174\1\371\1\u0174\2\371\1\u017b\1\371\1\u0174"+ + "\10\371\1\u0174\2\371\2\u0174\2\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\371\1\u0174\1\u017c"+ + "\2\u0174\2\371\1\u0174\3\371\1\u017d\1\u017e\1\371\1\u017f"+ + "\2\u0174\11\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\3\371\1\u0174\1\371\1\u0174\10\371\1\u0174"+ + "\1\371\2\u0174\10\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\4\371\1\u0180\5\371\1\u0174\17\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + 
"\4\371\2\u0174\2\371\1\u0174\1\371\1\u0174\13\371\2\u0174"+ + "\2\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\1\u0181\1\371\2\u0174\1\u0182\1\u0183\12\u0174\1\u0184"+ + "\1\u0174\2\371\2\u0174\3\371\1\u0174\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\2\371\4\u0174\3\371"+ + "\2\u0174\1\u0185\1\u0174\1\371\2\u0174\12\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u0186\1\u0174"+ + "\2\371\1\u0174\3\371\1\u0187\5\371\3\u0174\3\371\1\u0174"+ + "\1\371\1\u0174\1\371\2\u0174\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\3\u0174\1\u0188\1\u0174\1\u0189"+ + "\1\371\1\u0174\1\u018a\7\u0174\1\u018b\3\u0174\1\371\2\u0174"+ + "\1\371\2\u0174\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\u018c\1\u0174\1\371\1\u018d\6\u0174\3\371"+ + "\1\u0174\2\371\1\u0174\2\371\1\u0174\6\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u0174\31\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\u0174\2\371\1\u0174\1\u018e\1\u018f\2\u0174\1\371\1\u0190"+ + "\2\u0174\2\371\2\u0174\1\371\1\u0174\3\371\1\u0191\1\u0174"+ + "\2\371\1\u0174\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\3\u0174\1\u0192\2\u0174\1\371\1\u0174\1\u0193"+ + "\3\u0174\3\371\2\u0174\1\371\10\u0174\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\u0194\2\u0174\1\u0195"+ + "\1\u0196\1\u0197\2\u0174\1\u0198\3\u0174\1\371\1\u0174\1\371"+ + "\1\u0174\1\371\1\u0174\1\371\1\u0174\1\371\4\u0174\1\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + 
"\1\u0174\6\371\1\u0174\3\371\1\u0199\2\371\1\u0174\4\371"+ + "\1\u0174\2\371\1\u0174\2\371\1\u0174\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\6\371\1\u0174\7\371"+ + "\1\u0174\13\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\13\371\1\u019a\6\371\1\u019b\7\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u0174"+ + "\11\371\1\u0174\6\371\1\u0174\10\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\u0174\1\371\6\u0174"+ + "\1\u019c\1\371\2\u0174\2\371\2\u0174\1\371\1\u0174\1\371"+ + "\3\u0174\1\371\3\u0174\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\4\371\1\u0174\1\u019d\4\371\2\u0174"+ + "\3\371\2\u0174\5\371\1\u0174\3\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\3\371\2\u0174\2\371"+ + "\1\u0174\1\u019e\1\371\2\u0174\1\371\1\u0174\3\371\1\u0174"+ + "\1\371\1\u0174\1\371\1\u0174\3\371\1\u0174\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u0174"+ + "\1\371\1\u019f\4\371\1\u0174\2\371\1\u0174\14\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\2\u0174"+ + "\1\371\1\u01a0\1\371\1\u01a1\1\371\2\u0174\2\371\1\u0174"+ + "\4\371\1\u0174\11\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\3\371\1\u0174\13\371\1\u0174\12\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\100\1\0\1\101\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\102\2\0\1\103"+ + "\4\0\1\104\3\0\1\105\17\0\1\71\2\0\1\106"+ + "\21\0\1\107\2\0\1\110\61\0\1\30\2\31\2\0"+ + "\2\111\1\112\1\0\1\31\2\0\1\212\1\0\1\41"+ + 
"\1\30\1\u01a2\32\43\1\203\12\u01a3\1\0\1\154\1\215"+ + "\1\154\1\0\1\212\1\156\3\154\2\0\1\111\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\100"+ + "\1\0\1\101\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\102\2\0\1\103\4\0\1\104"+ + "\3\0\1\105\17\0\1\71\2\0\1\106\21\0\1\107"+ + "\2\0\1\110\61\0\1\30\2\31\2\0\2\111\1\112"+ + "\1\0\1\31\2\0\1\212\1\0\1\41\1\30\1\u01a2"+ + "\32\43\1\203\12\u011d\1\0\1\154\1\215\1\154\1\0"+ + "\1\212\1\156\3\154\2\0\1\111\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\100\1\0\1\101"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\102\2\0\1\103\4\0\1\104\3\0\1\105"+ + "\17\0\1\71\2\0\1\106\21\0\1\107\2\0\1\110"+ + "\61\0\1\30\2\31\2\0\2\111\1\112\1\0\1\31"+ + "\2\0\1\212\1\0\1\41\1\30\1\u01a2\32\43\1\203"+ + "\2\u011d\1\u01a3\1\u011d\1\u01a4\2\u01a3\2\u011d\1\u01a3\1\0"+ + "\1\154\1\215\1\154\1\0\1\212\1\156\3\154\2\0"+ + "\1\111\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\100\1\0\1\101\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\102\2\0\1\103"+ + "\4\0\1\104\3\0\1\105\17\0\1\71\2\0\1\106"+ + "\21\0\1\107\2\0\1\110\61\0\1\30\2\31\2\0"+ + "\2\111\1\112\1\0\1\31\2\0\1\212\1\0\1\41"+ + "\1\30\1\u01a5\32\43\1\203\12\u0120\1\0\1\154\1\215"+ + "\1\154\1\0\1\212\1\156\3\154\2\0\1\111\1\154"+ + "\3\0\2\154\7\0\1\265\1\0\1\266\17\0\1\267"+ + "\2\0\1\270\4\0\1\271\3\0\1\272\22\0\1\273"+ + "\21\0\1\274\2\0\1\275\62\0\1\111\1\31\2\0"+ + "\3\243\1\0\1\111\2\0\1\372\1\157\2\0\1\u0102"+ + "\33\154\12\220\1\0\1\154\1\372\1\154\1\0\1\372"+ + "\1\156\3\154\2\0\1\243\1\154\3\0\2\154\220\0"+ + "\4\u01a6\2\0\1\u01a6\15\0\1\u01a6\6\0\12\u01a6\1\u0123"+ + "\242\0\4\u01a7\2\0\1\u01a7\15\0\1\u01a7\6\0\12\u01a7"+ + "\1\u01a8\242\0\4\u01a9\2\0\1\u01a9\15\0\1\u01a9\6\0"+ + "\1\u01aa\1\u01ab\5\u01aa\1\u01ac\1\u01ab\1\u01aa\13\0\1\u01ad"+ + "\16\0\1\u01ae\21\0\1\u01af\2\0\1\u01b0\10\0\1\u01b1"+ + "\22\0\1\u01b2\21\0\1\u01b3\2\0\1\u01b4\55\0\1\233"+ + "\4\0\1\u0125\7\0\1\u0125\171\0\1\243\2\0\1\243"+ + "\33\0\4\243\174\0\1\75\104\0\1\75\240\0\1\75"+ + 
"\41\0\1\75\13\0\1\243\157\0\1\75\11\0\1\243"+ + "\44\0\1\243\1\0\2\243\21\0\1\243\4\0\1\243"+ + "\3\0\4\75\10\0\4\243\3\0\1\243\12\0\1\243"+ + "\256\0\2\75\325\0\1\243\314\0\4\243\244\0\2\243"+ + "\25\0\4\243\147\0\1\243\15\0\2\243\10\0\2\243"+ + "\1\0\1\243\1\0\1\243\11\0\1\243\11\0\2\243"+ + "\6\0\1\243\2\0\4\243\3\0\1\243\2\0\2\243"+ + "\1\0\3\243\1\0\2\243\1\0\1\243\10\0\1\243"+ + "\1\0\2\243\2\0\2\243\1\0\4\243\23\0\1\243"+ + "\121\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ + "\3\0\1\30\13\0\1\244\2\0\1\244\10\0\1\30"+ + "\22\0\4\244\35\0\1\30\120\0\1\30\26\0\2\30"+ + "\23\0\1\75\1\30\40\0\1\75\13\0\1\244\157\0"+ + "\1\75\11\0\1\244\15\0\4\30\2\0\2\30\14\0"+ + "\3\30\1\244\1\0\2\244\11\0\3\30\3\0\1\30"+ + "\1\0\1\244\4\0\1\244\2\30\1\0\4\75\1\0"+ + "\2\30\5\0\4\244\2\0\1\30\1\244\12\0\1\244"+ + "\7\0\1\30\122\0\1\30\4\0\1\30\6\0\1\30"+ + "\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ + "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ + "\1\0\1\30\1\0\2\30\2\0\1\30\1\0\3\30"+ + "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ + "\4\0\1\244\1\0\2\30\6\0\1\30\7\0\1\30"+ + "\1\0\1\30\125\0\1\30\6\0\1\30\3\0\1\30"+ + "\3\0\1\30\7\0\1\30\31\0\20\30\5\0\3\30"+ + "\4\0\1\30\6\0\1\30\3\0\2\30\2\0\2\30"+ + "\4\0\1\30\4\244\1\0\1\30\2\0\1\30\4\0"+ + "\1\30\1\0\1\30\1\0\1\30\226\0\2\244\25\0"+ + "\4\244\147\0\1\244\15\0\2\244\10\0\2\244\1\0"+ + "\1\244\1\0\1\244\11\0\1\244\11\0\2\244\6\0"+ + "\1\244\2\0\4\244\3\0\1\244\2\0\2\244\1\0"+ + "\3\244\1\0\2\244\1\0\1\244\10\0\1\244\1\0"+ + "\2\244\2\0\2\244\1\0\4\244\23\0\1\244\331\0"+ + "\1\u01b5\1\u01b6\1\u01b7\1\u01b8\1\u01b9\1\u01ba\1\u01bb\1\u01bc"+ + "\1\u01bd\1\u01be\1\u01bf\1\u01c0\1\u01c1\1\u01c2\1\u01c3\1\u01c4"+ + "\1\u01c5\1\u01c6\1\u01c7\1\u01c8\1\u01c9\1\u01ca\1\u01cb\1\u01cc"+ + "\1\u01cd\1\u01ce\1\0\12\323\243\0\32\323\1\u0137\12\323"+ + "\236\0\2\324\2\0\72\324\7\0\1\30\4\0\1\30"+ + "\11\0\1\30\22\0\1\30\3\0\1\30\13\0\1\335"+ + "\2\0\1\335\10\0\1\30\22\0\4\335\35\0\1\30"+ + "\120\0\1\30\26\0\2\30\23\0\1\75\1\30\40\0"+ + 
"\1\75\13\0\1\335\157\0\1\75\11\0\1\335\15\0"+ + "\4\30\2\0\2\30\14\0\3\30\1\335\1\0\2\335"+ + "\11\0\3\30\3\0\1\30\1\0\1\335\4\0\1\335"+ + "\2\30\1\0\4\75\1\0\2\30\5\0\4\335\2\0"+ + "\1\30\1\335\12\0\1\335\7\0\1\30\122\0\1\30"+ + "\4\0\1\30\6\0\1\30\3\0\1\30\6\0\1\30"+ + "\5\0\1\30\2\0\2\30\1\0\17\30\2\0\1\30"+ + "\13\0\7\30\2\0\1\30\1\0\1\30\1\0\2\30"+ + "\2\0\1\30\1\0\3\30\2\0\1\30\1\0\1\30"+ + "\1\0\1\30\1\0\1\30\4\0\1\335\1\0\2\30"+ + "\6\0\1\30\7\0\1\30\1\0\1\30\125\0\1\30"+ + "\6\0\1\30\3\0\1\30\3\0\1\30\7\0\1\30"+ + "\31\0\20\30\5\0\3\30\4\0\1\30\6\0\1\30"+ + "\3\0\2\30\2\0\2\30\4\0\1\30\4\335\1\0"+ + "\1\30\2\0\1\30\4\0\1\30\1\0\1\30\1\0"+ + "\1\30\226\0\2\335\25\0\4\335\147\0\1\335\15\0"+ + "\2\335\10\0\2\335\1\0\1\335\1\0\1\335\11\0"+ + "\1\335\11\0\2\335\6\0\1\335\2\0\4\335\3\0"+ + "\1\335\2\0\2\335\1\0\3\335\1\0\2\335\1\0"+ + "\1\335\10\0\1\335\1\0\2\335\2\0\2\335\1\0"+ + "\4\335\23\0\1\335\113\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\u01cf\32\43\1\203\12\204\1\u01d0\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\u01cf\4\43\1\u0166\25\43\1\203"+ + "\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf\15\43"+ + 
"\1\353\14\43\1\203\12\204\1\u01d0\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\u01cf\10\43\1\353\21\43\1\203\12\204\1\u01d0"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\u01cf\12\43\1\u01d4\4\43"+ + "\1\u0141\12\43\1\203\12\204\1\u01d0\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\u01cf\5\43\1\u01d5\4\43\1\u0141\1\u01d6\16\43"+ + "\1\203\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf"+ + "\5\43\1\u01d7\24\43\1\203\12\204\1\u01d0\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + 
"\1\41\1\30\1\202\1\u01d8\3\43\1\u01d9\25\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\20\43\1\u0141\11\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\17\43\1\u01da"+ + "\12\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\20\43"+ + "\1\u01db\11\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf"+ + "\17\43\1\u01dc\12\43\1\203\12\204\1\u01d0\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\7\43\1\u0141\22\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + 
"\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\u01cf\11\43\1\u01dd\20\43\1\203"+ + "\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf\1\u01de"+ + "\31\43\1\203\12\204\1\u01d0\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\30\43\1\u0141\1\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\u01cf\4\43\1\u0149\25\43\1\203\12\204\1\u01d0"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\u01cf\6\43\1\u0166\10\43"+ + "\1\u0141\12\43\1\203\12\204\1\u01d0\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + 
"\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\u01cf\13\43\1\u01df\16\43\1\203\12\204\1\u01d0"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\u01cf\7\43\1\u01e0\22\43"+ + "\1\203\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf"+ + "\13\43\1\u0149\16\43\1\203\12\204\1\u01d0\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\u01cf\24\43\1\u01e1\5\43\1\203\12\204"+ + "\1\u01d0\1\154\1\205\1\154\1\0\1\154\1\156\1\u01d1"+ + "\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\11\43\1\u0141"+ + "\20\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + 
"\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf\16\43"+ + "\1\u01e2\13\43\1\203\12\204\1\u01d0\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\u01cf\12\43\1\u01e3\17\43\1\203\12\204\1\u01d0"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\u01cf\17\43\1\u0141\12\43"+ + "\1\203\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf"+ + "\5\43\1\u0141\24\43\1\203\12\204\1\u01d0\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\16\43\1\u01e4\13\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + 
"\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\u01cf\20\43\1\u01e5\11\43\1\203"+ + "\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf\5\43"+ + "\1\u01e6\24\43\1\203\12\204\1\u01d0\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\u01cf\22\43\1\u01e7\7\43\1\203\12\204\1\u01d0"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\u01cf\13\43\1\u01e8\16\43"+ + "\1\203\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\17\43\1\u01e9\12\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ 
+ "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\1\43\1\u01ea\7\43\1\u0141\20\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\u01cf\1\u01eb\31\43\1\203\12\204"+ + "\1\u01d0\1\154\1\205\1\154\1\0\1\154\1\156\1\u01d1"+ + "\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\u01cf\2\43\1\u01ec"+ + "\27\43\1\203\12\204\1\u01d0\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\15\43\1\u01ed\14\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\5\43\1\u0141\24\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + 
"\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\32\43\1\u01ee\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\22\43\1\u0141\7\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\u01cf\23\43\1\u0141\2\43\1\u01e3\3\43"+ + "\1\203\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\11\43\1\u01ef\20\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\u01cf\17\43\1\u01f0\12\43\1\203\12\204\1\u01d0\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + 
"\1\0\1\41\1\30\1\u01cf\24\43\1\u01ed\5\43\1\203"+ + "\12\204\1\u01d0\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\u01cf\13\43"+ + "\1\u01f1\16\43\1\203\12\204\1\u01d0\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\31\43\1\u01f2\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\1\157\2\0\1\155\1\u0103"+ + "\1\u0104\1\u0105\1\u0106\1\u0107\1\u0108\1\u0109\1\u010a\1\u010b"+ + "\1\u010c\1\u010d\1\u010e\1\u010f\1\u0110\1\u0111\1\u0112\1\u0113"+ + "\1\u0114\1\u0115\1\u0116\1\u0117\1\u0118\1\u0119\1\u011a\1\u011b"+ + "\1\u011c\1\154\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\32\43\1\203\12\204"+ + "\1\u01f3\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\245\1\0\1\246\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\247\2\0\1\250"+ + "\4\0\1\251\3\0\1\252\17\0\1\71\2\0\1\253"+ + "\21\0\1\254\2\0\1\255\61\0\1\30\1\76\2\0"+ + "\1\76\1\0\2\76\1\0\1\76\2\0\1\76\1\0"+ + "\2\30\1\76\32\30\13\0\1\76\1\0\1\76\5\0"+ + "\1\u01f4\14\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + 
"\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\17\43\1\u01f5\12\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\16\43\1\u01f6\13\43\1\203\12\204\1\u01f7"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8\32\371"+ + "\1\203\12\371\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1"+ + "\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u01f8\4\371\1\u0199\25\371\1\203\12\371\1\u01f9"+ + "\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8\15\371"+ + "\1\u010f\14\371\1\203\12\371\1\u01f9\3\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u01f8\10\371\1\u010f\21\371\1\203"+ + "\12\371\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u01f8\12\371\1\u01fa\4\371\1\u0174\12\371\1\203\12\371"+ + "\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8"+ + "\5\371\1\u01fb\4\371\1\u0174\1\u01fc\16\371\1\203\12\371"+ + "\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8"+ + "\5\371\1\u01fd\24\371\1\203\12\371\1\u01f9\3\154\1\0"+ + "\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\u01fe\3\371\1\u01ff"+ + "\25\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\20\371\1\u0174\11\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + 
"\213\0\1\154\3\0\1\u016f\17\371\1\u0200\12\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\20\371"+ + "\1\u0201\11\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u01f8\17\371\1\u0202\12\371\1\203\12\371\1\u01f9"+ + "\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\7\371"+ + "\1\u0174\22\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u01f8\11\371\1\u0203\20\371\1\203\12\371\1\u01f9"+ + "\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8\1\u0204"+ + "\31\371\1\203\12\371\1\u01f9\3\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\30\371\1\u0174\1\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u01f8\4\371\1\u017c"+ + "\25\371\1\203\12\371\1\u01f9\3\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u01f8\6\371\1\u0199\10\371\1\u0174\12\371"+ + "\1\203\12\371\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1"+ + "\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u01f8\13\371\1\u0205\16\371\1\203\12\371\1\u01f9"+ + "\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8\7\371"+ + "\1\u0206\22\371\1\203\12\371\1\u01f9\3\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u01f8\13\371\1\u017c\16\371\1\203"+ + "\12\371\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u01f8\24\371\1\u0207\5\371\1\203\12\371\1\u01f9\3\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\11\371\1\u0174"+ + "\20\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + 
"\1\u01f8\16\371\1\u0208\13\371\1\203\12\371\1\u01f9\3\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u01f8\12\371\1\u0209"+ + "\17\371\1\203\12\371\1\u01f9\3\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u01f8\17\371\1\u0174\12\371\1\203\12\371"+ + "\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8"+ + "\5\371\1\u0174\24\371\1\203\12\371\1\u01f9\3\154\1\0"+ + "\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\16\371\1\u020a\13\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8"+ + "\20\371\1\u020b\11\371\1\203\12\371\1\u01f9\3\154\1\0"+ + "\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u01f8\5\371\1\u020c\24\371"+ + "\1\203\12\371\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1"+ + "\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u01f8\22\371\1\u020d\7\371\1\203\12\371\1\u01f9"+ + "\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8\13\371"+ + "\1\u020e\16\371\1\203\12\371\1\u01f9\3\154\1\0\1\154"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\17\371\1\u020f\12\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\1\371"+ + "\1\u0210\7\371\1\u0174\20\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u01f8\1\u0211\31\371\1\203\12\371"+ + "\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8"+ + "\2\371\1\u0212\27\371\1\203\12\371\1\u01f9\3\154\1\0"+ + "\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\15\371\1\u0213\14\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + 
"\5\371\1\u0174\24\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\u01ee\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\22\371\1\u0174\7\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u01f8\23\371"+ + "\1\u0174\2\371\1\u0209\3\371\1\203\12\371\1\u01f9\3\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\11\371\1\u0214"+ + "\20\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u01f8\17\371\1\u0215\12\371\1\203\12\371\1\u01f9\3\154"+ + "\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u01f8\24\371\1\u0213"+ + "\5\371\1\203\12\371\1\u01f9\3\154\1\0\1\154\1\156"+ + "\1\u01d1\1\u01d2\1\u01d3\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u01f8\13\371\1\u0216\16\371\1\203\12\371"+ + "\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\31\371\1\u0217\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\7\0\1\265"+ + "\1\0\1\266\17\0\1\267\2\0\1\270\4\0\1\271"+ + "\3\0\1\272\22\0\1\273\21\0\1\274\2\0\1\275"+ + "\62\0\1\111\1\31\2\0\3\243\1\0\1\111\2\0"+ + "\1\372\1\157\2\0\1\u0102\1\u0103\1\u0104\1\u0105\1\u0106"+ + "\1\u0107\1\u0108\1\u0109\1\u010a\1\u010b\1\u010c\1\u010d\1\u010e"+ + "\1\u010f\1\u0110\1\u0111\1\u0112\1\u0113\1\u0114\1\u0115\1\u0116"+ + "\1\u0117\1\u0118\1\u0119\1\u011a\1\u011b\1\u011c\1\154\1\u0218"+ + "\1\u0219\5\u0218\1\u021a\1\u0219\1\u0218\1\0\1\154\1\372"+ + "\1\154\1\0\1\372\1\156\3\154\2\0\1\243\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\100"+ + "\1\0\1\101\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\102\2\0\1\103\4\0\1\104"+ + "\3\0\1\105\17\0\1\71\2\0\1\106\21\0\1\107"+ + "\2\0\1\110\61\0\1\30\2\31\2\0\2\111\1\112"+ + "\1\0\1\31\2\0\1\212\1\0\1\41\1\30\1\u01a2"+ + 
"\32\43\1\203\12\u0120\1\0\1\154\1\215\1\154\1\0"+ + "\1\212\1\156\3\154\2\0\1\111\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\100\1\0\1\101"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\102\2\0\1\103\4\0\1\104\3\0\1\105"+ + "\17\0\1\71\2\0\1\106\21\0\1\107\2\0\1\110"+ + "\61\0\1\30\2\31\2\0\2\111\1\112\1\0\1\31"+ + "\2\0\1\212\1\0\1\41\1\30\1\u01a2\32\43\1\203"+ + "\2\u01a3\1\u0120\2\u01a3\2\u0120\2\u01a3\1\u0120\1\0\1\154"+ + "\1\215\1\154\1\0\1\212\1\156\3\154\2\0\1\111"+ + "\1\154\3\0\2\154\7\0\1\265\1\0\1\266\17\0"+ + "\1\267\2\0\1\270\4\0\1\271\3\0\1\272\22\0"+ + "\1\273\21\0\1\274\2\0\1\275\62\0\1\111\1\31"+ + "\2\0\3\243\1\0\1\111\2\0\1\372\1\157\2\0"+ + "\1\u0102\1\u0103\1\u0104\1\u0105\1\u0106\1\u0107\1\u0108\1\u0109"+ + "\1\u010a\1\u010b\1\u010c\1\u010d\1\u010e\1\u010f\1\u0110\1\u0111"+ + "\1\u0112\1\u0113\1\u0114\1\u0115\1\u0116\1\u0117\1\u0118\1\u0119"+ + "\1\u011a\1\u011b\1\u011c\1\154\12\u0120\1\0\1\154\1\372"+ + "\1\154\1\0\1\372\1\156\3\154\2\0\1\243\1\154"+ + "\3\0\2\154\220\0\4\u021b\2\0\1\u021b\15\0\1\u021b"+ + "\6\0\12\u021b\1\u0123\242\0\4\u021c\2\0\1\u021c\15\0"+ + "\1\u021c\6\0\12\u021c\1\u021d\242\0\4\u021e\2\0\1\u021e"+ + "\15\0\1\u021e\6\0\1\u021f\1\u0220\5\u021f\1\u0221\1\u0220"+ + "\1\u021f\13\0\1\u01ad\227\0\4\u0222\2\0\1\u0222\15\0"+ + "\1\u0222\6\0\12\u0222\1\u0223\12\0\1\u01ad\226\0\1\u0224"+ + "\4\u0222\2\0\1\u0222\15\0\1\u0222\6\0\12\u0225\1\u0223"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u0222\2\0\1\u0222\15\0"+ + "\1\u0222\6\0\12\u0226\1\u0223\12\0\1\u01ad\226\0\1\u0224"+ + "\4\u0222\2\0\1\u0222\15\0\1\u0222\6\0\2\u0226\1\u0225"+ + "\1\u0226\1\u0227\2\u0225\2\u0226\1\u0225\1\u0223\12\0\1\u01ad"+ + "\274\0\1\u01f9\6\0\1\u0228\1\u0229\1\u022a\103\0\1\u0125"+ + "\2\0\1\u0125\33\0\4\u0125\310\0\1\u0125\171\0\1\u0125"+ + "\44\0\1\u0125\1\0\2\u0125\21\0\1\u0125\4\0\1\u0125"+ + "\17\0\4\u0125\3\0\1\u0125\12\0\1\u0125\275\0\1\u0125"+ + "\314\0\4\u0125\244\0\2\u0125\25\0\4\u0125\147\0\1\u0125"+ + 
"\15\0\2\u0125\10\0\2\u0125\1\0\1\u0125\1\0\1\u0125"+ + "\11\0\1\u0125\11\0\2\u0125\6\0\1\u0125\2\0\4\u0125"+ + "\3\0\1\u0125\2\0\2\u0125\1\0\3\u0125\1\0\2\u0125"+ + "\1\0\1\u0125\10\0\1\u0125\1\0\2\u0125\2\0\2\u0125"+ + "\1\0\4\u0125\23\0\1\u0125\330\0\1\u0136\1\323\2\u022b"+ + "\1\u022c\1\u022d\10\u022b\1\323\1\u022e\5\u022b\6\323\1\u0137"+ + "\12\323\242\0\1\u0136\1\u022f\2\u022b\1\323\1\u022b\1\u0230"+ + "\3\u022b\1\u0231\2\u022b\4\323\4\u022b\1\323\2\u022b\1\323"+ + "\2\u022b\1\u0137\12\323\242\0\1\u0136\3\323\1\u022b\1\323"+ + "\1\u022b\2\323\1\u0232\1\323\1\u022b\10\323\1\u022b\2\323"+ + "\2\u022b\2\323\1\u0137\12\323\242\0\1\u0136\1\323\1\u022b"+ + "\1\u0233\2\u022b\2\323\1\u022b\3\323\1\u0234\1\u0235\1\323"+ + "\1\u0236\2\u022b\11\323\1\u0137\12\323\242\0\1\u0136\3\323"+ + "\1\u022b\1\323\1\u022b\10\323\1\u022b\1\323\2\u022b\10\323"+ + "\1\u0137\12\323\242\0\1\u0136\4\323\1\u0237\5\323\1\u022b"+ + "\17\323\1\u0137\12\323\242\0\1\u0136\4\323\2\u022b\2\323"+ + "\1\u022b\1\323\1\u022b\13\323\2\u022b\2\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u0238\1\323\2\u022b\1\u0239\1\u023a\12\u022b"+ + "\1\u023b\1\u022b\2\323\2\u022b\3\323\1\u022b\1\u0137\12\323"+ + "\242\0\1\u0136\2\323\4\u022b\3\323\2\u022b\1\u023c\1\u022b"+ + "\1\323\2\u022b\12\323\1\u0137\12\323\242\0\1\u0136\1\u023d"+ + "\1\u022b\2\323\1\u022b\3\323\1\u023e\5\323\3\u022b\3\323"+ + "\1\u022b\1\323\1\u022b\1\323\2\u022b\1\u0137\12\323\242\0"+ + "\1\u0136\3\u022b\1\u023f\1\u022b\1\u0240\1\323\1\u022b\1\u0241"+ + "\7\u022b\1\u0242\3\u022b\1\323\2\u022b\1\323\2\u022b\1\u0137"+ + "\12\323\242\0\1\u0136\1\u0243\1\u022b\1\323\1\u0244\6\u022b"+ + "\3\323\1\u022b\2\323\1\u022b\2\323\1\u022b\6\323\1\u0137"+ + "\12\323\242\0\1\u0136\1\u022b\31\323\1\u0137\12\323\242\0"+ + "\1\u0136\1\u022b\2\323\1\u022b\1\u0245\1\u0246\2\u022b\1\323"+ + "\1\u0247\2\u022b\2\323\2\u022b\1\323\1\u022b\3\323\1\u0248"+ + "\1\u022b\2\323\1\u022b\1\u0137\12\323\242\0\1\u0136\3\u022b"+ + 
"\1\u0249\2\u022b\1\323\1\u022b\1\u024a\3\u022b\3\323\2\u022b"+ + "\1\323\10\u022b\1\u0137\12\323\242\0\1\u0136\1\u024b\2\u022b"+ + "\1\u024c\1\u024d\1\u024e\2\u022b\1\u024f\3\u022b\1\323\1\u022b"+ + "\1\323\1\u022b\1\323\1\u022b\1\323\1\u022b\1\323\4\u022b"+ + "\1\323\1\u0137\12\323\242\0\1\u0136\1\u022b\6\323\1\u022b"+ + "\3\323\1\u0250\2\323\1\u022b\4\323\1\u022b\2\323\1\u022b"+ + "\2\323\1\u022b\1\u0137\12\323\242\0\1\u0136\6\323\1\u022b"+ + "\7\323\1\u022b\13\323\1\u0137\12\323\242\0\1\u0136\13\323"+ + "\1\u0251\6\323\1\u0252\7\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\u022b\11\323\1\u022b\6\323\1\u022b\10\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u022b\1\323\6\u022b\1\u0253\1\323\2\u022b"+ + "\2\323\2\u022b\1\323\1\u022b\1\323\3\u022b\1\323\3\u022b"+ + "\1\u0137\12\323\242\0\1\u0136\4\323\1\u022b\1\u0254\4\323"+ + "\2\u022b\3\323\2\u022b\5\323\1\u022b\3\323\1\u0137\12\323"+ + "\242\0\1\u0136\3\323\2\u022b\2\323\1\u022b\1\u0255\1\323"+ + "\2\u022b\1\323\1\u022b\3\323\1\u022b\1\323\1\u022b\1\323"+ + "\1\u022b\3\323\1\u022b\1\u0137\12\323\242\0\1\u0136\3\323"+ + "\1\u022b\1\323\1\u0256\4\323\1\u022b\2\323\1\u022b\14\323"+ + "\1\u0137\12\323\242\0\1\u0136\2\u022b\1\323\1\u0257\1\323"+ + "\1\u0258\1\323\2\u022b\2\323\1\u022b\4\323\1\u022b\11\323"+ + "\1\u0137\12\323\242\0\1\u0136\3\323\1\u022b\13\323\1\u022b"+ + "\12\323\1\u0137\12\323\25\0\1\55\1\0\1\56\2\0"+ + "\1\245\1\0\1\246\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\247\2\0\1\250\4\0"+ + "\1\251\3\0\1\252\17\0\1\71\2\0\1\253\21\0"+ + "\1\254\2\0\1\255\61\0\1\30\1\76\2\0\1\76"+ + "\1\0\2\76\1\0\1\76\2\0\1\201\1\157\2\30"+ "\1\336\1\337\1\340\1\341\1\342\1\343\1\344\1\345"+ "\1\346\1\347\1\350\1\351\1\352\1\353\1\354\1\355"+ "\1\356\1\357\1\360\1\361\1\362\1\363\1\364\1\365"+ - "\1\161\1\366\2\367\1\366\5\367\1\370\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\1\206\3\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\252\1\0"+ - "\1\77\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\253\2\0\1\254\4\0\1\102\3\0"+ - 
"\1\255\17\0\1\67\2\0\1\256\21\0\1\257\2\0"+ - "\1\260\41\0\1\130\15\0\1\30\1\110\1\31\1\132"+ - "\3\0\1\110\1\0\1\110\2\0\1\30\1\160\32\201"+ - "\1\161\12\202\1\0\1\161\1\175\1\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\7\0\1\241"+ - "\1\0\1\242\17\0\1\243\2\0\1\244\4\0\1\245"+ - "\3\0\1\246\22\0\1\247\21\0\1\250\2\0\1\251"+ - "\60\0\1\107\1\31\6\0\1\107\3\0\1\160\33\161"+ - "\12\202\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\76\1\0\1\77\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\100\2\0\1\101\4\0"+ - "\1\102\3\0\1\103\17\0\1\67\2\0\1\104\21\0"+ - "\1\105\2\0\1\106\57\0\1\30\2\31\2\0\2\107"+ - "\1\110\1\0\1\31\2\0\1\30\1\174\32\41\1\164"+ - "\12\371\1\0\1\161\1\175\1\161\1\0\2\176\1\162"+ - "\3\161\2\0\1\107\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\76\1\0\1\77\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\100"+ - "\2\0\1\101\4\0\1\102\3\0\1\103\17\0\1\67"+ - "\2\0\1\104\21\0\1\105\2\0\1\106\57\0\1\30"+ - "\2\31\2\0\2\107\1\110\1\0\1\31\2\0\1\30"+ - "\1\174\32\41\1\164\2\177\1\371\2\177\2\371\1\177"+ - "\1\371\1\177\1\0\1\161\1\175\1\161\1\0\2\176"+ - "\1\162\3\161\2\0\1\107\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\372\32\201\1\161\12\327\1\74"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\76\1\0\1\77\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\100\2\0\1\101"+ - "\4\0\1\102\3\0\1\103\17\0\1\67\2\0\1\104"+ - "\21\0\1\105\2\0\1\106\57\0\1\30\2\31\2\0"+ - "\2\107\1\110\1\0\1\31\2\0\1\30\1\373\32\201"+ - "\1\161\12\202\1\0\1\161\1\175\1\161\1\0\2\176"+ - "\1\162\3\161\2\0\1\107\1\161\4\0\2\161\213\0"+ - "\4\374\2\0\1\374\15\0\1\374\6\0\12\374\1\375"+ - "\304\0\1\376\236\0\1\377\54\0\1\162\227\0\74\206"+ - 
"\7\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ - "\3\0\1\30\13\0\1\73\2\0\1\73\10\0\1\30"+ - "\12\0\4\73\45\0\1\30\115\0\1\30\26\0\2\30"+ - "\23\0\1\73\1\30\44\0\1\73\21\0\1\73\142\0"+ - "\1\73\11\0\1\73\15\0\4\30\2\0\2\30\14\0"+ - "\3\30\1\73\1\0\2\73\11\0\3\30\3\0\1\30"+ - "\1\0\1\73\4\0\1\73\2\30\5\0\4\73\2\0"+ - "\1\30\1\73\12\0\4\73\1\0\2\30\1\0\1\73"+ - "\7\0\1\30\117\0\1\30\4\0\1\30\6\0\1\30"+ - "\3\0\1\30\6\0\1\30\5\0\1\30\2\0\2\30"+ - "\1\0\17\30\2\0\1\30\13\0\7\30\2\0\1\30"+ - "\1\0\1\30\1\0\1\30\2\0\1\30\1\0\1\30"+ - "\1\0\1\30\1\0\1\30\4\0\1\73\1\0\2\30"+ - "\5\0\1\30\1\0\1\30\2\0\3\30\1\0\1\30"+ - "\7\0\1\30\1\0\1\30\122\0\1\30\6\0\1\30"+ - "\3\0\1\30\3\0\1\30\7\0\1\30\31\0\20\30"+ - "\5\0\3\30\3\0\1\30\3\0\2\30\2\0\2\30"+ - "\4\0\1\30\4\73\4\0\1\30\4\0\1\30\2\0"+ - "\1\30\4\0\1\30\1\0\1\30\1\0\1\30\223\0"+ - "\2\73\15\0\4\73\154\0\1\73\15\0\2\73\10\0"+ - "\2\73\1\0\1\73\1\0\1\73\11\0\1\73\11\0"+ - "\2\73\6\0\1\73\2\0\4\73\3\0\1\73\2\0"+ - "\2\73\1\0\3\73\5\0\1\73\1\0\2\73\2\0"+ - "\2\73\1\0\4\73\5\0\1\73\1\0\2\73\140\0"+ - "\1\u0100\1\0\1\u0101\17\0\1\u0102\2\0\1\u0103\4\0"+ - "\1\u0104\3\0\1\u0105\22\0\1\u0106\21\0\1\u0107\2\0"+ - "\1\u0108\60\0\1\220\1\73\6\0\1\220\37\0\12\73"+ - "\35\0\1\30\4\0\1\30\11\0\1\30\22\0\1\30"+ - "\3\0\1\30\13\0\1\74\2\0\1\74\10\0\1\30"+ - "\12\0\4\74\45\0\1\30\120\0\1\30\3\0\4\30"+ - "\1\0\1\30\4\0\1\30\1\0\2\30\2\0\2\30"+ - "\2\0\3\30\1\0\1\30\1\0\1\30\2\0\4\30"+ - "\1\0\3\30\1\0\1\30\1\0\3\30\1\0\2\30"+ - "\1\0\4\30\1\0\2\30\2\0\10\30\1\0\2\30"+ - "\1\0\11\30\1\0\10\30\1\0\13\30\2\0\1\30"+ - "\1\0\1\30\1\0\2\30\2\0\1\30\1\0\1\30"+ - "\3\0\1\30\113\0\1\30\26\0\2\30\24\0\1\30"+ - "\44\0\1\74\176\0\1\74\15\0\4\30\2\0\2\30"+ - "\14\0\3\30\1\74\1\0\2\74\11\0\3\30\3\0"+ - "\1\30\1\0\1\74\4\0\1\74\2\30\5\0\4\74"+ - "\2\0\1\30\1\74\17\0\2\30\1\0\1\74\7\0"+ - "\1\30\133\0\1\30\3\0\2\30\12\0\2\30\1\0"+ - "\3\30\7\0\1\30\6\0\2\30\1\0\2\30\6\0"+ - "\1\30\4\0\2\30\2\0\2\30\5\0\3\30\10\0"+ - "\1\30\16\0\1\30\7\0\1\30\7\0\1\30\117\0"+ - 
"\1\30\4\0\1\30\6\0\1\30\3\0\1\30\6\0"+ - "\1\30\5\0\1\30\2\0\2\30\1\0\17\30\2\0"+ - "\1\30\13\0\7\30\2\0\1\30\1\0\1\30\1\0"+ - "\1\30\2\0\1\30\1\0\1\30\1\0\1\30\1\0"+ - "\1\30\4\0\1\74\1\0\2\30\5\0\1\30\1\0"+ - "\1\30\2\0\3\30\1\0\1\30\7\0\1\30\1\0"+ - "\1\30\122\0\1\30\6\0\1\30\3\0\1\30\3\0"+ - "\1\30\7\0\1\30\31\0\20\30\5\0\3\30\3\0"+ - "\1\30\3\0\2\30\2\0\2\30\4\0\1\30\4\74"+ - "\4\0\1\30\4\0\1\30\2\0\1\30\4\0\1\30"+ - "\1\0\1\30\1\0\1\30\223\0\2\74\15\0\4\74"+ - "\154\0\1\74\15\0\2\74\10\0\2\74\1\0\1\74"+ - "\1\0\1\74\11\0\1\74\11\0\2\74\6\0\1\74"+ - "\2\0\4\74\3\0\1\74\2\0\2\74\1\0\3\74"+ - "\5\0\1\74\1\0\2\74\2\0\2\74\1\0\4\74"+ - "\5\0\1\74\1\0\2\74\141\0\1\30\4\0\1\30"+ - "\11\0\1\30\22\0\1\30\3\0\1\30\13\0\1\75"+ - "\2\0\1\75\10\0\1\30\12\0\4\75\45\0\1\30"+ - "\115\0\1\30\26\0\2\30\23\0\1\73\1\30\44\0"+ - "\1\75\21\0\1\73\142\0\1\73\11\0\1\75\15\0"+ - "\4\30\2\0\2\30\14\0\3\30\1\75\1\0\2\75"+ - "\11\0\3\30\3\0\1\30\1\0\1\75\4\0\1\75"+ - "\2\30\5\0\4\75\2\0\1\30\1\75\12\0\4\73"+ - "\1\0\2\30\1\0\1\75\7\0\1\30\117\0\1\30"+ - "\4\0\1\30\6\0\1\30\3\0\1\30\6\0\1\30"+ - "\5\0\1\30\2\0\2\30\1\0\17\30\2\0\1\30"+ - "\13\0\7\30\2\0\1\30\1\0\1\30\1\0\1\30"+ - "\2\0\1\30\1\0\1\30\1\0\1\30\1\0\1\30"+ - "\4\0\1\75\1\0\2\30\5\0\1\30\1\0\1\30"+ - "\2\0\3\30\1\0\1\30\7\0\1\30\1\0\1\30"+ - "\122\0\1\30\6\0\1\30\3\0\1\30\3\0\1\30"+ - "\7\0\1\30\31\0\20\30\5\0\3\30\3\0\1\30"+ - "\3\0\2\30\2\0\2\30\4\0\1\30\4\75\4\0"+ - "\1\30\4\0\1\30\2\0\1\30\4\0\1\30\1\0"+ - "\1\30\1\0\1\30\223\0\2\75\15\0\4\75\154\0"+ - "\1\75\15\0\2\75\10\0\2\75\1\0\1\75\1\0"+ - "\1\75\11\0\1\75\11\0\2\75\6\0\1\75\2\0"+ - "\4\75\3\0\1\75\2\0\2\75\1\0\3\75\5\0"+ - "\1\75\1\0\2\75\2\0\2\75\1\0\4\75\5\0"+ - "\1\75\1\0\2\75\223\0\1\107\2\0\1\107\23\0"+ - "\4\107\201\0\1\31\132\0\1\31\207\0\1\31\45\0"+ - "\1\107\21\0\1\31\142\0\1\31\11\0\1\107\44\0"+ - "\1\107\1\0\2\107\21\0\1\107\4\0\1\107\7\0"+ - "\4\107\3\0\1\107\12\0\4\31\4\0\1\107\301\0"+ - "\2\31\264\0\1\107\311\0\4\107\251\0\2\107\15\0"+ - 
"\4\107\154\0\1\107\15\0\2\107\10\0\2\107\1\0"+ - "\1\107\1\0\1\107\11\0\1\107\11\0\2\107\6\0"+ - "\1\107\2\0\4\107\3\0\1\107\2\0\2\107\1\0"+ - "\3\107\5\0\1\107\1\0\2\107\2\0\2\107\1\0"+ - "\4\107\5\0\1\107\1\0\2\107\141\0\1\30\4\0"+ - "\1\30\11\0\1\30\22\0\1\30\3\0\1\30\13\0"+ - "\1\110\2\0\1\110\10\0\1\30\12\0\4\110\45\0"+ - "\1\30\115\0\1\30\26\0\2\30\23\0\1\31\1\30"+ - "\44\0\1\110\21\0\1\31\142\0\1\31\11\0\1\110"+ - "\15\0\4\30\2\0\2\30\14\0\3\30\1\110\1\0"+ - "\2\110\11\0\3\30\3\0\1\30\1\0\1\110\4\0"+ - "\1\110\2\30\5\0\4\110\2\0\1\30\1\110\12\0"+ - "\4\31\1\0\2\30\1\0\1\110\7\0\1\30\117\0"+ - "\1\30\4\0\1\30\6\0\1\30\3\0\1\30\6\0"+ - "\1\30\5\0\1\30\2\0\2\30\1\0\17\30\2\0"+ - "\1\30\13\0\7\30\2\0\1\30\1\0\1\30\1\0"+ - "\1\30\2\0\1\30\1\0\1\30\1\0\1\30\1\0"+ - "\1\30\4\0\1\110\1\0\2\30\5\0\1\30\1\0"+ - "\1\30\2\0\3\30\1\0\1\30\7\0\1\30\1\0"+ - "\1\30\122\0\1\30\6\0\1\30\3\0\1\30\3\0"+ - "\1\30\7\0\1\30\31\0\20\30\5\0\3\30\3\0"+ - "\1\30\3\0\2\30\2\0\2\30\4\0\1\30\4\110"+ - "\4\0\1\30\4\0\1\30\2\0\1\30\4\0\1\30"+ - "\1\0\1\30\1\0\1\30\223\0\2\110\15\0\4\110"+ - "\154\0\1\110\15\0\2\110\10\0\2\110\1\0\1\110"+ - "\1\0\1\110\11\0\1\110\11\0\2\110\6\0\1\110"+ - "\2\0\4\110\3\0\1\110\2\0\2\110\1\0\3\110"+ - "\5\0\1\110\1\0\2\110\2\0\2\110\1\0\4\110"+ - "\5\0\1\110\1\0\2\110\223\0\1\132\2\0\1\132"+ - "\23\0\4\132\305\0\1\132\176\0\1\132\44\0\1\132"+ - "\1\0\2\132\21\0\1\132\4\0\1\132\7\0\4\132"+ - "\3\0\1\132\22\0\1\132\262\0\1\132\311\0\4\132"+ - "\251\0\2\132\15\0\4\132\154\0\1\132\15\0\2\132"+ - "\10\0\2\132\1\0\1\132\1\0\1\132\11\0\1\132"+ - "\11\0\2\132\6\0\1\132\2\0\4\132\3\0\1\132"+ - "\2\0\2\132\1\0\3\132\5\0\1\132\1\0\2\132"+ - "\2\0\2\132\1\0\4\132\5\0\1\132\1\0\2\132"+ - "\343\0\1\u0109\32\270\1\u010a\12\270\237\0\61\271\1\0"+ - "\1\u010b\4\271\1\u010c\1\0\3\271\1\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - 
"\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\1\41\2\u010d\1\u010e\1\u010f\10\u010d\1\41"+ - "\1\u0110\5\u010d\6\41\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\1\u0111"+ - "\2\u010d\1\41\1\u010d\1\u0112\6\u010d\4\41\4\u010d\1\41"+ - "\1\u010d\1\41\3\u010d\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\3\41"+ - "\1\u010d\1\41\1\u010d\4\41\1\u010d\10\41\1\u010d\2\41"+ - "\1\u010d\2\41\1\u010d\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\1\41"+ - "\1\u010d\1\u0113\2\u010d\2\41\1\u010d\6\41\3\u010d\11\41"+ - "\1\164\12\165\1\74\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\3\161\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\3\41\1\u010d\1\41\1\u010d"+ - "\10\41\1\u010d\1\41\2\u010d\10\41\1\164\12\165\1\74"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\55\1\0\1\56\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\63\2\0\1\64"+ - 
"\4\0\1\65\3\0\1\66\17\0\1\67\2\0\1\70"+ - "\21\0\1\71\2\0\1\72\57\0\2\30\1\73\1\0"+ - "\1\74\1\0\1\74\1\75\1\0\1\30\2\0\1\30"+ - "\1\163\4\41\1\u0114\5\41\1\u010d\17\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\4\41\2\u010d\2\41\1\u010d\1\41\1\u010d"+ - "\13\41\1\u010d\2\41\1\u010d\1\164\12\165\1\74\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\163"+ - "\1\u010d\1\41\3\u010d\1\u0115\14\u010d\2\41\2\u010d\2\41"+ - "\1\u010d\1\41\1\164\12\165\1\74\1\161\1\166\1\161"+ - "\1\0\1\161\1\167\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\163\2\41\4\u010d"+ - "\3\41\2\u010d\1\u0116\1\u010d\1\41\2\u010d\12\41\1\164"+ - "\12\165\1\74\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\2\u010d\2\41\1\u010d\3\41\1\u010d"+ - "\5\41\3\u010d\3\41\1\u010d\2\41\3\u010d\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - 
"\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\5\u010d\1\u0117\1\41\1\u010d\1\u0118\7\u010d"+ - "\1\u0119\3\u010d\1\41\1\u010d\1\41\3\u010d\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\1\u011a\1\u010d\1\41\1\u0111\6\u010d\3\41"+ - "\1\u010d\2\41\1\u010d\2\41\1\u010d\6\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\1\u010d\31\41\1\164\12\165\1\74\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\163"+ - "\1\u010d\2\41\1\u010d\1\u011b\1\41\2\u010d\1\41\3\u010d"+ - "\2\41\2\u010d\1\41\1\u010d\3\41\1\u010d\2\41\2\u010d"+ - "\1\164\12\165\1\74\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\3\161\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\6\u010d\1\41\5\u010d\3\41"+ - "\2\u010d\1\41\10\u010d\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - 
"\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\1\41"+ - "\2\u010d\1\u0118\1\u011c\3\u010d\1\41\3\u010d\1\41\1\u010d"+ - "\1\41\1\u010d\1\41\1\u010d\1\41\1\u010d\1\41\3\u010d"+ - "\1\41\1\u010d\1\164\12\165\1\74\1\161\1\166\1\161"+ - "\1\0\1\161\1\167\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\163\1\u010d\6\41"+ - "\1\u010d\6\41\1\u010d\4\41\1\u010d\4\41\2\u010d\1\164"+ - "\12\165\1\74\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\6\41\1\u010d\7\41\1\u010d\13\41"+ - "\1\164\12\165\1\74\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\3\161\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\13\41\1\u011d\6\41\1\u011e"+ - "\7\41\1\164\12\165\1\74\1\161\1\166\1\161\1\0"+ - "\1\161\1\167\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\55\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\63\2\0\1\64\4\0\1\65\3\0\1\66"+ - "\17\0\1\67\2\0\1\70\21\0\1\71\2\0\1\72"+ - "\57\0\2\30\1\73\1\0\1\74\1\0\1\74\1\75"+ - "\1\0\1\30\2\0\1\30\1\163\1\u010d\11\41\1\u010d"+ - "\6\41\1\u010d\10\41\1\164\12\165\1\74\1\161\1\166"+ - 
"\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\1\u010d"+ - "\1\41\6\u010d\1\u011f\1\41\2\u010d\2\41\2\u010d\1\41"+ - "\1\u010d\1\41\6\u010d\1\41\1\164\12\165\1\74\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\163"+ - "\4\41\1\u010d\5\41\2\u010d\3\41\2\u010d\10\41\1\u010d"+ - "\1\164\12\165\1\74\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\3\161\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\3\41\1\u010d\1\41\1\u0120"+ - "\4\41\1\u010d\2\41\1\u010d\14\41\1\164\12\165\1\74"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\55\1\0\1\56\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\63\2\0\1\64"+ - "\4\0\1\65\3\0\1\66\17\0\1\67\2\0\1\70"+ - "\21\0\1\71\2\0\1\72\57\0\2\30\1\73\1\0"+ - "\1\74\1\0\1\74\1\75\1\0\1\30\2\0\1\30"+ - "\1\163\2\u010d\1\41\1\u010d\3\41\2\u010d\2\41\1\u010d"+ - "\4\41\1\u010d\11\41\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\3\41"+ - 
"\1\u010d\13\41\1\u010d\12\41\1\164\12\165\1\74\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\163"+ - "\3\41\2\u010d\2\41\2\u010d\1\41\2\u010d\1\41\1\u010d"+ - "\3\41\1\u010d\1\41\1\u010d\1\41\1\u010d\2\41\1\u010d"+ - "\1\41\1\164\12\165\1\74\1\161\1\166\1\161\1\0"+ - "\1\161\1\167\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\7\0"+ - "\1\u0100\1\0\1\u0101\17\0\1\u0102\2\0\1\u0103\4\0"+ - "\1\u0104\3\0\1\u0105\22\0\1\u0106\21\0\1\u0107\2\0"+ - "\1\u0108\60\0\1\220\1\73\6\0\1\220\3\0\1\160"+ - "\1\334\1\335\1\336\1\337\1\340\1\341\1\342\1\343"+ - "\1\344\1\345\1\346\1\347\1\350\1\351\1\352\1\353"+ - "\1\354\1\355\1\356\1\357\1\360\1\361\1\362\1\363"+ - "\1\364\1\365\1\161\12\165\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\1\206\3\0\2\161\7\0"+ - "\1\u0100\1\0\1\u0101\17\0\1\u0102\2\0\1\u0103\4\0"+ - "\1\u0104\3\0\1\u0105\22\0\1\u0106\21\0\1\u0107\2\0"+ - "\1\u0108\60\0\1\220\1\73\6\0\1\220\3\0\1\160"+ - "\33\161\12\327\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\211\1\0\1\56\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\212\2\0\1\213"+ - "\4\0\1\65\3\0\1\214\17\0\1\67\2\0\1\215"+ - "\21\0\1\216\2\0\1\217\57\0\1\30\2\73\2\0"+ - "\2\220\1\75\1\0\1\73\2\0\1\30\1\u0122\32\201"+ - "\1\161\12\327\1\0\1\161\1\166\1\161\1\0\2\326"+ - "\1\162\3\161\2\0\1\220\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\3\41\1\u0123\26\41\1\164"+ - 
"\12\165\1\74\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\32\41\1\164\12\165\1\u0124\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\163"+ - "\11\41\1\u0125\20\41\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\15\41"+ - "\1\u0126\14\41\1\164\12\165\1\74\1\161\1\166\1\161"+ - "\1\0\1\161\1\167\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\324\2\u0127\1\u0128\1\u0129\10\u0127"+ - "\1\324\1\u012a\5\u0127\6\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\1\u012b\2\u0127\1\324\1\u0127\1\u012c\6\u0127"+ - "\4\324\4\u0127\1\324\1\u0127\1\324\3\u0127\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\3\324\1\u0127\1\324\1\u0127"+ - "\4\324\1\u0127\10\324\1\u0127\2\324\1\u0127\2\324\1\u0127"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\324\1\u0127"+ - "\1\u012d\2\u0127\2\324\1\u0127\6\324\3\u0127\11\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\3\324\1\u0127\1\324"+ - "\1\u0127\10\324\1\u0127\1\324\2\u0127\10\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - 
"\4\0\2\161\212\0\1\u0121\4\324\1\u012e\5\324\1\u0127"+ - "\17\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\4\324"+ - "\2\u0127\2\324\1\u0127\1\324\1\u0127\13\324\1\u0127\2\324"+ - "\1\u0127\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u0127"+ - "\1\324\3\u0127\1\u012f\14\u0127\2\324\2\u0127\2\324\1\u0127"+ - "\1\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\2\324"+ - "\4\u0127\3\324\2\u0127\1\u0130\1\u0127\1\324\2\u0127\12\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\2\u0127\2\324"+ - "\1\u0127\3\324\1\u0127\5\324\3\u0127\3\324\1\u0127\2\324"+ - "\3\u0127\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\5\u0127"+ - "\1\u0131\1\324\1\u0127\1\u0132\7\u0127\1\u0133\3\u0127\1\324"+ - "\1\u0127\1\324\3\u0127\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\1\u0134\1\u0127\1\324\1\u012b\6\u0127\3\324\1\u0127"+ - "\2\324\1\u0127\2\324\1\u0127\6\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\u0127\31\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\u0127\2\324\1\u0127\1\u0135\1\324"+ - "\2\u0127\1\324\3\u0127\2\324\2\u0127\1\324\1\u0127\3\324"+ - "\1\u0127\2\324\2\u0127\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\6\u0127\1\324\5\u0127\3\324\2\u0127\1\324\10\u0127"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\324\2\u0127"+ - "\1\u0132\1\u0136\3\u0127\1\324\3\u0127\1\324\1\u0127\1\324"+ - "\1\u0127\1\324\1\u0127\1\324\1\u0127\1\324\3\u0127\1\324"+ - "\1\u0127\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u0127"+ - "\6\324\1\u0127\6\324\1\u0127\4\324\1\u0127\4\324\2\u0127"+ - 
"\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\6\324\1\u0127"+ - "\7\324\1\u0127\13\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\13\324\1\u0137\6\324\1\u0138\7\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\1\u0127\11\324\1\u0127\6\324"+ - "\1\u0127\10\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\1\u0127\1\324\6\u0127\1\u0139\1\324\2\u0127\2\324\2\u0127"+ - "\1\324\1\u0127\1\324\6\u0127\1\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\4\324\1\u0127\5\324\2\u0127\3\324"+ - "\2\u0127\10\324\1\u0127\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\3\324\1\u0127\1\324\1\u013a\4\324\1\u0127\2\324"+ - "\1\u0127\14\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\2\u0127\1\324\1\u0127\3\324\2\u0127\2\324\1\u0127\4\324"+ - "\1\u0127\11\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\3\324\1\u0127\13\324\1\u0127\12\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\3\324\2\u0127\2\324\2\u0127\1\324"+ - "\2\u0127\1\324\1\u0127\3\324\1\u0127\1\324\1\u0127\1\324"+ - "\1\u0127\2\324\1\u0127\1\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\76\1\0\1\77"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\100\2\0\1\101\4\0\1\102\3\0\1\103"+ - "\17\0\1\67\2\0\1\104\21\0\1\105\2\0\1\106"+ - "\57\0\1\30\2\31\2\0\2\107\1\110\1\0\1\31"+ - "\2\0\1\30\1\u013b\32\41\1\164\12\367\1\0\1\161"+ - "\1\175\1\161\1\0\2\176\1\162\3\161\2\0\1\107"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\76\1\0\1\77\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\100\2\0\1\101\4\0"+ - "\1\102\3\0\1\103\17\0\1\67\2\0\1\104\21\0"+ - 
"\1\105\2\0\1\106\57\0\1\30\2\31\2\0\2\107"+ - "\1\110\1\0\1\31\2\0\1\30\1\u013b\32\41\1\164"+ - "\12\u013c\1\0\1\161\1\175\1\161\1\0\2\176\1\162"+ - "\3\161\2\0\1\107\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\76\1\0\1\77\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\100"+ - "\2\0\1\101\4\0\1\102\3\0\1\103\17\0\1\67"+ - "\2\0\1\104\21\0\1\105\2\0\1\106\57\0\1\30"+ - "\2\31\2\0\2\107\1\110\1\0\1\31\2\0\1\30"+ - "\1\u013b\32\41\1\164\1\367\1\u013d\1\u013c\2\367\2\u013c"+ - "\1\367\1\u013c\1\367\1\0\1\161\1\175\1\161\1\0"+ - "\2\176\1\162\3\161\2\0\1\107\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\76\1\0\1\77"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\100\2\0\1\101\4\0\1\102\3\0\1\103"+ - "\17\0\1\67\2\0\1\104\21\0\1\105\2\0\1\106"+ - "\57\0\1\30\2\31\2\0\2\107\1\110\1\0\1\31"+ - "\2\0\1\30\1\u013e\32\41\1\164\12\371\1\0\1\161"+ - "\1\175\1\161\1\0\2\176\1\162\3\161\2\0\1\107"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\221\1\0\1\222\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\223\2\0\1\224\4\0"+ - "\1\225\3\0\1\226\17\0\1\67\2\0\1\227\21\0"+ - "\1\230\2\0\1\231\57\0\1\30\1\74\7\0\1\74"+ - "\2\0\1\30\1\160\32\201\13\161\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\1\206\3\0\2\161"+ - "\7\0\1\241\1\0\1\242\17\0\1\243\2\0\1\244"+ - "\4\0\1\245\3\0\1\246\22\0\1\247\21\0\1\250"+ - "\2\0\1\251\60\0\1\107\1\31\6\0\1\107\3\0"+ - "\1\160\33\161\12\202\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\1\206\3\0\2\161\213\0\4\u013f"+ - "\2\0\1\u013f\15\0\1\u013f\6\0\12\u013f\1\375\237\0"+ - "\4\u0140\2\0\1\u0140\15\0\1\u0140\6\0\12\u0140\1\u0141"+ - "\237\0\4\u0142\2\0\1\u0142\15\0\1\u0142\6\0\1\u0143"+ - "\2\u0144\1\u0143\5\u0144\1\u0145\14\0\1\u0146\222\0\46\161"+ - "\1\0\3\161\1\0\2\161\1\0\3\161\3\0\1\161"+ - "\1\206\3\0\2\161\72\0\1\220\2\0\1\220\23\0"+ - "\4\220\201\0\1\73\132\0\1\73\207\0\1\73\45\0"+ - "\1\220\21\0\1\73\142\0\1\73\11\0\1\220\44\0"+ - "\1\220\1\0\2\220\21\0\1\220\4\0\1\220\7\0"+ - 
"\4\220\3\0\1\220\12\0\4\73\4\0\1\220\301\0"+ - "\2\73\264\0\1\220\311\0\4\220\251\0\2\220\15\0"+ - "\4\220\154\0\1\220\15\0\2\220\10\0\2\220\1\0"+ - "\1\220\1\0\1\220\11\0\1\220\11\0\2\220\6\0"+ - "\1\220\2\0\4\220\3\0\1\220\2\0\2\220\1\0"+ - "\3\220\5\0\1\220\1\0\2\220\2\0\2\220\1\0"+ - "\4\220\5\0\1\220\1\0\2\220\344\0\1\u0147\1\u0148"+ - "\1\u0149\1\u014a\1\u014b\1\u014c\1\u014d\1\u014e\1\u014f\1\u0150"+ - "\1\u0151\1\u0152\1\u0153\1\u0154\1\u0155\1\u0156\1\u0157\1\u0158"+ - "\1\u0159\1\u015a\1\u015b\1\u015c\1\u015d\1\u015e\1\u015f\1\u0160"+ - "\1\0\12\270\240\0\32\270\1\u010a\12\270\237\0\74\271"+ - "\1\0\1\53\1\0\1\54\2\0\1\55\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\63\2\0\1\64\4\0\1\65\3\0\1\66"+ - "\17\0\1\67\2\0\1\70\21\0\1\71\2\0\1\72"+ - "\57\0\2\30\1\73\1\0\1\74\1\0\1\74\1\75"+ - "\1\0\1\30\2\0\1\30\1\u0161\32\41\1\164\12\165"+ - "\1\u0162\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\u0161\4\41\1\u0166\25\41\1\164"+ - "\12\165\1\u0162\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\55\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\63\2\0\1\64\4\0\1\65\3\0\1\66"+ - "\17\0\1\67\2\0\1\70\21\0\1\71\2\0\1\72"+ - "\57\0\2\30\1\73\1\0\1\74\1\0\1\74\1\75"+ - "\1\0\1\30\2\0\1\30\1\u0161\15\41\1\306\14\41"+ - "\1\164\12\165\1\u0162\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\u0161\10\41\1\306"+ 
- "\21\41\1\164\12\165\1\u0162\1\161\1\166\1\161\1\0"+ - "\1\161\1\167\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\u0161\17\41"+ - "\1\u010d\12\41\1\164\12\165\1\u0162\1\161\1\166\1\161"+ - "\1\0\1\161\1\167\1\162\1\u0163\1\u0164\1\u0165\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\u0161"+ - "\5\41\1\u0167\4\41\1\u010d\17\41\1\164\12\165\1\u0162"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\1\u0163"+ - "\1\u0164\1\u0165\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\20\41\1\u010d\11\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\7\41\1\u010d\22\41\1\164\12\165\1\74"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\55\1\0\1\56\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\63\2\0\1\64"+ - "\4\0\1\65\3\0\1\66\17\0\1\67\2\0\1\70"+ - "\21\0\1\71\2\0\1\72\57\0\2\30\1\73\1\0"+ - "\1\74\1\0\1\74\1\75\1\0\1\30\2\0\1\30"+ - "\1\163\27\41\1\u010d\2\41\1\164\12\165\1\74\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - 
"\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\u0161"+ - "\6\41\1\u0166\10\41\1\u010d\12\41\1\164\12\165\1\u0162"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\1\u0163"+ - "\1\u0164\1\u0165\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\u0161\24\41\1\u0168\5\41\1\164\12\165"+ - "\1\u0162\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\11\41\1\u010d\20\41\1\164"+ - "\12\165\1\74\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\u0161\16\41\1\u0169\13\41\1\164\12\165"+ - "\1\u0162\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\u0161\12\41\1\u016a\17\41\1\164"+ - "\12\165\1\u0162\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\55\1\0\1\56"+ - 
"\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\63\2\0\1\64\4\0\1\65\3\0\1\66"+ - "\17\0\1\67\2\0\1\70\21\0\1\71\2\0\1\72"+ - "\57\0\2\30\1\73\1\0\1\74\1\0\1\74\1\75"+ - "\1\0\1\30\2\0\1\30\1\u0161\5\41\1\u010d\24\41"+ - "\1\164\12\165\1\u0162\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\u0161\1\u016b\31\41"+ - "\1\164\12\165\1\u0162\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\163\32\41\1\u016c"+ - "\12\165\1\74\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\22\41\1\u010d\7\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\u0161\23\41\1\u010d\6\41\1\164\12\165\1\u0162"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\1\u0163"+ - "\1\u0164\1\u0165\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - 
"\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\u0161\24\41\1\u016d\5\41\1\164\12\165"+ - "\1\u0162\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161\212\0"+ - "\1\160\1\334\1\335\1\336\1\337\1\340\1\341\1\342"+ - "\1\343\1\344\1\345\1\346\1\347\1\350\1\351\1\352"+ - "\1\353\1\354\1\355\1\356\1\357\1\360\1\361\1\362"+ - "\1\363\1\364\1\365\1\161\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\1\206\3\0\2\161"+ - "\7\0\1\u0100\1\0\1\u0101\17\0\1\u0102\2\0\1\u0103"+ - "\4\0\1\u0104\3\0\1\u0105\22\0\1\u0106\21\0\1\u0107"+ - "\2\0\1\u0108\60\0\1\220\1\73\6\0\1\220\3\0"+ - "\1\160\33\161\12\327\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\1\206\3\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\32\41\1\164\12\165\1\u016e\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\221\1\0\1\222\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\223\2\0\1\224\4\0"+ - "\1\225\3\0\1\226\17\0\1\67\2\0\1\227\21\0"+ - "\1\230\2\0\1\231\57\0\1\30\1\74\7\0\1\74"+ - "\2\0\1\30\1\0\32\30\24\0\1\u016f\15\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\17\41\1\u0170\12\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\16\41\1\u0171\13\41\1\164\12\165\1\u0172"+ - 
"\1\161\1\166\1\161\1\0\1\161\1\167\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0173\32\324\1\164"+ - "\12\324\1\u0174\3\161\1\0\2\161\1\162\1\u0163\1\u0164"+ - "\1\u0165\3\0\1\161\4\0\2\161\212\0\1\u0173\4\324"+ - "\1\u0175\25\324\1\164\12\324\1\u0174\3\161\1\0\2\161"+ - "\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0173\15\324\1\350\14\324\1\164\12\324\1\u0174"+ - "\3\161\1\0\2\161\1\162\1\u0163\1\u0164\1\u0165\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0173\10\324\1\350\21\324"+ - "\1\164\12\324\1\u0174\3\161\1\0\2\161\1\162\1\u0163"+ - "\1\u0164\1\u0165\3\0\1\161\4\0\2\161\212\0\1\u0173"+ - "\17\324\1\u0127\12\324\1\164\12\324\1\u0174\3\161\1\0"+ - "\2\161\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0173\5\324\1\u0176\4\324\1\u0127\17\324"+ - "\1\164\12\324\1\u0174\3\161\1\0\2\161\1\162\1\u0163"+ - "\1\u0164\1\u0165\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\20\324\1\u0127\11\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\7\324\1\u0127\22\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\27\324\1\u0127\2\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0173\6\324\1\u0175\10\324\1\u0127\12\324"+ - "\1\164\12\324\1\u0174\3\161\1\0\2\161\1\162\1\u0163"+ - "\1\u0164\1\u0165\3\0\1\161\4\0\2\161\212\0\1\u0173"+ - "\24\324\1\u0177\5\324\1\164\12\324\1\u0174\3\161\1\0"+ - "\2\161\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\11\324\1\u0127\20\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0173\16\324\1\u0178\13\324\1\164"+ - "\12\324\1\u0174\3\161\1\0\2\161\1\162\1\u0163\1\u0164"+ - "\1\u0165\3\0\1\161\4\0\2\161\212\0\1\u0173\12\324"+ - "\1\u0179\17\324\1\164\12\324\1\u0174\3\161\1\0\2\161"+ - "\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0173\5\324\1\u0127\24\324\1\164\12\324\1\u0174"+ - 
"\3\161\1\0\2\161\1\162\1\u0163\1\u0164\1\u0165\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0173\1\u017a\31\324\1\164"+ - "\12\324\1\u0174\3\161\1\0\2\161\1\162\1\u0163\1\u0164"+ - "\1\u0165\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\u016c\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\22\324\1\u0127"+ - "\7\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0173\23\324"+ - "\1\u0127\6\324\1\164\12\324\1\u0174\3\161\1\0\2\161"+ - "\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0173\24\324\1\u017b\5\324\1\164\12\324\1\u0174"+ - "\3\161\1\0\2\161\1\162\1\u0163\1\u0164\1\u0165\3\0"+ - "\1\161\4\0\2\161\7\0\1\241\1\0\1\242\17\0"+ - "\1\243\2\0\1\244\4\0\1\245\3\0\1\246\22\0"+ - "\1\247\21\0\1\250\2\0\1\251\60\0\1\107\1\31"+ - "\6\0\1\107\3\0\1\160\1\334\1\335\1\336\1\337"+ - "\1\340\1\341\1\342\1\343\1\344\1\345\1\346\1\347"+ - "\1\350\1\351\1\352\1\353\1\354\1\355\1\356\1\357"+ - "\1\360\1\361\1\362\1\363\1\364\1\365\1\161\1\u017c"+ - "\2\u017d\1\u017c\5\u017d\1\u017e\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\1\206\3\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\76\1\0\1\77\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\100\2\0\1\101\4\0\1\102\3\0\1\103\17\0"+ - "\1\67\2\0\1\104\21\0\1\105\2\0\1\106\57\0"+ - "\1\30\2\31\2\0\2\107\1\110\1\0\1\31\2\0"+ - "\1\30\1\u013b\32\41\1\164\12\371\1\0\1\161\1\175"+ - "\1\161\1\0\2\176\1\162\3\161\2\0\1\107\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\76"+ - "\1\0\1\77\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\100\2\0\1\101\4\0\1\102"+ - "\3\0\1\103\17\0\1\67\2\0\1\104\21\0\1\105"+ - "\2\0\1\106\57\0\1\30\2\31\2\0\2\107\1\110"+ - "\1\0\1\31\2\0\1\30\1\u013b\32\41\1\164\2\u013c"+ - "\1\371\2\u013c\2\371\1\u013c\1\371\1\u013c\1\0\1\161"+ - "\1\175\1\161\1\0\2\176\1\162\3\161\2\0\1\107"+ - "\1\161\4\0\2\161\7\0\1\241\1\0\1\242\17\0"+ - "\1\243\2\0\1\244\4\0\1\245\3\0\1\246\22\0"+ - "\1\247\21\0\1\250\2\0\1\251\60\0\1\107\1\31"+ - 
"\6\0\1\107\3\0\1\160\1\334\1\335\1\336\1\337"+ - "\1\340\1\341\1\342\1\343\1\344\1\345\1\346\1\347"+ - "\1\350\1\351\1\352\1\353\1\354\1\355\1\356\1\357"+ - "\1\360\1\361\1\362\1\363\1\364\1\365\1\161\12\371"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\1\206\3\0\2\161\213\0\4\u017f\2\0\1\u017f\15\0"+ - "\1\u017f\6\0\12\u017f\1\375\237\0\4\u0180\2\0\1\u0180"+ - "\15\0\1\u0180\6\0\12\u0180\1\u0181\237\0\4\u0182\2\0"+ - "\1\u0182\15\0\1\u0182\6\0\1\u0183\2\u0184\1\u0183\5\u0184"+ - "\1\u0185\14\0\1\u0146\223\0\4\u0186\2\0\1\u0186\15\0"+ - "\1\u0186\6\0\12\u0186\1\u0187\13\0\1\u0146\222\0\1\u0188"+ - "\4\u0186\2\0\1\u0186\15\0\1\u0186\6\0\12\u0189\1\u0187"+ - "\13\0\1\u0146\222\0\1\u0188\4\u0186\2\0\1\u0186\15\0"+ - "\1\u0186\6\0\12\u018a\1\u0187\13\0\1\u0146\222\0\1\u0188"+ - "\4\u0186\2\0\1\u0186\15\0\1\u0186\6\0\1\u0189\1\u018b"+ - "\1\u018a\2\u0189\2\u018a\1\u0189\1\u018a\1\u0189\1\u0187\13\0"+ - "\1\u0146\270\0\1\u0174\7\0\1\u018c\1\u018d\1\u018e\224\0"+ - "\1\u0109\1\270\2\u018f\1\u0190\1\u0191\10\u018f\1\270\1\u0192"+ - "\5\u018f\6\270\1\u010a\12\270\237\0\1\u0109\1\u0193\2\u018f"+ - "\1\270\1\u018f\1\u0194\6\u018f\4\270\4\u018f\1\270\1\u018f"+ - "\1\270\3\u018f\1\u010a\12\270\237\0\1\u0109\3\270\1\u018f"+ - "\1\270\1\u018f\4\270\1\u018f\10\270\1\u018f\2\270\1\u018f"+ - "\2\270\1\u018f\1\u010a\12\270\237\0\1\u0109\1\270\1\u018f"+ - "\1\u0195\2\u018f\2\270\1\u018f\6\270\3\u018f\11\270\1\u010a"+ - "\12\270\237\0\1\u0109\3\270\1\u018f\1\270\1\u018f\10\270"+ - "\1\u018f\1\270\2\u018f\10\270\1\u010a\12\270\237\0\1\u0109"+ - "\4\270\1\u0196\5\270\1\u018f\17\270\1\u010a\12\270\237\0"+ - "\1\u0109\4\270\2\u018f\2\270\1\u018f\1\270\1\u018f\13\270"+ - "\1\u018f\2\270\1\u018f\1\u010a\12\270\237\0\1\u0109\1\u018f"+ - "\1\270\3\u018f\1\u0197\14\u018f\2\270\2\u018f\2\270\1\u018f"+ - "\1\270\1\u010a\12\270\237\0\1\u0109\2\270\4\u018f\3\270"+ - "\2\u018f\1\u0198\1\u018f\1\270\2\u018f\12\270\1\u010a\12\270"+ - "\237\0\1\u0109\2\u018f\2\270\1\u018f\3\270\1\u018f\5\270"+ 
- "\3\u018f\3\270\1\u018f\2\270\3\u018f\1\u010a\12\270\237\0"+ - "\1\u0109\5\u018f\1\u0199\1\270\1\u018f\1\u019a\7\u018f\1\u019b"+ - "\3\u018f\1\270\1\u018f\1\270\3\u018f\1\u010a\12\270\237\0"+ - "\1\u0109\1\u019c\1\u018f\1\270\1\u0193\6\u018f\3\270\1\u018f"+ - "\2\270\1\u018f\2\270\1\u018f\6\270\1\u010a\12\270\237\0"+ - "\1\u0109\1\u018f\31\270\1\u010a\12\270\237\0\1\u0109\1\u018f"+ - "\2\270\1\u018f\1\u019d\1\270\2\u018f\1\270\3\u018f\2\270"+ - "\2\u018f\1\270\1\u018f\3\270\1\u018f\2\270\2\u018f\1\u010a"+ - "\12\270\237\0\1\u0109\6\u018f\1\270\5\u018f\3\270\2\u018f"+ - "\1\270\10\u018f\1\u010a\12\270\237\0\1\u0109\1\270\2\u018f"+ - "\1\u019a\1\u019e\3\u018f\1\270\3\u018f\1\270\1\u018f\1\270"+ - "\1\u018f\1\270\1\u018f\1\270\1\u018f\1\270\3\u018f\1\270"+ - "\1\u018f\1\u010a\12\270\237\0\1\u0109\1\u018f\6\270\1\u018f"+ - "\6\270\1\u018f\4\270\1\u018f\4\270\2\u018f\1\u010a\12\270"+ - "\237\0\1\u0109\6\270\1\u018f\7\270\1\u018f\13\270\1\u010a"+ - "\12\270\237\0\1\u0109\13\270\1\u019f\6\270\1\u01a0\7\270"+ - "\1\u010a\12\270\237\0\1\u0109\1\u018f\11\270\1\u018f\6\270"+ - "\1\u018f\10\270\1\u010a\12\270\237\0\1\u0109\1\u018f\1\270"+ - "\6\u018f\1\u01a1\1\270\2\u018f\2\270\2\u018f\1\270\1\u018f"+ - "\1\270\6\u018f\1\270\1\u010a\12\270\237\0\1\u0109\4\270"+ - "\1\u018f\5\270\2\u018f\3\270\2\u018f\10\270\1\u018f\1\u010a"+ - "\12\270\237\0\1\u0109\3\270\1\u018f\1\270\1\u01a2\4\270"+ - "\1\u018f\2\270\1\u018f\14\270\1\u010a\12\270\237\0\1\u0109"+ - "\2\u018f\1\270\1\u018f\3\270\2\u018f\2\270\1\u018f\4\270"+ - "\1\u018f\11\270\1\u010a\12\270\237\0\1\u0109\3\270\1\u018f"+ - "\13\270\1\u018f\12\270\1\u010a\12\270\237\0\1\u0109\3\270"+ - "\2\u018f\2\270\2\u018f\1\270\2\u018f\1\270\1\u018f\3\270"+ - "\1\u018f\1\270\1\u018f\1\270\1\u018f\2\270\1\u018f\1\270"+ - "\1\u010a\12\270\27\0\1\53\1\0\1\54\2\0\1\221"+ - "\1\0\1\222\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\223\2\0\1\224\4\0\1\225"+ - "\3\0\1\226\17\0\1\67\2\0\1\227\21\0\1\230"+ - 
"\2\0\1\231\57\0\1\30\1\74\7\0\1\74\2\0"+ - "\1\30\1\160\1\272\1\273\1\274\1\275\1\276\1\277"+ - "\1\300\1\301\1\302\1\303\1\304\1\305\1\306\1\307"+ - "\1\310\1\311\1\312\1\313\1\314\1\315\1\316\1\317"+ - "\1\320\1\321\1\322\1\323\1\161\12\324\1\u0174\3\161"+ - "\1\0\2\161\1\162\1\u0163\1\u0164\1\u0165\3\0\1\161"+ - "\1\206\3\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\221\1\0\1\222\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\223\2\0\1\224\4\0"+ - "\1\225\3\0\1\226\17\0\1\67\2\0\1\227\21\0"+ - "\1\230\2\0\1\231\57\0\1\30\1\74\7\0\1\74"+ - "\2\0\1\30\1\0\32\30\1\0\12\u01a3\237\0\1\u01a4"+ - "\45\u0163\1\u018c\2\u0163\1\u01a5\1\u018c\2\u0163\1\u01a6\2\u0163"+ - "\1\u0165\2\0\1\u018c\1\u0163\4\0\1\u0163\1\161\212\0"+ - "\1\u01a7\45\u0164\1\u018d\2\u0164\1\u01a8\1\0\2\161\1\u01a9"+ - "\1\u0163\1\u0164\1\u0165\2\0\1\u018d\1\u0164\4\0\2\161"+ - "\212\0\1\u01aa\45\u0165\1\u018e\2\u0165\1\u01ab\1\u018e\2\u0165"+ - "\1\u01ac\2\u0165\1\161\2\0\1\u018e\1\u0165\4\0\1\u0165"+ - "\1\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\163\5\41\1\u010d"+ - "\24\41\1\164\12\165\1\74\1\161\1\166\1\161\1\0"+ - "\1\161\1\167\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\2\0\1\53\1\0\1\54\2\0\1\55\1\0\1\56"+ - "\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\63\2\0\1\64\4\0\1\65\3\0\1\66"+ - "\17\0\1\67\2\0\1\70\21\0\1\71\2\0\1\72"+ - "\57\0\2\30\1\73\1\0\1\74\1\0\1\74\1\75"+ - "\1\0\1\30\2\0\1\30\1\163\15\41\1\u010d\14\41"+ - "\1\164\12\165\1\74\1\161\1\166\1\161\1\0\1\161"+ - "\1\167\1\162\3\161\3\0\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\163\10\41\1\u010d\21\41\1\164"+ - 
"\12\165\1\74\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - "\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\3\41\1\u01ad\26\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\3\41\1\u010d\26\41\1\164\12\165\1\74"+ - "\1\161\1\166\1\161\1\0\1\161\1\167\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\55\1\0\1\56\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\63\2\0\1\64"+ - "\4\0\1\65\3\0\1\66\17\0\1\67\2\0\1\70"+ - "\21\0\1\71\2\0\1\72\57\0\2\30\1\73\1\0"+ - "\1\74\1\0\1\74\1\75\1\0\1\30\2\0\1\30"+ - "\1\163\27\41\1\u01ae\2\41\1\164\12\165\1\74\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\160\32\324\1\u01af\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\55"+ - "\1\0\1\56\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\63\2\0\1\64\4\0\1\65"+ - "\3\0\1\66\17\0\1\67\2\0\1\70\21\0\1\71"+ - "\2\0\1\72\57\0\2\30\1\73\1\0\1\74\1\0"+ - "\1\74\1\75\1\0\1\30\2\0\1\30\1\163\16\41"+ - "\1\u010d\13\41\1\164\12\165\1\74\1\161\1\166\1\161"+ - "\1\0\1\161\1\167\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\221\1\0"+ - "\1\222\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\223\2\0\1\224\4\0\1\225\3\0"+ - "\1\226\17\0\1\67\2\0\1\227\21\0\1\230\2\0"+ - "\1\231\57\0\1\30\1\74\7\0\1\74\2\0\1\30"+ - "\1\0\32\30\24\0\1\u01b0\304\0\1\u01b1\15\0\1\53"+ - "\1\0\1\54\2\0\1\55\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\63"+ - 
"\2\0\1\64\4\0\1\65\3\0\1\66\17\0\1\67"+ - "\2\0\1\70\21\0\1\71\2\0\1\72\57\0\2\30"+ - "\1\73\1\0\1\74\1\0\1\74\1\75\1\0\1\30"+ - "\2\0\1\30\1\163\5\41\1\u01b2\24\41\1\164\12\165"+ - "\1\74\1\161\1\166\1\161\1\0\1\161\1\167\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\55\1\0\1\56\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\63\2\0"+ - "\1\64\4\0\1\65\3\0\1\66\17\0\1\67\2\0"+ - "\1\70\21\0\1\71\2\0\1\72\57\0\2\30\1\73"+ - "\1\0\1\74\1\0\1\74\1\75\1\0\1\30\2\0"+ - "\1\30\1\163\32\41\1\164\12\165\1\u0172\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\221"+ - "\1\0\1\222\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\223\2\0\1\224\4\0\1\225"+ - "\3\0\1\226\17\0\1\67\2\0\1\227\21\0\1\230"+ - "\2\0\1\231\57\0\1\30\1\74\7\0\1\74\2\0"+ - "\1\30\1\0\32\30\24\0\1\u01b3\225\0\1\160\1\334"+ - "\1\335\1\336\1\337\1\340\1\341\1\342\1\343\1\344"+ - "\1\345\1\346\1\347\1\350\1\351\1\352\1\353\1\354"+ - "\1\355\1\356\1\357\1\360\1\361\1\362\1\363\1\364"+ - "\1\365\1\161\12\324\1\u0174\3\161\1\0\2\161\1\162"+ - "\1\u0163\1\u0164\1\u0165\3\0\1\161\1\206\3\0\2\161"+ - "\246\0\12\u01a3\237\0\1\u0121\5\324\1\u0127\24\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\15\324\1\u0127\14\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\10\324\1\u0127"+ - "\21\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\3\324"+ - "\1\u01b4\26\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\3\324\1\u0127\26\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\27\324\1\u01b5\2\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\16\324\1\u0127\13\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\76\1\0"+ - 
"\1\77\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\100\2\0\1\101\4\0\1\102\3\0"+ - "\1\103\17\0\1\67\2\0\1\104\21\0\1\105\2\0"+ - "\1\106\57\0\1\30\2\31\2\0\2\107\1\110\1\0"+ - "\1\31\2\0\1\30\1\u01b6\32\41\1\164\12\u017d\1\0"+ - "\1\161\1\175\1\161\1\0\2\176\1\162\3\161\2\0"+ - "\1\107\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\76\1\0\1\77\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\100\2\0\1\101"+ - "\4\0\1\102\3\0\1\103\17\0\1\67\2\0\1\104"+ - "\21\0\1\105\2\0\1\106\57\0\1\30\2\31\2\0"+ - "\2\107\1\110\1\0\1\31\2\0\1\30\1\u01b6\32\41"+ - "\1\164\12\u01b7\1\0\1\161\1\175\1\161\1\0\2\176"+ - "\1\162\3\161\2\0\1\107\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\76\1\0\1\77\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\100\2\0\1\101\4\0\1\102\3\0\1\103\17\0"+ - "\1\67\2\0\1\104\21\0\1\105\2\0\1\106\57\0"+ - "\1\30\2\31\2\0\2\107\1\110\1\0\1\31\2\0"+ - "\1\30\1\u01b6\32\41\1\164\1\u017d\1\u01b8\1\u01b7\2\u017d"+ - "\2\u01b7\1\u017d\1\u01b7\1\u017d\1\0\1\161\1\175\1\161"+ - "\1\0\2\176\1\162\3\161\2\0\1\107\1\161\4\0"+ - "\2\161\260\0\1\375\237\0\4\u01b9\2\0\1\u01b9\15\0"+ - "\1\u01b9\6\0\12\u01b9\1\u0181\237\0\4\u01ba\2\0\1\u01ba"+ - "\15\0\1\u01ba\6\0\12\u01ba\1\u01bb\237\0\4\u01bc\2\0"+ - "\1\u01bc\15\0\1\u01bc\6\0\12\u01bc\1\u01bd\13\0\1\u0146"+ - "\222\0\1\u0188\4\u01bc\2\0\1\u01bc\15\0\1\u01bc\6\0"+ - "\12\u01be\1\u01bd\13\0\1\u0146\222\0\1\u0188\4\u01bc\2\0"+ - "\1\u01bc\15\0\1\u01bc\6\0\12\u01bf\1\u01bd\13\0\1\u0146"+ - "\222\0\1\u0188\4\u01bc\2\0\1\u01bc\15\0\1\u01bc\6\0"+ - "\1\u01be\1\u01c0\1\u01bf\2\u01be\2\u01bf\1\u01be\1\u01bf\1\u01be"+ - "\1\u01bd\13\0\1\u0146\223\0\4\u01c1\2\0\1\u01c1\15\0"+ - "\1\u01c1\6\0\12\u01c1\1\u0187\13\0\1\u0146\223\0\4\u0182"+ - "\2\0\1\u0182\15\0\1\u0182\6\0\1\u0183\2\u0184\1\u0183"+ - "\5\u0184\1\u0185\273\0\1\u01c2\2\u01c3\1\u01c2\5\u01c3\1\u01c4"+ - "\237\0\1\u0188\4\u01c1\2\0\1\u01c1\15\0\1\u01c1\6\0"+ - "\12\u01c5\1\u0187\13\0\1\u0146\222\0\1\u0188\4\u01c1\2\0"+ - 
"\1\u01c1\15\0\1\u01c1\6\0\12\u01c1\1\u0187\13\0\1\u0146"+ - "\222\0\1\u0188\4\u01c1\2\0\1\u01c1\15\0\1\u01c1\6\0"+ - "\2\u01c5\1\u01c1\2\u01c5\2\u01c1\1\u01c5\1\u01c1\1\u01c5\1\u0187"+ - "\13\0\1\u0146\222\0\51\u018c\1\u01c6\6\u018c\1\u018e\2\0"+ - "\2\u018c\4\0\1\u018c\213\0\51\u018d\1\u01c7\3\0\1\u018d"+ - "\1\u018c\1\u018d\1\u018e\2\0\2\u018d\220\0\51\u018e\1\u01c8"+ - "\6\u018e\3\0\2\u018e\4\0\1\u018e\213\0\1\u01c9\32\270"+ - "\1\u010a\12\270\237\0\1\u01c9\4\270\1\u01ca\25\270\1\u010a"+ - "\12\270\237\0\1\u01c9\15\270\1\u0153\14\270\1\u010a\12\270"+ - "\237\0\1\u01c9\10\270\1\u0153\21\270\1\u010a\12\270\237\0"+ - "\1\u01c9\17\270\1\u018f\12\270\1\u010a\12\270\237\0\1\u01c9"+ - "\5\270\1\u01cb\4\270\1\u018f\17\270\1\u010a\12\270\237\0"+ - "\1\u0109\20\270\1\u018f\11\270\1\u010a\12\270\237\0\1\u0109"+ - "\7\270\1\u018f\22\270\1\u010a\12\270\237\0\1\u0109\27\270"+ - "\1\u018f\2\270\1\u010a\12\270\237\0\1\u01c9\6\270\1\u01ca"+ - "\10\270\1\u018f\12\270\1\u010a\12\270\237\0\1\u01c9\24\270"+ - "\1\u01cc\5\270\1\u010a\12\270\237\0\1\u0109\11\270\1\u018f"+ - "\20\270\1\u010a\12\270\237\0\1\u01c9\16\270\1\u01cd\13\270"+ - "\1\u010a\12\270\237\0\1\u01c9\12\270\1\u01ce\17\270\1\u010a"+ - "\12\270\237\0\1\u01c9\5\270\1\u018f\24\270\1\u010a\12\270"+ - "\237\0\1\u01c9\1\u01cf\31\270\1\u010a\12\270\237\0\1\u0109"+ - "\32\270\1\u01d0\12\270\237\0\1\u0109\22\270\1\u018f\7\270"+ - "\1\u010a\12\270\237\0\1\u01c9\23\270\1\u018f\6\270\1\u010a"+ - "\12\270\237\0\1\u01c9\24\270\1\u01d1\5\270\1\u010a\12\270"+ - "\273\0\12\u01d2\10\0\1\u018c\1\u018d\1\u018e\224\0\1\u01a4"+ - "\45\u0163\1\u018c\2\u0163\1\u01a5\1\u018c\2\u0163\1\u01a6\2\u0163"+ - "\1\u0165\2\0\1\u018c\1\u0163\1\206\3\0\1\u0163\1\161"+ - "\212\0\1\160\4\u01d3\2\161\1\u01d3\15\161\1\u01d3\6\161"+ - "\12\u01d3\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\51\u018c\1\u01c6\6\u018c\1\u018e"+ - "\1\271\1\0\2\u018c\4\0\1\u018c\213\0\1\u01a7\45\u0164"+ - 
"\1\u018d\2\u0164\1\u01a8\1\0\2\161\1\u01a9\1\u0163\1\u0164"+ - "\1\u0165\2\0\1\u018d\1\u0164\1\206\3\0\2\161\212\0"+ - "\1\160\4\u01d4\2\161\1\u01d4\15\161\1\u01d4\6\161\12\u01d4"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\51\u018d\1\u01c7\3\0\1\u018d\1\u018c"+ - "\1\u018d\1\u018e\1\271\1\0\2\u018d\220\0\1\u01aa\45\u0165"+ - "\1\u018e\2\u0165\1\u01ab\1\u018e\2\u0165\1\u01ac\2\u0165\1\161"+ - "\2\0\1\u018e\1\u0165\1\206\3\0\1\u0165\1\161\212\0"+ - "\1\160\4\u01d5\2\161\1\u01d5\15\161\1\u01d5\6\161\12\u01d5"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\51\u018e\1\u01c8\6\u018e\1\0\1\271"+ - "\1\0\2\u018e\4\0\1\u018e\3\0\1\53\1\0\1\54"+ - "\2\0\1\55\1\0\1\56\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\63\2\0\1\64"+ - "\4\0\1\65\3\0\1\66\17\0\1\67\2\0\1\70"+ - "\21\0\1\71\2\0\1\72\57\0\2\30\1\73\1\0"+ - "\1\74\1\0\1\74\1\75\1\0\1\30\2\0\1\30"+ - "\1\163\20\41\1\u01d6\11\41\1\164\12\165\1\74\1\161"+ - "\1\166\1\161\1\0\1\161\1\167\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\2\0\1\53\1\0\1\54\2\0"+ - "\1\55\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\63\2\0\1\64\4\0"+ - "\1\65\3\0\1\66\17\0\1\67\2\0\1\70\21\0"+ - "\1\71\2\0\1\72\57\0\2\30\1\73\1\0\1\74"+ - "\1\0\1\74\1\75\1\0\1\30\2\0\1\30\1\163"+ - "\3\41\1\u0118\26\41\1\164\12\165\1\74\1\161\1\166"+ - "\1\161\1\0\1\161\1\167\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\160\1\324\1\u01d7\1\u01d8\2\324"+ - "\1\u01d9\1\u01da\1\u01db\1\324\1\u01dc\1\u01dd\2\324\1\u01de"+ - "\1\u01df\2\324\1\u01e0\1\u01e1\1\u01e2\1\324\1\u01e3\1\u01e4"+ - "\1\324\1\u01e5\1\u01e6\1\164\1\u01e7\2\324\1\u01e8\1\u01e9"+ - "\1\u01ea\1\324\1\u01eb\1\u01ec\1\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\271\0"+ - "\1\u01ed\225\0\1\u01ee\32\u01ef\1\u01ee\12\u01ef\1\u01f0\2\u01ee"+ - "\1\u01f1\3\u01ee\1\u01f2\3\0\1\u01f3\1\0\2\u01ee\4\0"+ - "\1\u01ee\3\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - 
"\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - "\1\75\1\0\1\30\2\0\1\30\1\163\32\41\1\164"+ - "\12\165\1\u01f4\1\161\1\166\1\161\1\0\1\161\1\167"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\271\0\1\u01f5"+ - "\225\0\1\u0121\20\324\1\u01f6\11\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\3\324\1\u0132\26\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\7\0\1\241\1\0\1\242\17\0\1\243"+ - "\2\0\1\244\4\0\1\245\3\0\1\246\22\0\1\247"+ - "\21\0\1\250\2\0\1\251\60\0\1\107\1\31\6\0"+ - "\1\107\3\0\1\160\1\334\1\335\1\336\1\337\1\340"+ - "\1\341\1\342\1\343\1\344\1\345\1\346\1\347\1\350"+ - "\1\351\1\352\1\353\1\354\1\355\1\356\1\357\1\360"+ - "\1\361\1\362\1\363\1\364\1\365\1\161\1\u01f7\2\u01f8"+ - "\1\u01f7\5\u01f8\1\u01f9\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\1\206\3\0\2\161\2\0\1\53"+ - "\1\0\1\54\2\0\1\76\1\0\1\77\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\100"+ - "\2\0\1\101\4\0\1\102\3\0\1\103\17\0\1\67"+ - "\2\0\1\104\21\0\1\105\2\0\1\106\57\0\1\30"+ - "\2\31\2\0\2\107\1\110\1\0\1\31\2\0\1\30"+ - "\1\u01b6\32\41\1\164\12\371\1\0\1\161\1\175\1\161"+ - "\1\0\2\176\1\162\3\161\2\0\1\107\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\76\1\0"+ - "\1\77\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\100\2\0\1\101\4\0\1\102\3\0"+ - "\1\103\17\0\1\67\2\0\1\104\21\0\1\105\2\0"+ - "\1\106\57\0\1\30\2\31\2\0\2\107\1\110\1\0"+ - "\1\31\2\0\1\30\1\u01b6\32\41\1\164\2\u01b7\1\371"+ - "\2\u01b7\2\371\1\u01b7\1\371\1\u01b7\1\0\1\161\1\175"+ - "\1\161\1\0\2\176\1\162\3\161\2\0\1\107\1\161"+ - "\4\0\2\161\213\0\4\u01fa\2\0\1\u01fa\15\0\1\u01fa"+ - "\6\0\12\u01fa\1\u0181\237\0\4\u01fb\2\0\1\u01fb\15\0"+ - "\1\u01fb\6\0\12\u01fb\1\u01fc\237\0\4\u01fd\2\0\1\u01fd"+ - "\15\0\1\u01fd\6\0\1\u01fe\2\u01ff\1\u01fe\5\u01ff\1\u0200"+ - "\14\0\1\u0146\223\0\4\u0201\2\0\1\u0201\15\0\1\u0201"+ - "\6\0\12\u0201\1\u01bd\13\0\1\u0146\223\0\4\u01fd\2\0"+ - 
"\1\u01fd\15\0\1\u01fd\6\0\1\u01fe\2\u01ff\1\u01fe\5\u01ff"+ - "\1\u0200\237\0\1\u0188\4\u0201\2\0\1\u0201\15\0\1\u0201"+ - "\6\0\12\u0202\1\u01bd\13\0\1\u0146\222\0\1\u0188\4\u0201"+ - "\2\0\1\u0201\15\0\1\u0201\6\0\12\u0201\1\u01bd\13\0"+ - "\1\u0146\222\0\1\u0188\4\u0201\2\0\1\u0201\15\0\1\u0201"+ - "\6\0\2\u0202\1\u0201\2\u0202\2\u0201\1\u0202\1\u0201\1\u0202"+ - "\1\u01bd\13\0\1\u0146\223\0\4\u0203\2\0\1\u0203\15\0"+ - "\1\u0203\6\0\12\u0203\1\u0187\13\0\1\u0146\222\0\1\u0204"+ - "\33\0\12\u01c3\237\0\1\u0204\33\0\12\u0205\237\0\1\u0204"+ - "\33\0\1\u01c3\1\u0206\1\u0205\2\u01c3\2\u0205\1\u01c3\1\u0205"+ - "\1\u01c3\237\0\1\u0188\4\u0203\2\0\1\u0203\15\0\1\u0203"+ - "\6\0\12\u0203\1\u0187\13\0\1\u0146\223\0\4\u0207\2\0"+ - "\1\u0207\15\0\1\u0207\6\0\12\u0207\240\0\4\u0208\2\0"+ - "\1\u0208\15\0\1\u0208\6\0\12\u0208\240\0\4\u0209\2\0"+ - "\1\u0209\15\0\1\u0209\6\0\12\u0209\237\0\1\u0109\5\270"+ - "\1\u018f\24\270\1\u010a\12\270\237\0\1\u0109\15\270\1\u018f"+ - "\14\270\1\u010a\12\270\237\0\1\u0109\10\270\1\u018f\21\270"+ - "\1\u010a\12\270\237\0\1\u0109\3\270\1\u020a\26\270\1\u010a"+ - "\12\270\237\0\1\u0109\3\270\1\u018f\26\270\1\u010a\12\270"+ - "\237\0\1\u0109\27\270\1\u020b\2\270\1\u010a\12\270\240\0"+ - "\32\270\1\u020c\12\270\237\0\1\u0109\16\270\1\u018f\13\270"+ - "\1\u010a\12\270\273\0\12\u020d\10\0\1\u018c\1\u018d\1\u018e"+ - "\224\0\1\160\4\u0163\2\161\1\u0163\15\161\1\u0163\6\161"+ - "\12\u0163\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\160\4\u0164\2\161\1\u0164"+ - "\15\161\1\u0164\6\161\12\u0164\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\160"+ - "\4\u0165\2\161\1\u0165\15\161\1\u0165\6\161\12\u0165\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\2\0\1\53\1\0\1\54\2\0\1\55\1\0"+ - "\1\56\4\0\1\57\1\0\1\60\1\0\1\61\2\0"+ - "\1\62\3\0\1\63\2\0\1\64\4\0\1\65\3\0"+ - "\1\66\17\0\1\67\2\0\1\70\21\0\1\71\2\0"+ - "\1\72\57\0\2\30\1\73\1\0\1\74\1\0\1\74"+ - 
"\1\75\1\0\1\30\2\0\1\30\1\163\12\41\1\u010d"+ - "\17\41\1\164\12\165\1\74\1\161\1\166\1\161\1\0"+ - "\1\161\1\167\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\11\324\1\u020e\20\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\3\324\1\u020f\26\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\7\324\1\u0210\22\324\1\164"+ - "\4\324\1\u0211\5\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\10\324"+ - "\1\u0212\4\324\1\u0213\5\324\1\u0214\6\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\3\324\1\u0215\26\324\1\164"+ - "\2\324\1\u0216\7\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\7\324"+ - "\1\u0217\22\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\7\324\1\u0218\22\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\7\324\1\u0219\22\324\1\164\3\324\1\u021a\6\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\5\324\1\u021b"+ - "\4\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\7\324\1\u021c\22\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\31\324\1\u021d"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\324\1\u021e"+ - "\30\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\7\324"+ - "\1\u021f\1\324\1\u0220\20\324\1\164\11\324\1\u021b\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\22\324\1\u0221\7\324\1\164\2\324"+ - "\1\u0222\7\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\6\324\1\u0223"+ - "\1\u0224\22\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - 
"\7\324\1\u0225\5\324\1\u0226\14\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\23\324\1\u0227\6\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\3\324\1\u0228"+ - "\6\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\3\324\1\u0229\26\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\17\324\1\u022a"+ - "\12\324\1\164\1\u022b\11\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\1\324\1\u021b\10\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\1\u022c\11\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\213\0"+ - "\32\u022d\1\0\12\u022d\11\0\1\u022e\1\0\1\u022f\223\0"+ - "\46\u01ee\1\u01f0\2\u01ee\1\u01f1\3\u01ee\1\u01f2\5\0\2\u01ee"+ - "\4\0\1\u01ee\213\0\1\u0230\32\u01ef\1\u0231\12\u01ef\1\u0232"+ - "\2\u01ee\1\u01f1\3\u01ee\1\u01f2\1\0\1\u0233\3\0\2\u01ee"+ - "\4\0\1\u01ee\213\0\46\u01f0\1\0\2\u01f0\1\u0234\3\u01f0"+ - "\1\u01f2\5\0\2\u01f0\4\0\1\u01f0\214\0\4\u0235\2\0"+ - "\1\u0235\15\0\1\u0235\6\0\12\u0235\240\0\32\u0236\1\0"+ - "\12\u0236\13\0\1\u01f3\224\0\4\u0237\2\0\1\u0237\15\0"+ - "\1\u0237\6\0\12\u0237\1\u0238\26\0\1\53\1\0\1\54"+ - "\2\0\1\221\1\0\1\222\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\223\2\0\1\224"+ - "\4\0\1\225\3\0\1\226\17\0\1\67\2\0\1\227"+ - "\21\0\1\230\2\0\1\231\57\0\1\30\1\74\7\0"+ - "\1\74\2\0\1\30\1\u0239\32\u023a\13\u0239\1\0\3\u0239"+ - "\1\0\2\u0239\1\0\3\u0239\3\0\1\u0239\1\u023b\3\0"+ - "\2\u0239\212\0\1\u023c\32\u023d\1\u023c\12\u023d\1\u023e\2\u023c"+ - "\1\u023f\3\u023c\1\u0240\3\0\1\u0241\1\0\2\u023c\4\0"+ - "\1\u023c\213\0\1\u0121\12\324\1\u0127\17\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\2\0\1\53\1\0\1\54\2\0\1\76"+ - "\1\0\1\77\4\0\1\57\1\0\1\60\1\0\1\61"+ - "\2\0\1\62\3\0\1\100\2\0\1\101\4\0\1\102"+ 
- "\3\0\1\103\17\0\1\67\2\0\1\104\21\0\1\105"+ - "\2\0\1\106\57\0\1\30\2\31\2\0\2\107\1\110"+ - "\1\0\1\31\2\0\1\30\1\u013e\32\41\1\164\12\u01f8"+ - "\1\u0174\1\161\1\175\1\161\1\0\2\176\1\162\1\u0163"+ - "\1\u0164\1\u0165\2\0\1\107\1\161\4\0\2\161\2\0"+ - "\1\53\1\0\1\54\2\0\1\76\1\0\1\77\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\100\2\0\1\101\4\0\1\102\3\0\1\103\17\0"+ - "\1\67\2\0\1\104\21\0\1\105\2\0\1\106\57\0"+ - "\1\30\2\31\2\0\2\107\1\110\1\0\1\31\2\0"+ - "\1\30\1\u013e\32\41\1\164\12\u0242\1\u0174\1\161\1\175"+ - "\1\161\1\0\2\176\1\162\1\u0163\1\u0164\1\u0165\2\0"+ - "\1\107\1\161\4\0\2\161\2\0\1\53\1\0\1\54"+ - "\2\0\1\76\1\0\1\77\4\0\1\57\1\0\1\60"+ - "\1\0\1\61\2\0\1\62\3\0\1\100\2\0\1\101"+ - "\4\0\1\102\3\0\1\103\17\0\1\67\2\0\1\104"+ - "\21\0\1\105\2\0\1\106\57\0\1\30\2\31\2\0"+ - "\2\107\1\110\1\0\1\31\2\0\1\30\1\u013e\32\41"+ - "\1\164\1\u01f8\1\u0243\1\u0242\2\u01f8\2\u0242\1\u01f8\1\u0242"+ - "\1\u01f8\1\u0174\1\161\1\175\1\161\1\0\2\176\1\162"+ - "\1\u0163\1\u0164\1\u0165\2\0\1\107\1\161\4\0\2\161"+ - "\260\0\1\u0181\237\0\4\u0244\2\0\1\u0244\15\0\1\u0244"+ - "\6\0\12\u0244\1\u01fc\237\0\4\u0245\2\0\1\u0245\15\0"+ - "\1\u0245\6\0\12\u0245\1\u0246\237\0\4\u0247\2\0\1\u0247"+ - "\15\0\1\u0247\6\0\12\u0247\1\u0248\13\0\1\u0146\222\0"+ - "\1\u0188\4\u0247\2\0\1\u0247\15\0\1\u0247\6\0\12\u0249"+ - "\1\u0248\13\0\1\u0146\222\0\1\u0188\4\u0247\2\0\1\u0247"+ - "\15\0\1\u0247\6\0\12\u024a\1\u0248\13\0\1\u0146\222\0"+ - "\1\u0188\4\u0247\2\0\1\u0247\15\0\1\u0247\6\0\1\u0249"+ - "\1\u024b\1\u024a\2\u0249\2\u024a\1\u0249\1\u024a\1\u0249\1\u0248"+ - "\13\0\1\u0146\223\0\4\u024c\2\0\1\u024c\15\0\1\u024c"+ - "\6\0\12\u024c\1\u01bd\13\0\1\u0146\222\0\1\u0188\4\u024c"+ - "\2\0\1\u024c\15\0\1\u024c\6\0\12\u024c\1\u01bd\13\0"+ - "\1\u0146\270\0\1\u0187\13\0\1\u0146\256\0\1\u024d\2\u024e"+ - "\1\u024d\5\u024e\1\u024f\237\0\1\u0204\304\0\1\u0204\33\0"+ - "\2\u0205\1\0\2\u0205\2\0\1\u0205\1\0\1\u0205\240\0"+ - 
"\4\u018c\2\0\1\u018c\15\0\1\u018c\6\0\12\u018c\240\0"+ - "\4\u018d\2\0\1\u018d\15\0\1\u018d\6\0\12\u018d\240\0"+ - "\4\u018e\2\0\1\u018e\15\0\1\u018e\6\0\12\u018e\237\0"+ - "\1\u0109\20\270\1\u0250\11\270\1\u010a\12\270\237\0\1\u0109"+ - "\3\270\1\u019a\26\270\1\u010a\12\270\240\0\1\270\1\u0251"+ - "\1\u0252\2\270\1\u0253\1\u0254\1\u0255\1\270\1\u0256\1\u0257"+ - "\2\270\1\u0258\1\u0259\2\270\1\u025a\1\u025b\1\u025c\1\270"+ - "\1\u025d\1\u025e\1\270\1\u025f\1\u0260\1\u010a\1\u0261\2\270"+ - "\1\u0262\1\u0263\1\u0264\1\270\1\u0265\1\u0266\1\270\273\0"+ - "\12\u0267\10\0\1\u018c\1\u018d\1\u018e\224\0\1\u0121\1\324"+ - "\1\u0268\30\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\24\324\1\u0269\5\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\24\324\1\u026a\5\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\1\324\1\u026b\30\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\14\324\1\u026c\15\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\1\324\1\u026d\30\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\1\324\1\u026e\30\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\324\1\u026f"+ - "\30\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\21\324"+ - "\1\u0270\10\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\24\324\1\u0271\5\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\24\324\1\u0272\5\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\24\324\1\u0273\5\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - 
"\2\161\212\0\1\u0121\1\u0177\31\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\24\324\1\u026f\5\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\24\324\1\u0274\5\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\1\324\1\u0275\30\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\31\324\1\u0276"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\24\324\1\u0277"+ - "\5\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\1\324"+ - "\1\u0278\30\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\1\u0279\31\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\21\324\1\u027a\10\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\4\324\1\u027b\25\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\24\324\1\u027c\5\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\24\324\1\u027d\5\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\4\324\1\u027e\25\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\21\324\1\u027f\10\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\24\324\1\u0280"+ - "\5\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\1\u0281\11\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\7\324\1\u0282\2\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\1\u0283\31\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - 
"\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\1\u0284\31\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0285"+ - "\32\u022d\1\u0286\12\u022d\11\0\1\u022e\225\0\51\u022e\1\u0287"+ - "\3\0\3\u022e\1\u018e\3\0\1\u022e\221\0\4\u0288\2\0"+ - "\1\u0288\15\0\1\u0288\6\0\12\u0288\1\u0289\236\0\1\u01ee"+ - "\32\u01ef\1\u01ee\12\u01ef\1\u01f0\2\u01ee\1\u01f1\3\u01ee\1\u01f2"+ - "\5\0\2\u01ee\4\0\1\u01ee\213\0\1\u01ee\32\u01ef\1\u0231"+ - "\12\u01ef\1\u01f0\2\u01ee\1\u01f1\3\u01ee\1\u01f2\5\0\2\u01ee"+ - "\4\0\1\u01ee\213\0\34\u01f0\12\u028a\1\0\2\u01f0\1\u0234"+ - "\3\u01f0\1\u01f2\5\0\2\u01f0\4\0\1\u01f0\213\0\51\u0233"+ - "\1\u028b\3\0\3\u0233\1\u018e\2\0\1\u028c\1\u0233\221\0"+ - "\4\u028d\2\0\1\u028d\15\0\1\u028d\6\0\12\u028d\240\0"+ - "\4\u01ee\2\0\1\u01ee\15\0\1\u01ee\6\0\12\u01ee\237\0"+ - "\1\u028e\32\u0236\1\u028f\12\u0236\1\u0290\10\0\1\u0233\226\0"+ - "\4\u0291\2\0\1\u0291\15\0\1\u0291\6\0\12\u0291\1\u0292"+ - "\304\0\1\u0293\236\0\1\u0294\45\u0239\1\0\3\u0239\1\0"+ - "\2\u0239\1\u0295\3\u0239\3\0\1\u0239\4\0\2\u0239\2\0"+ - "\1\53\1\0\1\54\2\0\1\55\1\0\1\56\4\0"+ - "\1\57\1\0\1\60\1\0\1\61\2\0\1\62\3\0"+ - "\1\63\2\0\1\64\4\0\1\65\3\0\1\66\17\0"+ - "\1\67\2\0\1\70\21\0\1\71\2\0\1\72\57\0"+ - "\2\30\1\73\1\0\1\74\1\0\1\74\1\75\1\0"+ - "\1\30\2\0\1\30\1\u0296\32\u023a\1\u0239\12\u0297\1\74"+ - "\1\u0239\1\u0298\1\u0239\1\0\1\u0239\1\u0299\1\u0295\3\u0239"+ - "\3\0\1\u0239\4\0\2\u0239\212\0\65\u023b\1\u029a\1\u023b"+ - "\1\u029b\1\0\2\u023b\212\0\46\u023c\1\u023e\2\u023c\1\u023f"+ - "\3\u023c\1\u0240\5\0\2\u023c\4\0\1\u023c\213\0\1\u029c"+ - "\32\u023d\1\u029d\12\u023d\1\u029e\2\u023c\1\u023f\3\u023c\1\u0240"+ - "\1\u018c\1\u018d\1\u018e\2\0\2\u023c\4\0\1\u023c\213\0"+ - "\46\u023e\1\0\2\u023e\1\u029f\3\u023e\1\u0240\5\0\2\u023e"+ - "\4\0\1\u023e\214\0\4\u02a0\2\0\1\u02a0\15\0\1\u02a0"+ - "\6\0\12\u02a0\240\0\32\u02a1\1\0\12\u02a1\13\0\1\u0241"+ - "\13\0\1\53\1\0\1\54\2\0\1\76\1\0\1\77"+ - 
"\4\0\1\57\1\0\1\60\1\0\1\61\2\0\1\62"+ - "\3\0\1\100\2\0\1\101\4\0\1\102\3\0\1\103"+ - "\17\0\1\67\2\0\1\104\21\0\1\105\2\0\1\106"+ - "\57\0\1\30\2\31\2\0\2\107\1\110\1\0\1\31"+ - "\2\0\1\30\1\u013e\32\41\1\164\12\371\1\u0174\1\161"+ - "\1\175\1\161\1\0\2\176\1\162\1\u0163\1\u0164\1\u0165"+ - "\2\0\1\107\1\161\4\0\2\161\2\0\1\53\1\0"+ - "\1\54\2\0\1\76\1\0\1\77\4\0\1\57\1\0"+ - "\1\60\1\0\1\61\2\0\1\62\3\0\1\100\2\0"+ - "\1\101\4\0\1\102\3\0\1\103\17\0\1\67\2\0"+ - "\1\104\21\0\1\105\2\0\1\106\57\0\1\30\2\31"+ - "\2\0\2\107\1\110\1\0\1\31\2\0\1\30\1\u013e"+ - "\32\41\1\164\2\u0242\1\371\2\u0242\2\371\1\u0242\1\371"+ - "\1\u0242\1\u0174\1\161\1\175\1\161\1\0\2\176\1\162"+ - "\1\u0163\1\u0164\1\u0165\2\0\1\107\1\161\4\0\2\161"+ - "\213\0\4\u02a2\2\0\1\u02a2\15\0\1\u02a2\6\0\12\u02a2"+ - "\1\u01fc\237\0\4\u02a3\2\0\1\u02a3\15\0\1\u02a3\6\0"+ - "\12\u02a3\1\u02a4\237\0\4\u02a5\2\0\1\u02a5\15\0\1\u02a5"+ - "\6\0\1\u02a6\2\u02a7\1\u02a6\5\u02a7\1\u02a8\14\0\1\u0146"+ - "\223\0\4\u02a9\2\0\1\u02a9\15\0\1\u02a9\6\0\12\u02a9"+ - "\1\u0248\13\0\1\u0146\223\0\4\u02a5\2\0\1\u02a5\15\0"+ - "\1\u02a5\6\0\1\u02a6\2\u02a7\1\u02a6\5\u02a7\1\u02a8\237\0"+ - "\1\u0188\4\u02a9\2\0\1\u02a9\15\0\1\u02a9\6\0\12\u02aa"+ - "\1\u0248\13\0\1\u0146\222\0\1\u0188\4\u02a9\2\0\1\u02a9"+ - "\15\0\1\u02a9\6\0\12\u02a9\1\u0248\13\0\1\u0146\222\0"+ - "\1\u0188\4\u02a9\2\0\1\u02a9\15\0\1\u02a9\6\0\2\u02aa"+ - "\1\u02a9\2\u02aa\2\u02a9\1\u02aa\1\u02a9\1\u02aa\1\u0248\13\0"+ - "\1\u0146\270\0\1\u01bd\13\0\1\u0146\222\0\1\u02ab\33\0"+ - "\12\u024e\237\0\1\u02ab\33\0\12\u02ac\237\0\1\u02ab\33\0"+ - "\1\u024e\1\u02ad\1\u02ac\2\u024e\2\u02ac\1\u024e\1\u02ac\1\u024e"+ - "\237\0\1\u0109\12\270\1\u018f\17\270\1\u010a\12\270\237\0"+ - "\1\u0109\11\270\1\u02ae\20\270\1\u010a\12\270\237\0\1\u0109"+ - "\3\270\1\u02af\26\270\1\u010a\12\270\237\0\1\u0109\7\270"+ - "\1\u02b0\22\270\1\u010a\4\270\1\u02b1\5\270\237\0\1\u0109"+ - "\10\270\1\u02b2\4\270\1\u02b3\5\270\1\u02b4\6\270\1\u010a"+ - 
"\12\270\237\0\1\u0109\3\270\1\u02b5\26\270\1\u010a\2\270"+ - "\1\u02b6\7\270\237\0\1\u0109\7\270\1\u02b7\22\270\1\u010a"+ - "\12\270\237\0\1\u0109\7\270\1\u02b8\22\270\1\u010a\12\270"+ - "\237\0\1\u0109\7\270\1\u02b9\22\270\1\u010a\3\270\1\u02ba"+ - "\6\270\237\0\1\u0109\32\270\1\u010a\5\270\1\u02bb\4\270"+ - "\237\0\1\u0109\7\270\1\u02bc\22\270\1\u010a\12\270\237\0"+ - "\1\u0109\31\270\1\u02bd\1\u010a\12\270\237\0\1\u0109\1\270"+ - "\1\u02be\30\270\1\u010a\12\270\237\0\1\u0109\7\270\1\u02bf"+ - "\1\270\1\u02c0\20\270\1\u010a\11\270\1\u02bb\237\0\1\u0109"+ - "\22\270\1\u02c1\7\270\1\u010a\2\270\1\u02c2\7\270\237\0"+ - "\1\u0109\6\270\1\u02c3\1\u02c4\22\270\1\u010a\12\270\237\0"+ - "\1\u0109\7\270\1\u02c5\5\270\1\u02c6\14\270\1\u010a\12\270"+ - "\237\0\1\u0109\23\270\1\u02c7\6\270\1\u010a\12\270\237\0"+ - "\1\u0109\32\270\1\u010a\3\270\1\u02c8\6\270\237\0\1\u0109"+ - "\3\270\1\u02c9\26\270\1\u010a\12\270\237\0\1\u0109\17\270"+ - "\1\u02ca\12\270\1\u010a\1\u02cb\11\270\237\0\1\u0109\32\270"+ - "\1\u010a\1\270\1\u02bb\10\270\237\0\1\u0109\32\270\1\u010a"+ - "\1\u02cc\11\270\273\0\12\u02cd\10\0\1\u018c\1\u018d\1\u018e"+ - "\224\0\1\u0121\25\324\1\u02ce\4\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\u02cf\31\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\15\324\1\u02d0\14\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\21\324\1\u02d1\10\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\16\324\1\u02d2\4\324"+ - "\1\u02d3\6\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\4\324\1\u02d4\25\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\11\324\1\u02d5\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\4\324\1\u02d6\25\324\1\164\12\324\1\0\3\161"+ - 
"\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\11\324\1\u02d7\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\24\324\1\u02d8\5\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\u02d9\1\u02da\1\324\1\u02db\20\324"+ - "\1\u02dc\5\324\1\164\5\324\1\u02dd\4\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\16\324\1\u02de\13\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\11\324\1\u02df\13\324\1\u02e0\4\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\11\324\1\u02e1\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\23\324\1\u02e2"+ - "\6\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\31\324"+ - "\1\u02e3\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\26\324"+ - "\1\u02e4\3\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\11\324\1\u02e5\20\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\3\324\1\u02e6\6\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\5\324\1\u02e7\24\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\10\324\1\u02e8\21\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\3\324\1\u02e9\26\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\21\324\1\u02ea\6\324"+ - "\1\u02eb\1\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\12\324\1\u02ec\17\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\1\324\1\u02ed\10\324\1\0\3\161"+ - 
"\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\24\324\1\u02ee\5\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\24\324\1\u02ef\5\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\4\324\1\u02f0"+ - "\5\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\5\324\1\u02f1\23\324"+ - "\1\u02f2\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\213\0\32\u022d\1\0"+ - "\12\u022d\240\0\32\u022d\1\u0286\12\u022d\240\0\4\u02f3\2\0"+ - "\1\u02f3\15\0\1\u02f3\6\0\12\u02f3\240\0\4\u02f4\2\0"+ - "\1\u02f4\15\0\1\u02f4\6\0\12\u02f4\1\u02f5\304\0\1\u02f6"+ - "\236\0\34\u01f0\12\u02f7\1\0\2\u01f0\1\u0234\3\u01f0\1\u01f2"+ - "\1\0\1\u0233\3\0\2\u01f0\4\0\1\u01f0\214\0\4\u02f8"+ - "\2\0\1\u02f8\15\0\1\u02f8\6\0\12\u02f8\257\0\1\u02f9"+ - "\265\0\4\u01f0\2\0\1\u01f0\15\0\1\u01f0\6\0\12\u01f0"+ - "\240\0\32\u0236\1\0\12\u0236\240\0\32\u0236\1\u028f\12\u0236"+ - "\273\0\12\u02fa\240\0\4\u02fb\2\0\1\u02fb\15\0\1\u02fb"+ - "\6\0\12\u02fb\1\u0292\237\0\4\u02fc\2\0\1\u02fc\15\0"+ - "\1\u02fc\6\0\12\u02fc\1\u02fd\237\0\4\u02fe\2\0\1\u02fe"+ - "\15\0\1\u02fe\6\0\1\u02ff\2\u0300\1\u02ff\5\u0300\1\u0301"+ - "\14\0\1\u0302\222\0\1\u0294\45\u0239\1\0\3\u0239\1\0"+ - "\2\u0239\1\u0295\3\u0239\3\0\1\u0239\1\u023b\3\0\2\u0239"+ - "\213\0\32\u0303\1\0\12\u0303\13\0\1\u0304\13\0\1\53"+ - "\1\0\1\54\2\0\1\221\1\0\1\222\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\223"+ - "\2\0\1\224\4\0\1\225\3\0\1\226\17\0\1\67"+ - "\2\0\1\227\21\0\1\230\2\0\1\231\57\0\1\30"+ - "\1\74\7\0\1\74\2\0\1\30\1\u0294\32\u023a\13\u0239"+ - "\1\0\3\u0239\1\0\2\u0239\1\u0295\3\u0239\3\0\1\u0239"+ - "\1\u023b\3\0\2\u0239\2\0\1\53\1\0\1\54\2\0"+ - "\1\211\1\0\1\56\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\212\2\0\1\213\4\0"+ - "\1\65\3\0\1\214\17\0\1\67\2\0\1\215\21\0"+ - "\1\216\2\0\1\217\57\0\1\30\2\73\2\0\2\220"+ - 
"\1\75\1\0\1\73\2\0\1\30\1\u0305\32\u023a\1\u0239"+ - "\12\u0297\1\0\1\u0239\1\u0298\1\u0239\1\0\2\u0306\1\u0295"+ - "\3\u0239\2\0\1\220\1\u0239\4\0\2\u0239\2\0\1\53"+ - "\1\0\1\54\2\0\1\232\1\0\1\56\4\0\1\57"+ - "\1\0\1\60\1\0\1\61\2\0\1\62\3\0\1\233"+ - "\2\0\1\234\4\0\1\65\3\0\1\235\17\0\1\67"+ - "\2\0\1\236\21\0\1\237\2\0\1\240\41\0\1\130"+ - "\15\0\1\30\1\75\1\73\1\132\3\0\1\75\1\0"+ - "\1\75\2\0\1\30\1\u0294\32\u023a\1\u0239\12\u0297\1\0"+ - "\1\u0239\1\u0298\1\u0239\1\0\2\u0239\1\u0295\3\u0239\3\0"+ - "\1\u0239\4\0\2\u0239\2\0\1\53\1\0\1\54\2\0"+ - "\1\221\1\0\1\222\4\0\1\57\1\0\1\60\1\0"+ - "\1\61\2\0\1\62\3\0\1\223\2\0\1\224\4\0"+ - "\1\225\3\0\1\226\17\0\1\67\2\0\1\227\21\0"+ - "\1\230\2\0\1\231\57\0\1\30\1\74\7\0\1\74"+ - "\2\0\1\30\1\u0294\32\u023a\13\u0239\1\0\3\u0239\1\0"+ - "\2\u0239\1\u0295\3\u0239\3\0\1\u0239\4\0\2\u0239\212\0"+ - "\1\u0307\54\0\1\u0295\227\0\74\u023b\211\0\1\u023c\32\u023d"+ - "\1\u023c\12\u023d\1\u023e\2\u023c\1\u023f\3\u023c\1\u0240\5\0"+ - "\2\u023c\4\0\1\u023c\213\0\1\u023c\32\u023d\1\u029d\12\u023d"+ - "\1\u023e\2\u023c\1\u023f\3\u023c\1\u0240\5\0\2\u023c\4\0"+ - "\1\u023c\213\0\34\u023e\12\u0308\1\0\2\u023e\1\u029f\3\u023e"+ - "\1\u0240\5\0\2\u023e\4\0\1\u023e\214\0\4\u0309\2\0"+ - "\1\u0309\15\0\1\u0309\6\0\12\u0309\240\0\4\u023c\2\0"+ - "\1\u023c\15\0\1\u023c\6\0\12\u023c\237\0\1\u030a\32\u02a1"+ - "\1\u030b\12\u02a1\1\u0174\7\0\1\u018c\1\u018d\1\u018e\272\0"+ - "\1\u01fc\237\0\4\u030c\2\0\1\u030c\15\0\1\u030c\6\0"+ - "\12\u030c\1\u02a4\237\0\4\u030d\2\0\1\u030d\15\0\1\u030d"+ - "\6\0\12\u030d\1\u030e\237\0\4\u030f\2\0\1\u030f\15\0"+ - "\1\u030f\6\0\12\u030f\1\u0310\13\0\1\u0146\222\0\1\u0188"+ - "\4\u030f\2\0\1\u030f\15\0\1\u030f\6\0\12\u0311\1\u0310"+ - "\13\0\1\u0146\222\0\1\u0188\4\u030f\2\0\1\u030f\15\0"+ - "\1\u030f\6\0\12\u0312\1\u0310\13\0\1\u0146\222\0\1\u0188"+ - "\4\u030f\2\0\1\u030f\15\0\1\u030f\6\0\1\u0311\1\u0313"+ - "\1\u0312\2\u0311\2\u0312\1\u0311\1\u0312\1\u0311\1\u0310\13\0"+ - 
"\1\u0146\223\0\4\u0314\2\0\1\u0314\15\0\1\u0314\6\0"+ - "\12\u0314\1\u0248\13\0\1\u0146\222\0\1\u0188\4\u0314\2\0"+ - "\1\u0314\15\0\1\u0314\6\0\12\u0314\1\u0248\13\0\1\u0146"+ - "\256\0\1\u0315\2\u0316\1\u0315\5\u0316\1\u0317\237\0\1\u02ab"+ - "\304\0\1\u02ab\33\0\2\u02ac\1\0\2\u02ac\2\0\1\u02ac"+ - "\1\0\1\u02ac\237\0\1\u0109\1\270\1\u0318\30\270\1\u010a"+ - "\12\270\237\0\1\u0109\24\270\1\u0319\5\270\1\u010a\12\270"+ - "\237\0\1\u0109\24\270\1\u031a\5\270\1\u010a\12\270\237\0"+ - "\1\u0109\1\270\1\u031b\30\270\1\u010a\12\270\237\0\1\u0109"+ - "\14\270\1\u031c\15\270\1\u010a\12\270\237\0\1\u0109\1\270"+ - "\1\u031d\30\270\1\u010a\12\270\237\0\1\u0109\1\270\1\u031e"+ - "\30\270\1\u010a\12\270\237\0\1\u0109\1\270\1\u031f\30\270"+ - "\1\u010a\12\270\237\0\1\u0109\21\270\1\u0320\10\270\1\u010a"+ - "\12\270\237\0\1\u0109\24\270\1\u0321\5\270\1\u010a\12\270"+ - "\237\0\1\u0109\24\270\1\u0322\5\270\1\u010a\12\270\237\0"+ - "\1\u0109\24\270\1\u0323\5\270\1\u010a\12\270\237\0\1\u0109"+ - "\1\u01cc\31\270\1\u010a\12\270\237\0\1\u0109\24\270\1\u031f"+ - "\5\270\1\u010a\12\270\237\0\1\u0109\24\270\1\u0324\5\270"+ - "\1\u010a\12\270\237\0\1\u0109\1\270\1\u0325\30\270\1\u010a"+ - "\12\270\237\0\1\u0109\31\270\1\u0326\1\u010a\12\270\237\0"+ - "\1\u0109\24\270\1\u0327\5\270\1\u010a\12\270\237\0\1\u0109"+ - "\1\270\1\u0328\30\270\1\u010a\12\270\237\0\1\u0109\1\u0329"+ - "\31\270\1\u010a\12\270\237\0\1\u0109\21\270\1\u032a\10\270"+ - "\1\u010a\12\270\237\0\1\u0109\4\270\1\u032b\25\270\1\u010a"+ - "\12\270\237\0\1\u0109\24\270\1\u032c\5\270\1\u010a\12\270"+ - "\237\0\1\u0109\24\270\1\u032d\5\270\1\u010a\12\270\237\0"+ - "\1\u0109\4\270\1\u032e\25\270\1\u010a\12\270\237\0\1\u0109"+ - "\21\270\1\u032f\10\270\1\u010a\12\270\237\0\1\u0109\24\270"+ - "\1\u0330\5\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a"+ - "\1\u0331\11\270\237\0\1\u0109\32\270\1\u010a\7\270\1\u0332"+ - "\2\270\237\0\1\u0109\1\u0333\31\270\1\u010a\12\270\237\0"+ - 
"\1\u0109\1\u0334\31\270\1\u010a\12\270\315\0\1\u018c\1\u018d"+ - "\1\u018e\224\0\1\u0121\1\324\1\u0335\30\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\1\u0336\11\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\6\324\1\u0337\23\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164\7\324"+ - "\1\u0338\2\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\10\324\1\u017b\1\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\5\324\1\u017b\4\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\26\324\1\u0339\3\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\1\324\1\u033a\30\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\26\324\1\u033b\3\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\32\324\1\164\1\324\1\u033c\10\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\1\u033d\31\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\1\u033e\27\324\1\u033f\1\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\1\u0340\11\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\4\324\1\u0341"+ - "\25\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\25\324"+ - "\1\u0342\4\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\1\u0343\31\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\1\u0344\11\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - 
"\32\324\1\164\2\324\1\350\7\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\3\324\1\u0345\6\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\1\u0346\1\324\1\u0347\27\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\1\u0338\31\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\2\324\1\u0348"+ - "\7\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164\2\324"+ - "\1\u0349\7\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\15\324\1\u034a"+ - "\14\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\5\324\1\u034b\4\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\7\324\1\u034c\2\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\11\324\1\u034d\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\1\324\1\u034e\30\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\3\324\1\u034f\6\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\32\324\1\164\1\324\1\u0350\10\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\1\324\1\u0351"+ - "\10\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\24\324\1\u0352\5\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\6\324\1\u0353\3\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\3\324\1\u0354\6\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\1\u0345\31\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - 
"\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\11\324\1\u0355\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\25\324\1\u0356\4\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\213\0"+ - "\4\u022e\2\0\1\u022e\15\0\1\u022e\6\0\12\u022e\240\0"+ - "\4\u0357\2\0\1\u0357\15\0\1\u0357\6\0\12\u0357\1\u02f5"+ - "\237\0\4\u0358\2\0\1\u0358\15\0\1\u0358\6\0\12\u0358"+ - "\1\u0359\237\0\4\u035a\2\0\1\u035a\15\0\1\u035a\6\0"+ - "\1\u035b\2\u035c\1\u035b\5\u035c\1\u035d\14\0\1\u035e\222\0"+ - "\34\u01f0\12\u035f\1\0\2\u01f0\1\u0234\3\u01f0\1\u01f2\1\0"+ - "\1\u0233\3\0\2\u01f0\4\0\1\u01f0\214\0\4\u0233\2\0"+ - "\1\u0233\15\0\1\u0233\6\0\12\u0233\270\0\1\u0360\307\0"+ - "\12\u0361\11\0\1\u0233\226\0\4\u0362\2\0\1\u0362\15\0"+ - "\1\u0362\6\0\12\u0362\1\u0292\237\0\4\u0363\2\0\1\u0363"+ - "\15\0\1\u0363\6\0\12\u0363\1\u0364\237\0\4\u0365\2\0"+ - "\1\u0365\15\0\1\u0365\6\0\1\u0366\2\u0367\1\u0366\5\u0367"+ - "\1\u0368\14\0\1\u0302\223\0\4\u0369\2\0\1\u0369\15\0"+ - "\1\u0369\6\0\12\u0369\1\u036a\13\0\1\u0302\222\0\1\u036b"+ - "\4\u0369\2\0\1\u0369\15\0\1\u0369\6\0\12\u036c\1\u036a"+ - "\13\0\1\u0302\222\0\1\u036b\4\u0369\2\0\1\u0369\15\0"+ - "\1\u0369\6\0\12\u036d\1\u036a\13\0\1\u0302\222\0\1\u036b"+ - "\4\u0369\2\0\1\u0369\15\0\1\u0369\6\0\1\u036c\1\u036e"+ - "\1\u036d\2\u036c\2\u036d\1\u036c\1\u036d\1\u036c\1\u036a\13\0"+ - "\1\u0302\270\0\1\u0290\10\0\1\u0233\225\0\1\u036f\32\u0303"+ - "\1\u0370\12\u0303\237\0\61\u0304\1\0\1\u0371\4\u0304\1\u0372"+ - "\1\0\3\u0304\6\0\1\u0100\1\0\1\u0101\17\0\1\u0102"+ - "\2\0\1\u0103\4\0\1\u0104\3\0\1\u0105\22\0\1\u0106"+ - "\21\0\1\u0107\2\0\1\u0108\60\0\1\220\1\73\6\0"+ - "\1\220\3\0\1\u0294\33\u0239\12\u0297\1\0\3\u0239\1\0"+ - "\2\u0239\1\u0295\3\u0239\3\0\1\u0239\1\u023b\3\0\2\u0239"+ - "\7\0\1\u0100\1\0\1\u0101\17\0\1\u0102\2\0\1\u0103"+ - "\4\0\1\u0104\3\0\1\u0105\22\0\1\u0106\21\0\1\u0107"+ - "\2\0\1\u0108\60\0\1\220\1\73\6\0\1\220\3\0"+ - 
"\1\u0294\33\u0239\12\u0297\1\0\3\u0239\1\0\2\u0239\1\u0295"+ - "\3\u0239\3\0\1\u0239\4\0\2\u0239\212\0\46\u0239\1\0"+ - "\3\u0239\1\0\2\u0239\1\0\3\u0239\3\0\1\u0239\1\u023b"+ - "\3\0\2\u0239\212\0\34\u023e\12\u0373\1\0\2\u023e\1\u029f"+ - "\3\u023e\1\u0240\1\u018c\1\u018d\1\u018e\2\0\2\u023e\4\0"+ - "\1\u023e\214\0\4\u023e\2\0\1\u023e\15\0\1\u023e\6\0"+ - "\12\u023e\240\0\32\u02a1\1\0\12\u02a1\240\0\32\u02a1\1\u030b"+ - "\12\u02a1\240\0\4\u0374\2\0\1\u0374\15\0\1\u0374\6\0"+ - "\12\u0374\1\u02a4\237\0\4\u0375\2\0\1\u0375\15\0\1\u0375"+ - "\6\0\12\u0375\1\u0376\237\0\4\u0377\2\0\1\u0377\15\0"+ - "\1\u0377\6\0\1\u0378\2\u0379\1\u0378\5\u0379\1\u037a\14\0"+ - "\1\u0146\223\0\4\u037b\2\0\1\u037b\15\0\1\u037b\6\0"+ - "\12\u037b\1\u0310\13\0\1\u0146\223\0\4\u0377\2\0\1\u0377"+ - "\15\0\1\u0377\6\0\1\u0378\2\u0379\1\u0378\5\u0379\1\u037a"+ - "\237\0\1\u0188\4\u037b\2\0\1\u037b\15\0\1\u037b\6\0"+ - "\12\u037c\1\u0310\13\0\1\u0146\222\0\1\u0188\4\u037b\2\0"+ - "\1\u037b\15\0\1\u037b\6\0\12\u037b\1\u0310\13\0\1\u0146"+ - "\222\0\1\u0188\4\u037b\2\0\1\u037b\15\0\1\u037b\6\0"+ - "\2\u037c\1\u037b\2\u037c\2\u037b\1\u037c\1\u037b\1\u037c\1\u0310"+ - "\13\0\1\u0146\270\0\1\u0248\13\0\1\u0146\256\0\12\u0316"+ - "\14\0\1\u0146\256\0\12\u037d\14\0\1\u0146\256\0\1\u0316"+ - "\1\u037e\1\u037d\2\u0316\2\u037d\1\u0316\1\u037d\1\u0316\14\0"+ - "\1\u0146\222\0\1\u0109\25\270\1\u037f\4\270\1\u010a\12\270"+ - "\237\0\1\u0109\1\u0380\31\270\1\u010a\12\270\237\0\1\u0109"+ - "\15\270\1\u0381\14\270\1\u010a\12\270\237\0\1\u0109\21\270"+ - "\1\u0382\10\270\1\u010a\12\270\237\0\1\u0109\16\270\1\u0383"+ - "\4\270\1\u0384\6\270\1\u010a\12\270\237\0\1\u0109\4\270"+ - "\1\u0385\25\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a"+ - "\11\270\1\u0386\237\0\1\u0109\4\270\1\u0387\25\270\1\u010a"+ - "\12\270\237\0\1\u0109\32\270\1\u010a\11\270\1\u0388\237\0"+ - "\1\u0109\24\270\1\u0389\5\270\1\u010a\12\270\237\0\1\u0109"+ - "\1\u038a\1\u038b\1\270\1\u038c\20\270\1\u038d\5\270\1\u010a"+ - 
"\5\270\1\u038e\4\270\237\0\1\u0109\16\270\1\u038f\13\270"+ - "\1\u010a\12\270\237\0\1\u0109\11\270\1\u0390\13\270\1\u0391"+ - "\4\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\11\270"+ - "\1\u0392\237\0\1\u0109\23\270\1\u0393\6\270\1\u010a\12\270"+ - "\237\0\1\u0109\31\270\1\u0394\1\u010a\12\270\237\0\1\u0109"+ - "\26\270\1\u0395\3\270\1\u010a\12\270\237\0\1\u0109\11\270"+ - "\1\u0396\20\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a"+ - "\3\270\1\u0397\6\270\237\0\1\u0109\5\270\1\u0398\24\270"+ - "\1\u010a\12\270\237\0\1\u0109\10\270\1\u0399\21\270\1\u010a"+ - "\12\270\237\0\1\u0109\3\270\1\u039a\26\270\1\u010a\12\270"+ - "\237\0\1\u0109\21\270\1\u039b\6\270\1\u039c\1\270\1\u010a"+ - "\12\270\237\0\1\u0109\12\270\1\u039d\17\270\1\u010a\12\270"+ - "\237\0\1\u0109\32\270\1\u010a\1\270\1\u039e\10\270\237\0"+ - "\1\u0109\24\270\1\u039f\5\270\1\u010a\12\270\237\0\1\u0109"+ - "\24\270\1\u03a0\5\270\1\u010a\12\270\237\0\1\u0109\32\270"+ - "\1\u010a\4\270\1\u03a1\5\270\237\0\1\u0109\5\270\1\u03a2"+ - "\23\270\1\u03a3\1\u010a\12\270\237\0\1\u0121\32\324\1\164"+ - "\1\u03a4\11\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u03a5\31\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\10\324\1\u03a6\1\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\25\324"+ - "\1\u0127\4\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\5\324\1\u03a7\4\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\5\324\1\u03a8\4\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\5\324\1\u0345\4\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\32\324\1\164\3\324\1\u03a5\6\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\17\324\1\u03a9\12\324\1\164"+ - 
"\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\12\324\1\u03aa\17\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\25\324\1\u03ab"+ - "\4\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u03ac"+ - "\31\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\15\324"+ - "\1\u03ad\14\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\3\324\1\u03ae\6\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\21\324\1\u03af\10\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\2\324\1\u0338\27\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\324\1\u0127\30\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\11\324\1\u03b0\20\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\11\324\1\u03b1\20\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u03b2\31\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u03b3\31\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\2\324\1\u03b4"+ - "\27\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\4\324\1\u012e\5\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\10\324\1\u03b5\21\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\1\u03b6\31\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\25\324\1\u03b7\4\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - 
"\212\0\1\u0121\32\324\1\164\4\324\1\u03a5\5\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\32\324\1\164\6\324\1\u03a5\3\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\2\324\1\u03a5"+ - "\7\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\16\324\1\u03b8\13\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\1\u03b9\11\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\3\324\1\u03ba\6\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\3\324\1\350\6\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\24\324\1\u03bb\5\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\213\0"+ - "\4\u03bc\2\0\1\u03bc\15\0\1\u03bc\6\0\12\u03bc\1\u02f5"+ - "\237\0\4\u03bd\2\0\1\u03bd\15\0\1\u03bd\6\0\12\u03bd"+ - "\1\u03be\237\0\4\u03bf\2\0\1\u03bf\15\0\1\u03bf\6\0"+ - "\1\u03c0\2\u03c1\1\u03c0\5\u03c1\1\u03c2\14\0\1\u035e\223\0"+ - "\4\u03c3\2\0\1\u03c3\15\0\1\u03c3\6\0\12\u03c3\1\u03c4"+ - "\13\0\1\u035e\222\0\1\u03c5\4\u03c3\2\0\1\u03c3\15\0"+ - "\1\u03c3\6\0\12\u03c6\1\u03c4\13\0\1\u035e\222\0\1\u03c5"+ - "\4\u03c3\2\0\1\u03c3\15\0\1\u03c3\6\0\12\u03c7\1\u03c4"+ - "\13\0\1\u035e\222\0\1\u03c5\4\u03c3\2\0\1\u03c3\15\0"+ - "\1\u03c3\6\0\1\u03c6\1\u03c8\1\u03c7\2\u03c6\2\u03c7\1\u03c6"+ - "\1\u03c7\1\u03c6\1\u03c4\13\0\1\u035e\301\0\1\u022e\225\0"+ - "\34\u01f0\12\u03c9\1\0\2\u01f0\1\u0234\3\u01f0\1\u01f2\1\0"+ - "\1\u0233\3\0\2\u01f0\4\0\1\u01f0\231\0\1\u03ca\322\0"+ - "\12\u03cb\11\0\1\u0233\273\0\1\u0292\237\0\4\u03cc\2\0"+ - "\1\u03cc\15\0\1\u03cc\6\0\12\u03cc\1\u0364\237\0\4\u03cd"+ - "\2\0\1\u03cd\15\0\1\u03cd\6\0\12\u03cd\1\u03ce\237\0"+ - "\4\u03cf\2\0\1\u03cf\15\0\1\u03cf\6\0\12\u03cf\1\u03d0"+ - "\13\0\1\u0302\222\0\1\u036b\4\u03cf\2\0\1\u03cf\15\0"+ - 
"\1\u03cf\6\0\12\u03d1\1\u03d0\13\0\1\u0302\222\0\1\u036b"+ - "\4\u03cf\2\0\1\u03cf\15\0\1\u03cf\6\0\12\u03d2\1\u03d0"+ - "\13\0\1\u0302\222\0\1\u036b\4\u03cf\2\0\1\u03cf\15\0"+ - "\1\u03cf\6\0\1\u03d1\1\u03d3\1\u03d2\2\u03d1\2\u03d2\1\u03d1"+ - "\1\u03d2\1\u03d1\1\u03d0\13\0\1\u0302\223\0\4\u03d4\2\0"+ - "\1\u03d4\15\0\1\u03d4\6\0\12\u03d4\1\u036a\13\0\1\u0302"+ - "\223\0\4\u0365\2\0\1\u0365\15\0\1\u0365\6\0\1\u0366"+ - "\2\u0367\1\u0366\5\u0367\1\u0368\273\0\1\u03d5\2\u03d6\1\u03d5"+ - "\5\u03d6\1\u03d7\237\0\1\u036b\4\u03d4\2\0\1\u03d4\15\0"+ - "\1\u03d4\6\0\12\u03d8\1\u036a\13\0\1\u0302\222\0\1\u036b"+ - "\4\u03d4\2\0\1\u03d4\15\0\1\u03d4\6\0\12\u03d4\1\u036a"+ - "\13\0\1\u0302\222\0\1\u036b\4\u03d4\2\0\1\u03d4\15\0"+ - "\1\u03d4\6\0\2\u03d8\1\u03d4\2\u03d8\2\u03d4\1\u03d8\1\u03d4"+ - "\1\u03d8\1\u036a\13\0\1\u0302\223\0\1\u03d9\1\u03da\1\u03db"+ - "\1\u03dc\1\u03dd\1\u03de\1\u03df\1\u03e0\1\u03e1\1\u03e2\1\u03e3"+ - "\1\u03e4\1\u03e5\1\u03e6\1\u03e7\1\u03e8\1\u03e9\1\u03ea\1\u03eb"+ - "\1\u03ec\1\u03ed\1\u03ee\1\u03ef\1\u03f0\1\u03f1\1\u03f2\1\0"+ - "\12\u0303\240\0\32\u0303\1\u0370\12\u0303\237\0\74\u0304\211\0"+ - "\34\u023e\12\u03f3\1\0\2\u023e\1\u029f\3\u023e\1\u0240\1\u018c"+ - "\1\u018d\1\u018e\2\0\2\u023e\4\0\1\u023e\261\0\1\u02a4"+ - "\237\0\4\u03f4\2\0\1\u03f4\15\0\1\u03f4\6\0\12\u03f4"+ - "\1\u0376\237\0\4\u03f5\2\0\1\u03f5\15\0\1\u03f5\6\0"+ - "\12\u03f5\1\u03f6\237\0\4\u03f7\2\0\1\u03f7\15\0\1\u03f7"+ - "\6\0\12\u03f7\1\u03f8\13\0\1\u0146\222\0\1\u0188\4\u03f7"+ - "\2\0\1\u03f7\15\0\1\u03f7\6\0\12\u03f9\1\u03f8\13\0"+ - "\1\u0146\222\0\1\u0188\4\u03f7\2\0\1\u03f7\15\0\1\u03f7"+ - "\6\0\12\u03fa\1\u03f8\13\0\1\u0146\222\0\1\u0188\4\u03f7"+ - "\2\0\1\u03f7\15\0\1\u03f7\6\0\1\u03f9\1\u03fb\1\u03fa"+ - "\2\u03f9\2\u03fa\1\u03f9\1\u03fa\1\u03f9\1\u03f8\13\0\1\u0146"+ - "\223\0\4\u03fc\2\0\1\u03fc\15\0\1\u03fc\6\0\12\u03fc"+ - "\1\u0310\13\0\1\u0146\222\0\1\u0188\4\u03fc\2\0\1\u03fc"+ - "\15\0\1\u03fc\6\0\12\u03fc\1\u0310\13\0\1\u0146\304\0"+ - 
"\1\u0146\256\0\2\u037d\1\0\2\u037d\2\0\1\u037d\1\0"+ - "\1\u037d\14\0\1\u0146\222\0\1\u0109\1\270\1\u03fd\30\270"+ - "\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\1\u03fe\11\270"+ - "\237\0\1\u0109\6\270\1\u03ff\23\270\1\u010a\12\270\237\0"+ - "\1\u0109\32\270\1\u010a\7\270\1\u0400\2\270\237\0\1\u0109"+ - "\32\270\1\u010a\10\270\1\u01d1\1\270\237\0\1\u0109\32\270"+ - "\1\u010a\5\270\1\u01d1\4\270\237\0\1\u0109\26\270\1\u0401"+ - "\3\270\1\u010a\12\270\237\0\1\u0109\1\270\1\u0402\30\270"+ - "\1\u010a\12\270\237\0\1\u0109\26\270\1\u0403\3\270\1\u010a"+ - "\12\270\237\0\1\u0109\32\270\1\u010a\1\270\1\u0404\10\270"+ - "\237\0\1\u0109\1\u0405\31\270\1\u010a\12\270\237\0\1\u0109"+ - "\1\u0406\27\270\1\u0407\1\270\1\u010a\12\270\237\0\1\u0109"+ - "\32\270\1\u010a\1\u0408\11\270\237\0\1\u0109\4\270\1\u0409"+ - "\25\270\1\u010a\12\270\237\0\1\u0109\25\270\1\u040a\4\270"+ - "\1\u010a\12\270\237\0\1\u0109\1\u040b\31\270\1\u010a\12\270"+ - "\237\0\1\u0109\32\270\1\u010a\1\u040c\11\270\237\0\1\u0109"+ - "\32\270\1\u010a\2\270\1\u0153\7\270\237\0\1\u0109\32\270"+ - "\1\u010a\3\270\1\u040d\6\270\237\0\1\u0109\1\u040e\1\270"+ - "\1\u040f\27\270\1\u010a\12\270\237\0\1\u0109\1\u0400\31\270"+ - "\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\2\270\1\u0410"+ - "\7\270\237\0\1\u0109\32\270\1\u010a\2\270\1\u0411\7\270"+ - "\237\0\1\u0109\15\270\1\u0412\14\270\1\u010a\12\270\237\0"+ - "\1\u0109\32\270\1\u010a\5\270\1\u0413\4\270\237\0\1\u0109"+ - "\32\270\1\u010a\7\270\1\u0414\2\270\237\0\1\u0109\32\270"+ - "\1\u010a\11\270\1\u0415\237\0\1\u0109\1\270\1\u0416\30\270"+ - "\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\3\270\1\u0417"+ - "\6\270\237\0\1\u0109\32\270\1\u010a\1\270\1\u0418\10\270"+ - "\237\0\1\u0109\32\270\1\u010a\1\270\1\u0419\10\270\237\0"+ - "\1\u0109\24\270\1\u041a\5\270\1\u010a\12\270\237\0\1\u0109"+ - "\32\270\1\u010a\6\270\1\u041b\3\270\237\0\1\u0109\32\270"+ - "\1\u010a\3\270\1\u041c\6\270\237\0\1\u0109\1\u040d\31\270"+ - 
"\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\11\270\1\u041d"+ - "\237\0\1\u0109\25\270\1\u041e\4\270\1\u010a\12\270\237\0"+ - "\1\u0121\3\324\1\u041f\26\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\2\324\1\u0127\27\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\6\324\1\u0132\23\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\1\324\1\u034f\30\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\3\324\1\u0420\26\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\3\324\1\u0421\6\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\6\324\1\u0422\3\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\6\324\1\u0423\3\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\5\324\1\u0424\4\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\7\324\1\u0425\2\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\u0426\31\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\24\324\1\u0427\5\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\4\324\1\u0428"+ - "\5\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164\4\324"+ - "\1\u0429\5\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\26\324\1\u042a"+ - "\3\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\30\324"+ - "\1\u042b\1\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\11\324\1\u0176\20\324\1\164\12\324\1\0\3\161\1\0"+ - 
"\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\2\324\1\u042c\7\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\12\324\1\u042d\17\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\17\324\1\u012f\12\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\4\324\1\u042e"+ - "\5\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164\6\324"+ - "\1\u0179\3\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\30\324\1\u042f"+ - "\1\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\30\324"+ - "\1\u0430\1\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\260\0\1\u02f5"+ - "\237\0\4\u0431\2\0\1\u0431\15\0\1\u0431\6\0\12\u0431"+ - "\1\u03be\237\0\4\u0432\2\0\1\u0432\15\0\1\u0432\6\0"+ - "\12\u0432\1\u0433\237\0\4\u0434\2\0\1\u0434\15\0\1\u0434"+ - "\6\0\12\u0434\1\u0435\13\0\1\u035e\222\0\1\u03c5\4\u0434"+ - "\2\0\1\u0434\15\0\1\u0434\6\0\12\u0436\1\u0435\13\0"+ - "\1\u035e\222\0\1\u03c5\4\u0434\2\0\1\u0434\15\0\1\u0434"+ - "\6\0\12\u0437\1\u0435\13\0\1\u035e\222\0\1\u03c5\4\u0434"+ - "\2\0\1\u0434\15\0\1\u0434\6\0\1\u0436\1\u0438\1\u0437"+ - "\2\u0436\2\u0437\1\u0436\1\u0437\1\u0436\1\u0435\13\0\1\u035e"+ - "\223\0\4\u0439\2\0\1\u0439\15\0\1\u0439\6\0\12\u0439"+ - "\1\u03c4\13\0\1\u035e\223\0\4\u03bf\2\0\1\u03bf\15\0"+ - "\1\u03bf\6\0\1\u03c0\2\u03c1\1\u03c0\5\u03c1\1\u03c2\273\0"+ - "\1\u043a\2\u043b\1\u043a\5\u043b\1\u043c\237\0\1\u03c5\4\u0439"+ - "\2\0\1\u0439\15\0\1\u0439\6\0\12\u043d\1\u03c4\13\0"+ - "\1\u035e\222\0\1\u03c5\4\u0439\2\0\1\u0439\15\0\1\u0439"+ - "\6\0\12\u0439\1\u03c4\13\0\1\u035e\222\0\1\u03c5\4\u0439"+ - "\2\0\1\u0439\15\0\1\u0439\6\0\2\u043d\1\u0439\2\u043d"+ - "\2\u0439\1\u043d\1\u0439\1\u043d\1\u03c4\13\0\1\u035e\222\0"+ - 
"\34\u01f0\12\u043e\1\0\2\u01f0\1\u0234\3\u01f0\1\u01f2\1\0"+ - "\1\u0233\3\0\2\u01f0\4\0\1\u01f0\217\0\1\u043f\334\0"+ - "\12\u0440\11\0\1\u0233\226\0\4\u0441\2\0\1\u0441\15\0"+ - "\1\u0441\6\0\12\u0441\1\u0364\237\0\4\u0442\2\0\1\u0442"+ - "\15\0\1\u0442\6\0\12\u0442\1\u0443\237\0\4\u0444\2\0"+ - "\1\u0444\15\0\1\u0444\6\0\1\u0445\2\u0446\1\u0445\5\u0446"+ - "\1\u0447\14\0\1\u0302\223\0\4\u0448\2\0\1\u0448\15\0"+ - "\1\u0448\6\0\12\u0448\1\u03d0\13\0\1\u0302\223\0\4\u0444"+ - "\2\0\1\u0444\15\0\1\u0444\6\0\1\u0445\2\u0446\1\u0445"+ - "\5\u0446\1\u0447\237\0\1\u036b\4\u0448\2\0\1\u0448\15\0"+ - "\1\u0448\6\0\12\u0449\1\u03d0\13\0\1\u0302\222\0\1\u036b"+ - "\4\u0448\2\0\1\u0448\15\0\1\u0448\6\0\12\u0448\1\u03d0"+ - "\13\0\1\u0302\222\0\1\u036b\4\u0448\2\0\1\u0448\15\0"+ - "\1\u0448\6\0\2\u0449\1\u0448\2\u0449\2\u0448\1\u0449\1\u0448"+ - "\1\u0449\1\u03d0\13\0\1\u0302\223\0\4\u044a\2\0\1\u044a"+ - "\15\0\1\u044a\6\0\12\u044a\1\u036a\13\0\1\u0302\222\0"+ - "\1\u044b\33\0\12\u03d6\237\0\1\u044b\33\0\12\u044c\237\0"+ - "\1\u044b\33\0\1\u03d6\1\u044d\1\u044c\2\u03d6\2\u044c\1\u03d6"+ - "\1\u044c\1\u03d6\237\0\1\u036b\4\u044a\2\0\1\u044a\15\0"+ - "\1\u044a\6\0\12\u044a\1\u036a\13\0\1\u0302\222\0\1\u036f"+ - "\1\u0303\2\u044e\1\u044f\1\u0450\10\u044e\1\u0303\1\u0451\5\u044e"+ - "\6\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u0452\2\u044e\1\u0303"+ - "\1\u044e\1\u0453\6\u044e\4\u0303\4\u044e\1\u0303\1\u044e\1\u0303"+ - "\3\u044e\1\u0370\12\u0303\237\0\1\u036f\3\u0303\1\u044e\1\u0303"+ - "\1\u044e\4\u0303\1\u044e\10\u0303\1\u044e\2\u0303\1\u044e\2\u0303"+ - "\1\u044e\1\u0370\12\u0303\237\0\1\u036f\1\u0303\1\u044e\1\u0454"+ - "\2\u044e\2\u0303\1\u044e\6\u0303\3\u044e\11\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\3\u0303\1\u044e\1\u0303\1\u044e\10\u0303\1\u044e"+ - "\1\u0303\2\u044e\10\u0303\1\u0370\12\u0303\237\0\1\u036f\4\u0303"+ - "\1\u0455\5\u0303\1\u044e\17\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\4\u0303\2\u044e\2\u0303\1\u044e\1\u0303\1\u044e\13\u0303\1\u044e"+ - 
"\2\u0303\1\u044e\1\u0370\12\u0303\237\0\1\u036f\1\u044e\1\u0303"+ - "\3\u044e\1\u0456\14\u044e\2\u0303\2\u044e\2\u0303\1\u044e\1\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\2\u0303\4\u044e\3\u0303\2\u044e"+ - "\1\u0457\1\u044e\1\u0303\2\u044e\12\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\2\u044e\2\u0303\1\u044e\3\u0303\1\u044e\5\u0303\3\u044e"+ - "\3\u0303\1\u044e\2\u0303\3\u044e\1\u0370\12\u0303\237\0\1\u036f"+ - "\5\u044e\1\u0458\1\u0303\1\u044e\1\u0459\7\u044e\1\u045a\3\u044e"+ - "\1\u0303\1\u044e\1\u0303\3\u044e\1\u0370\12\u0303\237\0\1\u036f"+ - "\1\u045b\1\u044e\1\u0303\1\u0452\6\u044e\3\u0303\1\u044e\2\u0303"+ - "\1\u044e\2\u0303\1\u044e\6\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\1\u044e\31\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u044e\2\u0303"+ - "\1\u044e\1\u045c\1\u0303\2\u044e\1\u0303\3\u044e\2\u0303\2\u044e"+ - "\1\u0303\1\u044e\3\u0303\1\u044e\2\u0303\2\u044e\1\u0370\12\u0303"+ - "\237\0\1\u036f\6\u044e\1\u0303\5\u044e\3\u0303\2\u044e\1\u0303"+ - "\10\u044e\1\u0370\12\u0303\237\0\1\u036f\1\u0303\2\u044e\1\u0459"+ - "\1\u045d\3\u044e\1\u0303\3\u044e\1\u0303\1\u044e\1\u0303\1\u044e"+ - "\1\u0303\1\u044e\1\u0303\1\u044e\1\u0303\3\u044e\1\u0303\1\u044e"+ - "\1\u0370\12\u0303\237\0\1\u036f\1\u044e\6\u0303\1\u044e\6\u0303"+ - "\1\u044e\4\u0303\1\u044e\4\u0303\2\u044e\1\u0370\12\u0303\237\0"+ - "\1\u036f\6\u0303\1\u044e\7\u0303\1\u044e\13\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\13\u0303\1\u045e\6\u0303\1\u045f\7\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\1\u044e\11\u0303\1\u044e\6\u0303\1\u044e"+ - "\10\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u044e\1\u0303\6\u044e"+ - "\1\u0460\1\u0303\2\u044e\2\u0303\2\u044e\1\u0303\1\u044e\1\u0303"+ - "\6\u044e\1\u0303\1\u0370\12\u0303\237\0\1\u036f\4\u0303\1\u044e"+ - "\5\u0303\2\u044e\3\u0303\2\u044e\10\u0303\1\u044e\1\u0370\12\u0303"+ - "\237\0\1\u036f\3\u0303\1\u044e\1\u0303\1\u0461\4\u0303\1\u044e"+ - "\2\u0303\1\u044e\14\u0303\1\u0370\12\u0303\237\0\1\u036f\2\u044e"+ - 
"\1\u0303\1\u044e\3\u0303\2\u044e\2\u0303\1\u044e\4\u0303\1\u044e"+ - "\11\u0303\1\u0370\12\u0303\237\0\1\u036f\3\u0303\1\u044e\13\u0303"+ - "\1\u044e\12\u0303\1\u0370\12\u0303\237\0\1\u036f\3\u0303\2\u044e"+ - "\2\u0303\2\u044e\1\u0303\2\u044e\1\u0303\1\u044e\3\u0303\1\u044e"+ - "\1\u0303\1\u044e\1\u0303\1\u044e\2\u0303\1\u044e\1\u0303\1\u0370"+ - "\12\u0303\237\0\34\u023e\12\u0462\1\0\2\u023e\1\u029f\3\u023e"+ - "\1\u0240\1\u018c\1\u018d\1\u018e\2\0\2\u023e\4\0\1\u023e"+ - "\214\0\4\u0463\2\0\1\u0463\15\0\1\u0463\6\0\12\u0463"+ - "\1\u0376\237\0\4\u0464\2\0\1\u0464\15\0\1\u0464\6\0"+ - "\12\u0464\1\u0465\237\0\4\u0466\2\0\1\u0466\15\0\1\u0466"+ - "\6\0\1\u0467\2\u0468\1\u0467\5\u0468\1\u0469\14\0\1\u0146"+ - "\223\0\4\u046a\2\0\1\u046a\15\0\1\u046a\6\0\12\u046a"+ - "\1\u03f8\13\0\1\u0146\223\0\4\u0466\2\0\1\u0466\15\0"+ - "\1\u0466\6\0\1\u0467\2\u0468\1\u0467\5\u0468\1\u0469\237\0"+ - "\1\u0188\4\u046a\2\0\1\u046a\15\0\1\u046a\6\0\12\u046b"+ - "\1\u03f8\13\0\1\u0146\222\0\1\u0188\4\u046a\2\0\1\u046a"+ - "\15\0\1\u046a\6\0\12\u046a\1\u03f8\13\0\1\u0146\222\0"+ - "\1\u0188\4\u046a\2\0\1\u046a\15\0\1\u046a\6\0\2\u046b"+ - "\1\u046a\2\u046b\2\u046a\1\u046b\1\u046a\1\u046b\1\u03f8\13\0"+ - "\1\u0146\270\0\1\u0310\13\0\1\u0146\222\0\1\u0109\32\270"+ - "\1\u010a\1\u046c\11\270\237\0\1\u0109\1\u046d\31\270\1\u010a"+ - "\12\270\237\0\1\u0109\32\270\1\u010a\10\270\1\u046e\1\270"+ - "\237\0\1\u0109\25\270\1\u018f\4\270\1\u010a\12\270\237\0"+ - "\1\u0109\32\270\1\u010a\5\270\1\u046f\4\270\237\0\1\u0109"+ - "\32\270\1\u010a\5\270\1\u0470\4\270\237\0\1\u0109\32\270"+ - "\1\u010a\5\270\1\u040d\4\270\237\0\1\u0109\32\270\1\u010a"+ - "\3\270\1\u046d\6\270\237\0\1\u0109\17\270\1\u0471\12\270"+ - "\1\u010a\12\270\237\0\1\u0109\12\270\1\u0472\17\270\1\u010a"+ - "\12\270\237\0\1\u0109\25\270\1\u0473\4\270\1\u010a\12\270"+ - "\237\0\1\u0109\1\u0474\31\270\1\u010a\12\270\237\0\1\u0109"+ - "\15\270\1\u0475\14\270\1\u010a\12\270\237\0\1\u0109\32\270"+ - 
"\1\u010a\3\270\1\u0476\6\270\237\0\1\u0109\21\270\1\u0477"+ - "\10\270\1\u010a\12\270\237\0\1\u0109\2\270\1\u0400\27\270"+ - "\1\u010a\12\270\237\0\1\u0109\1\270\1\u018f\30\270\1\u010a"+ - "\12\270\237\0\1\u0109\11\270\1\u0478\20\270\1\u010a\12\270"+ - "\237\0\1\u0109\11\270\1\u0479\20\270\1\u010a\12\270\237\0"+ - "\1\u0109\1\u047a\31\270\1\u010a\12\270\237\0\1\u0109\1\u047b"+ - "\31\270\1\u010a\12\270\237\0\1\u0109\2\270\1\u047c\27\270"+ - "\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\4\270\1\u0196"+ - "\5\270\237\0\1\u0109\10\270\1\u047d\21\270\1\u010a\12\270"+ - "\237\0\1\u0109\1\u047e\31\270\1\u010a\12\270\237\0\1\u0109"+ - "\25\270\1\u047f\4\270\1\u010a\12\270\237\0\1\u0109\32\270"+ - "\1\u010a\4\270\1\u046d\5\270\237\0\1\u0109\32\270\1\u010a"+ - "\6\270\1\u046d\3\270\237\0\1\u0109\32\270\1\u010a\2\270"+ - "\1\u046d\7\270\237\0\1\u0109\16\270\1\u0480\13\270\1\u010a"+ - "\12\270\237\0\1\u0109\32\270\1\u010a\1\u0481\11\270\237\0"+ - "\1\u0109\32\270\1\u010a\3\270\1\u0482\6\270\237\0\1\u0109"+ - "\32\270\1\u010a\3\270\1\u0153\6\270\237\0\1\u0109\24\270"+ - "\1\u0483\5\270\1\u010a\12\270\237\0\1\u0121\1\u0484\31\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\11\324\1\u0345\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u0485\31\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u0486\31\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\7\324\1\u0487"+ - "\22\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u0488"+ - "\31\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u0489"+ - "\31\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\6\324\1\u048a\3\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - 
"\6\324\1\u0127\23\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\25\324\1\u048b\4\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\1\u048c\31\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\6\324\1\u048d\3\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\1\u048e\31\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\32\324\1\164\6\324\1\u0175\3\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\12\324\1\u0138\17\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\1\u048f\31\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\10\324\1\u0490\21\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\31\324\1\u0491"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\213\0\4\u0492\2\0\1\u0492"+ - "\15\0\1\u0492\6\0\12\u0492\1\u03be\237\0\4\u0493\2\0"+ - "\1\u0493\15\0\1\u0493\6\0\12\u0493\1\u0494\237\0\4\u0495"+ - "\2\0\1\u0495\15\0\1\u0495\6\0\1\u0496\2\u0497\1\u0496"+ - "\5\u0497\1\u0498\14\0\1\u035e\223\0\4\u0499\2\0\1\u0499"+ - "\15\0\1\u0499\6\0\12\u0499\1\u0435\13\0\1\u035e\223\0"+ - "\4\u0495\2\0\1\u0495\15\0\1\u0495\6\0\1\u0496\2\u0497"+ - "\1\u0496\5\u0497\1\u0498\237\0\1\u03c5\4\u0499\2\0\1\u0499"+ - "\15\0\1\u0499\6\0\12\u049a\1\u0435\13\0\1\u035e\222\0"+ - "\1\u03c5\4\u0499\2\0\1\u0499\15\0\1\u0499\6\0\12\u0499"+ - "\1\u0435\13\0\1\u035e\222\0\1\u03c5\4\u0499\2\0\1\u0499"+ - "\15\0\1\u0499\6\0\2\u049a\1\u0499\2\u049a\2\u0499\1\u049a"+ - "\1\u0499\1\u049a\1\u0435\13\0\1\u035e\223\0\4\u049b\2\0"+ - "\1\u049b\15\0\1\u049b\6\0\12\u049b\1\u03c4\13\0\1\u035e"+ - "\222\0\1\u049c\33\0\12\u043b\237\0\1\u049c\33\0\12\u049d"+ - 
"\237\0\1\u049c\33\0\1\u043b\1\u049e\1\u049d\2\u043b\2\u049d"+ - "\1\u043b\1\u049d\1\u043b\237\0\1\u03c5\4\u049b\2\0\1\u049b"+ - "\15\0\1\u049b\6\0\12\u049b\1\u03c4\13\0\1\u035e\222\0"+ - "\46\u01f0\1\0\2\u01f0\1\u0234\3\u01f0\1\u01f2\1\0\1\u0233"+ - "\3\0\2\u01f0\4\0\1\u01f0\277\0\1\u049f\254\0\12\u04a0"+ - "\11\0\1\u0233\273\0\1\u0364\237\0\4\u04a1\2\0\1\u04a1"+ - "\15\0\1\u04a1\6\0\12\u04a1\1\u0443\237\0\4\u04a2\2\0"+ - "\1\u04a2\15\0\1\u04a2\6\0\12\u04a2\1\u04a3\237\0\4\u04a4"+ - "\2\0\1\u04a4\15\0\1\u04a4\6\0\12\u04a4\1\u04a5\13\0"+ - "\1\u0302\222\0\1\u036b\4\u04a4\2\0\1\u04a4\15\0\1\u04a4"+ - "\6\0\12\u04a6\1\u04a5\13\0\1\u0302\222\0\1\u036b\4\u04a4"+ - "\2\0\1\u04a4\15\0\1\u04a4\6\0\12\u04a7\1\u04a5\13\0"+ - "\1\u0302\222\0\1\u036b\4\u04a4\2\0\1\u04a4\15\0\1\u04a4"+ - "\6\0\1\u04a6\1\u04a8\1\u04a7\2\u04a6\2\u04a7\1\u04a6\1\u04a7"+ - "\1\u04a6\1\u04a5\13\0\1\u0302\223\0\4\u04a9\2\0\1\u04a9"+ - "\15\0\1\u04a9\6\0\12\u04a9\1\u03d0\13\0\1\u0302\222\0"+ - "\1\u036b\4\u04a9\2\0\1\u04a9\15\0\1\u04a9\6\0\12\u04a9"+ - "\1\u03d0\13\0\1\u0302\270\0\1\u036a\13\0\1\u0302\256\0"+ - "\1\u04aa\2\u04ab\1\u04aa\5\u04ab\1\u04ac\237\0\1\u044b\304\0"+ - "\1\u044b\33\0\2\u044c\1\0\2\u044c\2\0\1\u044c\1\0"+ - "\1\u044c\237\0\1\u04ad\32\u0303\1\u0370\12\u0303\237\0\1\u04ad"+ - "\4\u0303\1\u04ae\25\u0303\1\u0370\12\u0303\237\0\1\u04ad\15\u0303"+ - "\1\u03e5\14\u0303\1\u0370\12\u0303\237\0\1\u04ad\10\u0303\1\u03e5"+ - "\21\u0303\1\u0370\12\u0303\237\0\1\u04ad\17\u0303\1\u044e\12\u0303"+ - "\1\u0370\12\u0303\237\0\1\u04ad\5\u0303\1\u04af\4\u0303\1\u044e"+ - "\17\u0303\1\u0370\12\u0303\237\0\1\u036f\20\u0303\1\u044e\11\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\7\u0303\1\u044e\22\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\27\u0303\1\u044e\2\u0303\1\u0370\12\u0303"+ - "\237\0\1\u04ad\6\u0303\1\u04ae\10\u0303\1\u044e\12\u0303\1\u0370"+ - "\12\u0303\237\0\1\u04ad\24\u0303\1\u04b0\5\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\11\u0303\1\u044e\20\u0303\1\u0370\12\u0303\237\0"+ - 
"\1\u04ad\16\u0303\1\u04b1\13\u0303\1\u0370\12\u0303\237\0\1\u04ad"+ - "\12\u0303\1\u04b2\17\u0303\1\u0370\12\u0303\237\0\1\u04ad\5\u0303"+ - "\1\u044e\24\u0303\1\u0370\12\u0303\237\0\1\u04ad\1\u04b3\31\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u04b4\12\u0303\237\0"+ - "\1\u036f\22\u0303\1\u044e\7\u0303\1\u0370\12\u0303\237\0\1\u04ad"+ - "\23\u0303\1\u044e\6\u0303\1\u0370\12\u0303\237\0\1\u04ad\24\u0303"+ - "\1\u04b5\5\u0303\1\u0370\12\u0303\237\0\34\u023e\12\u04b6\1\0"+ - "\2\u023e\1\u029f\3\u023e\1\u0240\1\u018c\1\u018d\1\u018e\2\0"+ - "\2\u023e\4\0\1\u023e\261\0\1\u0376\237\0\4\u04b7\2\0"+ - "\1\u04b7\15\0\1\u04b7\6\0\12\u04b7\1\u0465\237\0\4\u04b8"+ - "\2\0\1\u04b8\15\0\1\u04b8\6\0\1\u04b9\2\u04ba\1\u04b9"+ - "\5\u04ba\1\u04bb\1\u04bc\237\0\4\u04bd\2\0\1\u04bd\15\0"+ - "\1\u04bd\6\0\12\u04bd\1\u04be\13\0\1\u0146\222\0\1\u0188"+ - "\4\u04bd\2\0\1\u04bd\15\0\1\u04bd\6\0\12\u04bf\1\u04be"+ - "\13\0\1\u0146\222\0\1\u0188\4\u04bd\2\0\1\u04bd\15\0"+ - "\1\u04bd\6\0\12\u04c0\1\u04be\13\0\1\u0146\222\0\1\u0188"+ - "\4\u04bd\2\0\1\u04bd\15\0\1\u04bd\6\0\1\u04bf\1\u04c1"+ - "\1\u04c0\2\u04bf\2\u04c0\1\u04bf\1\u04c0\1\u04bf\1\u04be\13\0"+ - "\1\u0146\223\0\4\u04c2\2\0\1\u04c2\15\0\1\u04c2\6\0"+ - "\12\u04c2\1\u03f8\13\0\1\u0146\222\0\1\u0188\4\u04c2\2\0"+ - "\1\u04c2\15\0\1\u04c2\6\0\12\u04c2\1\u03f8\13\0\1\u0146"+ - "\222\0\1\u0109\3\270\1\u04c3\26\270\1\u010a\12\270\237\0"+ - "\1\u0109\2\270\1\u018f\27\270\1\u010a\12\270\237\0\1\u0109"+ - "\6\270\1\u019a\23\270\1\u010a\12\270\237\0\1\u0109\1\270"+ - "\1\u0417\30\270\1\u010a\12\270\237\0\1\u0109\3\270\1\u04c4"+ - "\26\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\3\270"+ - "\1\u04c5\6\270\237\0\1\u0109\32\270\1\u010a\6\270\1\u04c6"+ - "\3\270\237\0\1\u0109\32\270\1\u010a\6\270\1\u04c7\3\270"+ - "\237\0\1\u0109\32\270\1\u010a\5\270\1\u04c8\4\270\237\0"+ - "\1\u0109\32\270\1\u010a\7\270\1\u04c9\2\270\237\0\1\u0109"+ - "\1\u04ca\31\270\1\u010a\12\270\237\0\1\u0109\24\270\1\u04cb"+ - 
"\5\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\4\270"+ - "\1\u04cc\5\270\237\0\1\u0109\32\270\1\u010a\4\270\1\u04cd"+ - "\5\270\237\0\1\u0109\26\270\1\u04ce\3\270\1\u010a\12\270"+ - "\237\0\1\u0109\30\270\1\u04cf\1\270\1\u010a\12\270\237\0"+ - "\1\u0109\11\270\1\u01cb\20\270\1\u010a\12\270\237\0\1\u0109"+ - "\32\270\1\u010a\2\270\1\u04d0\7\270\237\0\1\u0109\12\270"+ - "\1\u04d1\17\270\1\u010a\12\270\237\0\1\u0109\17\270\1\u0197"+ - "\12\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\4\270"+ - "\1\u04d2\5\270\237\0\1\u0109\32\270\1\u010a\6\270\1\u01ce"+ - "\3\270\237\0\1\u0109\30\270\1\u04d3\1\270\1\u010a\12\270"+ - "\237\0\1\u0109\30\270\1\u04d4\1\270\1\u010a\12\270\237\0"+ - "\1\u0121\32\324\1\164\1\u04d5\11\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\2\324\1\u04d6\27\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\10\324\1\u0338\1\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\15\324\1\350\14\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\23\324\1\u04d7\6\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164\1\324"+ - "\1\u04d8\10\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164"+ - "\3\324\1\u0179\6\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\30\324"+ - "\1\u04d9\1\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\1\324\1\u04da\10\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\6\324\1\u04db\23\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\32\324\1\164\5\324\1\u04dc\4\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\32\324\1\164\5\324\1\u04dd\4\324"+ - 
"\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\1\324\1\350"+ - "\10\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\13\324\1\u04de\16\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\260\0\1\u03be\237\0\4\u04df"+ - "\2\0\1\u04df\15\0\1\u04df\6\0\12\u04df\1\u0494\237\0"+ - "\4\u04e0\2\0\1\u04e0\15\0\1\u04e0\6\0\12\u04e0\1\u04e1"+ - "\237\0\4\u04e2\2\0\1\u04e2\15\0\1\u04e2\6\0\12\u04e2"+ - "\1\u04e3\13\0\1\u035e\222\0\1\u03c5\4\u04e2\2\0\1\u04e2"+ - "\15\0\1\u04e2\6\0\12\u04e4\1\u04e3\13\0\1\u035e\222\0"+ - "\1\u03c5\4\u04e2\2\0\1\u04e2\15\0\1\u04e2\6\0\12\u04e5"+ - "\1\u04e3\13\0\1\u035e\222\0\1\u03c5\4\u04e2\2\0\1\u04e2"+ - "\15\0\1\u04e2\6\0\1\u04e4\1\u04e6\1\u04e5\2\u04e4\2\u04e5"+ - "\1\u04e4\1\u04e5\1\u04e4\1\u04e3\13\0\1\u035e\223\0\4\u04e7"+ - "\2\0\1\u04e7\15\0\1\u04e7\6\0\12\u04e7\1\u0435\13\0"+ - "\1\u035e\222\0\1\u03c5\4\u04e7\2\0\1\u04e7\15\0\1\u04e7"+ - "\6\0\12\u04e7\1\u0435\13\0\1\u035e\270\0\1\u03c4\13\0"+ - "\1\u035e\256\0\1\u04e8\2\u04e9\1\u04e8\5\u04e9\1\u04ea\237\0"+ - "\1\u049c\304\0\1\u049c\33\0\2\u049d\1\0\2\u049d\2\0"+ - "\1\u049d\1\0\1\u049d\240\0\1\u04eb\1\0\1\u04eb\5\0"+ - "\1\u04eb\352\0\1\u0233\226\0\4\u04ec\2\0\1\u04ec\15\0"+ - "\1\u04ec\6\0\12\u04ec\1\u0443\237\0\4\u04ed\2\0\1\u04ed"+ - "\15\0\1\u04ed\6\0\12\u04ed\1\u04ee\237\0\4\u04ef\2\0"+ - "\1\u04ef\15\0\1\u04ef\6\0\1\u04f0\2\u04f1\1\u04f0\5\u04f1"+ - "\1\u04f2\14\0\1\u0302\223\0\4\u04f3\2\0\1\u04f3\15\0"+ - "\1\u04f3\6\0\12\u04f3\1\u04a5\13\0\1\u0302\223\0\4\u04ef"+ - "\2\0\1\u04ef\15\0\1\u04ef\6\0\1\u04f0\2\u04f1\1\u04f0"+ - "\5\u04f1\1\u04f2\237\0\1\u036b\4\u04f3\2\0\1\u04f3\15\0"+ - "\1\u04f3\6\0\12\u04f4\1\u04a5\13\0\1\u0302\222\0\1\u036b"+ - "\4\u04f3\2\0\1\u04f3\15\0\1\u04f3\6\0\12\u04f3\1\u04a5"+ - "\13\0\1\u0302\222\0\1\u036b\4\u04f3\2\0\1\u04f3\15\0"+ - "\1\u04f3\6\0\2\u04f4\1\u04f3\2\u04f4\2\u04f3\1\u04f4\1\u04f3"+ - 
"\1\u04f4\1\u04a5\13\0\1\u0302\270\0\1\u03d0\13\0\1\u0302"+ - "\222\0\1\u04f5\33\0\12\u04ab\237\0\1\u04f5\33\0\12\u04f6"+ - "\237\0\1\u04f5\33\0\1\u04ab\1\u04f7\1\u04f6\2\u04ab\2\u04f6"+ - "\1\u04ab\1\u04f6\1\u04ab\237\0\1\u036f\5\u0303\1\u044e\24\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\15\u0303\1\u044e\14\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\10\u0303\1\u044e\21\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\3\u0303\1\u04f8\26\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\3\u0303\1\u044e\26\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\27\u0303\1\u04f9\2\u0303\1\u0370\12\u0303\240\0\32\u0303\1\u04fa"+ - "\12\u0303\237\0\1\u036f\16\u0303\1\u044e\13\u0303\1\u0370\12\u0303"+ - "\237\0\46\u023e\1\0\2\u023e\1\u029f\3\u023e\1\u0240\1\u018c"+ - "\1\u018d\1\u018e\2\0\2\u023e\4\0\1\u023e\214\0\4\u04fb"+ - "\2\0\1\u04fb\15\0\1\u04fb\6\0\12\u04fb\1\u0465\237\0"+ - "\4\u04fc\2\0\1\u04fc\15\0\1\u04fc\6\0\12\u04fc\1\u04fd"+ - "\236\0\1\u0188\4\u04fc\2\0\1\u04fc\15\0\1\u04fc\6\0"+ - "\12\u04fe\1\u04fd\236\0\1\u0188\4\u04fc\2\0\1\u04fc\15\0"+ - "\1\u04fc\6\0\12\u04ff\1\u04fd\236\0\1\u0188\4\u04fc\2\0"+ - "\1\u04fc\15\0\1\u04fc\6\0\1\u04fe\1\u0500\1\u04ff\2\u04fe"+ - "\2\u04ff\1\u04fe\1\u04ff\1\u04fe\1\u04fd\237\0\4\u0501\2\0"+ - "\1\u0501\15\0\1\u0501\6\0\12\u0501\14\0\1\u0146\223\0"+ - "\4\u0502\2\0\1\u0502\15\0\1\u0502\6\0\12\u0502\1\u04be"+ - "\13\0\1\u0146\223\0\4\u0501\2\0\1\u0501\15\0\1\u0501"+ - "\6\0\12\u0501\237\0\1\u0188\4\u0502\2\0\1\u0502\15\0"+ - "\1\u0502\6\0\12\u0503\1\u04be\13\0\1\u0146\222\0\1\u0188"+ - "\4\u0502\2\0\1\u0502\15\0\1\u0502\6\0\12\u0502\1\u04be"+ - "\13\0\1\u0146\222\0\1\u0188\4\u0502\2\0\1\u0502\15\0"+ - "\1\u0502\6\0\2\u0503\1\u0502\2\u0503\2\u0502\1\u0503\1\u0502"+ - "\1\u0503\1\u04be\13\0\1\u0146\270\0\1\u03f8\13\0\1\u0146"+ - "\222\0\1\u0109\1\u0504\31\270\1\u010a\12\270\237\0\1\u0109"+ - "\32\270\1\u010a\11\270\1\u040d\237\0\1\u0109\1\u0505\31\270"+ - "\1\u010a\12\270\237\0\1\u0109\1\u0506\31\270\1\u010a\12\270"+ - 
"\237\0\1\u0109\7\270\1\u0507\22\270\1\u010a\12\270\237\0"+ - "\1\u0109\1\u0508\31\270\1\u010a\12\270\237\0\1\u0109\1\u0509"+ - "\31\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\6\270"+ - "\1\u050a\3\270\237\0\1\u0109\6\270\1\u018f\23\270\1\u010a"+ - "\12\270\237\0\1\u0109\25\270\1\u050b\4\270\1\u010a\12\270"+ - "\237\0\1\u0109\1\u050c\31\270\1\u010a\12\270\237\0\1\u0109"+ - "\32\270\1\u010a\6\270\1\u050d\3\270\237\0\1\u0109\1\u050e"+ - "\31\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\6\270"+ - "\1\u01ca\3\270\237\0\1\u0109\12\270\1\u01a0\17\270\1\u010a"+ - "\12\270\237\0\1\u0109\1\u050f\31\270\1\u010a\12\270\237\0"+ - "\1\u0109\10\270\1\u0510\21\270\1\u010a\12\270\237\0\1\u0109"+ - "\31\270\1\u0511\1\u010a\12\270\237\0\1\u0121\24\324\1\u0512"+ - "\5\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\10\324\1\u0513\1\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\1\324\1\u012e\30\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\2\324\1\u0514\27\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\3\324\1\u0515\26\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\3\324\1\u0516\26\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\1\324\1\u0517"+ - "\10\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\3\324\1\u0518\26\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\u0519\31\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\26\324\1\u051a"+ - "\3\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\213\0\4\u051b\2\0"+ - "\1\u051b\15\0\1\u051b\6\0\12\u051b\1\u0494\237\0\4\u051c"+ - "\2\0\1\u051c\15\0\1\u051c\6\0\12\u051c\1\u051d\237\0"+ - 
"\4\u051e\2\0\1\u051e\15\0\1\u051e\6\0\1\u051f\2\u0520"+ - "\1\u051f\5\u0520\1\u0521\14\0\1\u035e\223\0\4\u0522\2\0"+ - "\1\u0522\15\0\1\u0522\6\0\12\u0522\1\u04e3\13\0\1\u035e"+ - "\223\0\4\u051e\2\0\1\u051e\15\0\1\u051e\6\0\1\u051f"+ - "\2\u0520\1\u051f\5\u0520\1\u0521\237\0\1\u03c5\4\u0522\2\0"+ - "\1\u0522\15\0\1\u0522\6\0\12\u0523\1\u04e3\13\0\1\u035e"+ - "\222\0\1\u03c5\4\u0522\2\0\1\u0522\15\0\1\u0522\6\0"+ - "\12\u0522\1\u04e3\13\0\1\u035e\222\0\1\u03c5\4\u0522\2\0"+ - "\1\u0522\15\0\1\u0522\6\0\2\u0523\1\u0522\2\u0523\2\u0522"+ - "\1\u0523\1\u0522\1\u0523\1\u04e3\13\0\1\u035e\270\0\1\u0435"+ - "\13\0\1\u035e\222\0\1\u0524\33\0\12\u04e9\237\0\1\u0524"+ - "\33\0\12\u0525\237\0\1\u0524\33\0\1\u04e9\1\u0526\1\u0525"+ - "\2\u04e9\2\u0525\1\u04e9\1\u0525\1\u04e9\317\0\1\u018e\272\0"+ - "\1\u0443\237\0\4\u0527\2\0\1\u0527\15\0\1\u0527\6\0"+ - "\12\u0527\1\u04ee\237\0\4\u0528\2\0\1\u0528\15\0\1\u0528"+ - "\6\0\12\u0528\1\u0529\237\0\4\u052a\2\0\1\u052a\15\0"+ - "\1\u052a\6\0\12\u052a\1\u052b\13\0\1\u0302\222\0\1\u036b"+ - "\4\u052a\2\0\1\u052a\15\0\1\u052a\6\0\12\u052c\1\u052b"+ - "\13\0\1\u0302\222\0\1\u036b\4\u052a\2\0\1\u052a\15\0"+ - "\1\u052a\6\0\12\u052d\1\u052b\13\0\1\u0302\222\0\1\u036b"+ - "\4\u052a\2\0\1\u052a\15\0\1\u052a\6\0\1\u052c\1\u052e"+ - "\1\u052d\2\u052c\2\u052d\1\u052c\1\u052d\1\u052c\1\u052b\13\0"+ - "\1\u0302\223\0\4\u052f\2\0\1\u052f\15\0\1\u052f\6\0"+ - "\12\u052f\1\u04a5\13\0\1\u0302\222\0\1\u036b\4\u052f\2\0"+ - "\1\u052f\15\0\1\u052f\6\0\12\u052f\1\u04a5\13\0\1\u0302"+ - "\256\0\1\u0530\2\u0531\1\u0530\5\u0531\1\u0532\237\0\1\u04f5"+ - "\304\0\1\u04f5\33\0\2\u04f6\1\0\2\u04f6\2\0\1\u04f6"+ - "\1\0\1\u04f6\237\0\1\u036f\20\u0303\1\u0533\11\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\3\u0303\1\u0459\26\u0303\1\u0370\12\u0303"+ - "\240\0\1\u0303\1\u0534\1\u0535\2\u0303\1\u0536\1\u0537\1\u0538"+ - "\1\u0303\1\u0539\1\u053a\2\u0303\1\u053b\1\u053c\2\u0303\1\u053d"+ - "\1\u053e\1\u053f\1\u0303\1\u0540\1\u0541\1\u0303\1\u0542\1\u0543"+ - 
"\1\u0370\1\u0544\2\u0303\1\u0545\1\u0546\1\u0547\1\u0303\1\u0548"+ - "\1\u0549\1\u0303\305\0\1\u0465\237\0\4\u054a\2\0\1\u054a"+ - "\15\0\1\u054a\6\0\12\u054a\1\u04fd\237\0\4\u0501\2\0"+ - "\1\u0501\15\0\1\u0501\6\0\12\u0501\1\u037d\236\0\1\u0188"+ - "\4\u054a\2\0\1\u054a\15\0\1\u054a\6\0\12\u054b\1\u04fd"+ - "\236\0\1\u0188\4\u054a\2\0\1\u054a\15\0\1\u054a\6\0"+ - "\12\u054a\1\u04fd\236\0\1\u0188\4\u054a\2\0\1\u054a\15\0"+ - "\1\u054a\6\0\2\u054b\1\u054a\2\u054b\2\u054a\1\u054b\1\u054a"+ - "\1\u054b\1\u04fd\237\0\4\u054c\2\0\1\u054c\15\0\1\u054c"+ - "\6\0\12\u054c\14\0\1\u0146\223\0\4\u054d\2\0\1\u054d"+ - "\15\0\1\u054d\6\0\12\u054d\1\u04be\13\0\1\u0146\222\0"+ - "\1\u0188\4\u054d\2\0\1\u054d\15\0\1\u054d\6\0\12\u054d"+ - "\1\u04be\13\0\1\u0146\222\0\1\u0109\32\270\1\u010a\1\u054e"+ - "\11\270\237\0\1\u0109\2\270\1\u054f\27\270\1\u010a\12\270"+ - "\237\0\1\u0109\32\270\1\u010a\10\270\1\u0400\1\270\237\0"+ - "\1\u0109\15\270\1\u0153\14\270\1\u010a\12\270\237\0\1\u0109"+ - "\23\270\1\u0550\6\270\1\u010a\12\270\237\0\1\u0109\32\270"+ - "\1\u010a\1\270\1\u0551\10\270\237\0\1\u0109\32\270\1\u010a"+ - "\3\270\1\u01ce\6\270\237\0\1\u0109\30\270\1\u0552\1\270"+ - "\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\1\270\1\u0553"+ - "\10\270\237\0\1\u0109\6\270\1\u0554\23\270\1\u010a\12\270"+ - "\237\0\1\u0109\32\270\1\u010a\5\270\1\u0555\4\270\237\0"+ - "\1\u0109\32\270\1\u010a\5\270\1\u0556\4\270\237\0\1\u0109"+ - "\32\270\1\u010a\1\270\1\u0153\10\270\237\0\1\u0109\13\270"+ - "\1\u0557\16\270\1\u010a\12\270\237\0\1\u0121\32\324\1\164"+ - "\11\324\1\u0558\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\26\324\1\u0127"+ - "\3\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\7\324\1\u0559\2\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - "\32\324\1\164\11\324\1\350\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121"+ - 
"\3\324\1\u055a\26\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\32\324\1\164\4\324\1\u055b\5\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\16\324\1\u055c\13\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\26\324\1\u055d\3\324\1\164\12\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\212\0\1\u0121\32\324\1\164\7\324\1\u0427"+ - "\2\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\260\0\1\u0494\237\0\4\u055e\2\0"+ - "\1\u055e\15\0\1\u055e\6\0\12\u055e\1\u051d\237\0\4\u055f"+ - "\2\0\1\u055f\15\0\1\u055f\6\0\12\u055f\1\u0560\237\0"+ - "\4\u0561\2\0\1\u0561\15\0\1\u0561\6\0\12\u0561\1\u0562"+ - "\13\0\1\u035e\222\0\1\u03c5\4\u0561\2\0\1\u0561\15\0"+ - "\1\u0561\6\0\12\u0563\1\u0562\13\0\1\u035e\222\0\1\u03c5"+ - "\4\u0561\2\0\1\u0561\15\0\1\u0561\6\0\12\u0564\1\u0562"+ - "\13\0\1\u035e\222\0\1\u03c5\4\u0561\2\0\1\u0561\15\0"+ - "\1\u0561\6\0\1\u0563\1\u0565\1\u0564\2\u0563\2\u0564\1\u0563"+ - "\1\u0564\1\u0563\1\u0562\13\0\1\u035e\223\0\4\u0566\2\0"+ - "\1\u0566\15\0\1\u0566\6\0\12\u0566\1\u04e3\13\0\1\u035e"+ - "\222\0\1\u03c5\4\u0566\2\0\1\u0566\15\0\1\u0566\6\0"+ - "\12\u0566\1\u04e3\13\0\1\u035e\256\0\1\u0567\2\u0568\1\u0567"+ - "\5\u0568\1\u0569\237\0\1\u0524\304\0\1\u0524\33\0\2\u0525"+ - "\1\0\2\u0525\2\0\1\u0525\1\0\1\u0525\240\0\4\u056a"+ - "\2\0\1\u056a\15\0\1\u056a\6\0\12\u056a\1\u04ee\237\0"+ - "\4\u056b\2\0\1\u056b\15\0\1\u056b\6\0\12\u056b\1\u056c"+ - "\237\0\4\u056d\2\0\1\u056d\15\0\1\u056d\6\0\1\u056e"+ - "\2\u056f\1\u056e\5\u056f\1\u0570\14\0\1\u0302\223\0\4\u0571"+ - "\2\0\1\u0571\15\0\1\u0571\6\0\12\u0571\1\u052b\13\0"+ - "\1\u0302\223\0\4\u056d\2\0\1\u056d\15\0\1\u056d\6\0"+ - "\1\u056e\2\u056f\1\u056e\5\u056f\1\u0570\237\0\1\u036b\4\u0571"+ - "\2\0\1\u0571\15\0\1\u0571\6\0\12\u0572\1\u052b\13\0"+ - "\1\u0302\222\0\1\u036b\4\u0571\2\0\1\u0571\15\0\1\u0571"+ - 
"\6\0\12\u0571\1\u052b\13\0\1\u0302\222\0\1\u036b\4\u0571"+ - "\2\0\1\u0571\15\0\1\u0571\6\0\2\u0572\1\u0571\2\u0572"+ - "\2\u0571\1\u0572\1\u0571\1\u0572\1\u052b\13\0\1\u0302\270\0"+ - "\1\u04a5\13\0\1\u0302\256\0\12\u0531\14\0\1\u0302\256\0"+ - "\12\u0573\14\0\1\u0302\256\0\1\u0531\1\u0574\1\u0573\2\u0531"+ - "\2\u0573\1\u0531\1\u0573\1\u0531\14\0\1\u0302\222\0\1\u036f"+ - "\12\u0303\1\u044e\17\u0303\1\u0370\12\u0303\237\0\1\u036f\11\u0303"+ - "\1\u0575\20\u0303\1\u0370\12\u0303\237\0\1\u036f\3\u0303\1\u0576"+ - "\26\u0303\1\u0370\12\u0303\237\0\1\u036f\7\u0303\1\u0577\22\u0303"+ - "\1\u0370\4\u0303\1\u0578\5\u0303\237\0\1\u036f\10\u0303\1\u0579"+ - "\4\u0303\1\u057a\5\u0303\1\u057b\6\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\3\u0303\1\u057c\26\u0303\1\u0370\2\u0303\1\u057d\7\u0303"+ - "\237\0\1\u036f\7\u0303\1\u057e\22\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\7\u0303\1\u057f\22\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\7\u0303\1\u0580\22\u0303\1\u0370\3\u0303\1\u0581\6\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\5\u0303\1\u0582\4\u0303\237\0\1\u036f"+ - "\7\u0303\1\u0583\22\u0303\1\u0370\12\u0303\237\0\1\u036f\31\u0303"+ - "\1\u0584\1\u0370\12\u0303\237\0\1\u036f\1\u0303\1\u0585\30\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\7\u0303\1\u0586\1\u0303\1\u0587"+ - "\20\u0303\1\u0370\11\u0303\1\u0582\237\0\1\u036f\22\u0303\1\u0588"+ - "\7\u0303\1\u0370\2\u0303\1\u0589\7\u0303\237\0\1\u036f\6\u0303"+ - "\1\u058a\1\u058b\22\u0303\1\u0370\12\u0303\237\0\1\u036f\7\u0303"+ - "\1\u058c\5\u0303\1\u058d\14\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\23\u0303\1\u058e\6\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\3\u0303\1\u058f\6\u0303\237\0\1\u036f\3\u0303\1\u0590"+ - "\26\u0303\1\u0370\12\u0303\237\0\1\u036f\17\u0303\1\u0591\12\u0303"+ - "\1\u0370\1\u0592\11\u0303\237\0\1\u036f\32\u0303\1\u0370\1\u0303"+ - "\1\u0582\10\u0303\237\0\1\u036f\32\u0303\1\u0370\1\u0593\11\u0303"+ - "\240\0\4\u0594\2\0\1\u0594\15\0\1\u0594\6\0\12\u0594"+ - 
"\1\u04fd\236\0\1\u0188\4\u0594\2\0\1\u0594\15\0\1\u0594"+ - "\6\0\12\u0594\1\u04fd\237\0\4\u0595\2\0\1\u0595\15\0"+ - "\1\u0595\6\0\12\u0595\14\0\1\u0146\270\0\1\u04be\13\0"+ - "\1\u0146\222\0\1\u0109\24\270\1\u0596\5\270\1\u010a\12\270"+ - "\237\0\1\u0109\32\270\1\u010a\10\270\1\u0597\1\270\237\0"+ - "\1\u0109\1\270\1\u0196\30\270\1\u010a\12\270\237\0\1\u0109"+ - "\2\270\1\u0598\27\270\1\u010a\12\270\237\0\1\u0109\3\270"+ - "\1\u0599\26\270\1\u010a\12\270\237\0\1\u0109\3\270\1\u059a"+ - "\26\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\1\270"+ - "\1\u059b\10\270\237\0\1\u0109"; + "\1\366\1\367\1\370\1\154\12\371\1\u01d0\1\154\1\201"+ + "\1\154\1\0\1\154\1\156\1\u01d1\1\u01d2\1\u01d3\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\245\1\0\1\246\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\247\2\0\1\250\4\0"+ + "\1\251\3\0\1\252\17\0\1\71\2\0\1\253\21\0"+ + "\1\254\2\0\1\255\61\0\1\30\1\76\2\0\1\76"+ + "\1\0\2\76\1\0\1\76\2\0\1\76\1\0\2\30"+ + "\1\76\32\30\1\0\12\u0259\1\76\1\0\1\76\233\0"+ + "\1\u01d1\3\0\1\u025a\45\u01d1\1\u0228\2\u01d1\1\u025b\1\u0228"+ + "\1\u01d1\1\u025c\2\u01d1\1\u01d3\2\0\1\u0228\1\u01d1\3\0"+ + "\1\u01d1\1\154\213\0\1\154\3\0\1\u025d\45\u01d2\1\u0229"+ + "\2\u01d2\1\u025e\1\0\1\154\1\u025f\1\u01d1\1\u01d2\1\u01d3"+ + "\2\0\1\u0229\1\u01d2\3\0\2\154\213\0\1\u01d3\3\0"+ + "\1\u0260\45\u01d3\1\u022a\2\u01d3\1\u0261\1\u022a\1\u01d3\1\u0262"+ + "\2\u01d3\1\154\2\0\1\u022a\1\u01d3\3\0\1\u01d3\1\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\3\43\1\u0263\26\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + 
"\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\15\43\1\u0141\14\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\16\43\1\u0264\1\u0265\12\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\17\43\1\u0266\12\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\12\43\1\u0267\17\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\3\43\1\u0268"+ + "\26\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\3\43"+ + 
"\1\u0269\26\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\10\43\1\u026a\21\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\1\u026b\31\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\11\43\1\u026c\20\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\15\43\1\u026d\14\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\2\43\1\u0141\27\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + 
"\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\25\43\1\u026e\4\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\10\43\1\u0141\21\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\3\43\1\u026f"+ + "\26\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\3\43"+ + "\1\u0141\26\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\17\43\1\u0141\12\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + 
"\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\12\43\1\u0270\17\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\17\43\1\u0271\12\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\31\43\1\u0141\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\7\43\1\u0272\22\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\17\43\1\u0273\12\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\25\43\1\u0274\4\43"+ + 
"\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\30\43\1\u0275"+ + "\1\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\1\u01e7"+ + "\31\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\16\43"+ + "\1\u0141\13\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\155\32\371\1\u0276\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\2\43\1\u0277\27\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\1\43\1\u0278\30\43\1\203\12\204\1\76"+ + 
"\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\17\43\1\u0279\12\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\1\u027a\31\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\245\1\0\1\246\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\247\2\0\1\250"+ + "\4\0\1\251\3\0\1\252\17\0\1\71\2\0\1\253"+ + "\21\0\1\254\2\0\1\255\61\0\1\30\1\76\2\0"+ + "\1\76\1\0\2\76\1\0\1\76\2\0\1\76\1\0"+ + "\2\30\1\76\32\30\13\0\1\76\1\0\1\76\5\0"+ + "\1\u027b\307\0\1\u027c\14\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\5\43\1\u027d\24\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\32\43\1\203\12\204\1\u01f7"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\245\1\0\1\246\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\247\2\0\1\250\4\0"+ + "\1\251\3\0\1\252\17\0\1\71\2\0\1\253\21\0"+ 
+ "\1\254\2\0\1\255\61\0\1\30\1\76\2\0\1\76"+ + "\1\0\2\76\1\0\1\76\2\0\1\76\1\0\2\30"+ + "\1\76\32\30\13\0\1\76\1\0\1\76\5\0\1\u027e"+ + "\225\0\1\154\1\157\2\0\1\155\1\u0103\1\u0104\1\u0105"+ + "\1\u0106\1\u0107\1\u0108\1\u0109\1\u010a\1\u010b\1\u010c\1\u010d"+ + "\1\u010e\1\u010f\1\u0110\1\u0111\1\u0112\1\u0113\1\u0114\1\u0115"+ + "\1\u0116\1\u0117\1\u0118\1\u0119\1\u011a\1\u011b\1\u011c\1\154"+ + "\12\371\1\u01f9\3\154\1\0\1\154\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\3\0\1\154\3\0\2\154\253\0\12\u0259\236\0"+ + "\1\154\3\0\1\u016f\3\371\1\u027f\26\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\15\371\1\u0174"+ + "\14\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\16\371\1\u0280\1\u0281\12\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\17\371\1\u0282\12\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\12\371\1\u0283\17\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\3\371\1\u0284\26\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u0285"+ + "\26\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\10\371\1\u0286\21\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\1\u0287\31\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\11\371\1\u0288"+ + "\20\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\15\371\1\u0289\14\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\2\371\1\u0174\27\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + 
"\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\25\371"+ + "\1\u028a\4\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\10\371\1\u0174\21\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u028b\26\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\3\371\1\u0174\26\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\17\371\1\u0174\12\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\12\371\1\u028c"+ + "\17\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\17\371\1\u028d\12\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\31\371\1\u0174\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\7\371\1\u028e"+ + "\22\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\17\371\1\u028f\12\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\25\371\1\u0290\4\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\30\371"+ + "\1\u0291\1\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\u020d\31\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\16\371\1\u0174\13\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\2\371"+ + "\1\u0292\27\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\371\1\u0293\30\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + 
"\2\154\213\0\1\154\3\0\1\u016f\17\371\1\u0294\12\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\u0295\31\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\100\1\0\1\101\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\102"+ + "\2\0\1\103\4\0\1\104\3\0\1\105\17\0\1\71"+ + "\2\0\1\106\21\0\1\107\2\0\1\110\61\0\1\30"+ + "\2\31\2\0\2\111\1\112\1\0\1\31\2\0\1\212"+ + "\1\0\1\41\1\30\1\u0296\32\43\1\203\12\u0297\1\0"+ + "\1\154\1\215\1\154\1\0\1\212\1\156\3\154\2\0"+ + "\1\111\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\100\1\0\1\101\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\102\2\0\1\103"+ + "\4\0\1\104\3\0\1\105\17\0\1\71\2\0\1\106"+ + "\21\0\1\107\2\0\1\110\61\0\1\30\2\31\2\0"+ + "\2\111\1\112\1\0\1\31\2\0\1\212\1\0\1\41"+ + "\1\30\1\u0296\32\43\1\203\12\u0218\1\0\1\154\1\215"+ + "\1\154\1\0\1\212\1\156\3\154\2\0\1\111\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\100"+ + "\1\0\1\101\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\102\2\0\1\103\4\0\1\104"+ + "\3\0\1\105\17\0\1\71\2\0\1\106\21\0\1\107"+ + "\2\0\1\110\61\0\1\30\2\31\2\0\2\111\1\112"+ + "\1\0\1\31\2\0\1\212\1\0\1\41\1\30\1\u0296"+ + "\32\43\1\203\2\u0218\1\u0297\1\u0218\1\u0298\2\u0297\2\u0218"+ + "\1\u0297\1\0\1\154\1\215\1\154\1\0\1\212\1\156"+ + "\3\154\2\0\1\111\1\154\3\0\2\154\265\0\1\u0123"+ + "\242\0\4\u0299\2\0\1\u0299\15\0\1\u0299\6\0\12\u0299"+ + "\1\u021d\242\0\4\u029a\2\0\1\u029a\15\0\1\u029a\6\0"+ + "\12\u029a\1\u029b\242\0\4\u029c\2\0\1\u029c\15\0\1\u029c"+ + "\6\0\12\u029c\1\u029d\12\0\1\u01ad\226\0\1\u0224\4\u029c"+ + "\2\0\1\u029c\15\0\1\u029c\6\0\12\u029e\1\u029d\12\0"+ + "\1\u01ad\226\0\1\u0224\4\u029c\2\0\1\u029c\15\0\1\u029c"+ + "\6\0\12\u029f\1\u029d\12\0\1\u01ad\226\0\1\u0224\4\u029c"+ + "\2\0\1\u029c\15\0\1\u029c\6\0\2\u029f\1\u029e\1\u029f"+ + "\1\u02a0\2\u029e\2\u029f\1\u029e\1\u029d\12\0\1\u01ad\227\0"+ + 
"\4\u02a1\2\0\1\u02a1\15\0\1\u02a1\6\0\12\u02a1\1\u0223"+ + "\12\0\1\u01ad\227\0\4\u021e\2\0\1\u021e\15\0\1\u021e"+ + "\6\0\1\u021f\1\u0220\5\u021f\1\u0221\1\u0220\1\u021f\276\0"+ + "\1\u02a2\1\u02a3\5\u02a2\1\u02a4\1\u02a3\1\u02a2\242\0\1\u0224"+ + "\4\u02a1\2\0\1\u02a1\15\0\1\u02a1\6\0\12\u02a1\1\u0223"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u02a1\2\0\1\u02a1\15\0"+ + "\1\u02a1\6\0\12\u02a5\1\u0223\12\0\1\u01ad\226\0\1\u0224"+ + "\4\u02a1\2\0\1\u02a1\15\0\1\u02a1\6\0\2\u02a5\1\u02a1"+ + "\2\u02a5\2\u02a1\2\u02a5\1\u02a1\1\u0223\12\0\1\u01ad\222\0"+ + "\1\u0228\3\0\51\u0228\1\u02a6\5\u0228\1\u022a\2\0\2\u0228"+ + "\3\0\1\u0228\220\0\51\u0229\1\u02a7\2\0\1\u0229\1\u0228"+ + "\1\u0229\1\u022a\2\0\2\u0229\220\0\1\u022a\3\0\51\u022a"+ + "\1\u02a8\5\u022a\3\0\2\u022a\3\0\1\u022a\220\0\1\u02a9"+ + "\32\323\1\u0137\12\323\242\0\1\u02a9\4\323\1\u0250\25\323"+ + "\1\u0137\12\323\242\0\1\u02a9\15\323\1\u01c1\14\323\1\u0137"+ + "\12\323\242\0\1\u02a9\10\323\1\u01c1\21\323\1\u0137\12\323"+ + "\242\0\1\u02a9\12\323\1\u02aa\4\323\1\u022b\12\323\1\u0137"+ + "\12\323\242\0\1\u02a9\5\323\1\u02ab\4\323\1\u022b\1\u02ac"+ + "\16\323\1\u0137\12\323\242\0\1\u02a9\5\323\1\u02ad\24\323"+ + "\1\u0137\12\323\242\0\1\u0136\1\u02ae\3\323\1\u02af\25\323"+ + "\1\u0137\12\323\242\0\1\u0136\20\323\1\u022b\11\323\1\u0137"+ + "\12\323\242\0\1\u0136\17\323\1\u02b0\12\323\1\u0137\12\323"+ + "\242\0\1\u0136\20\323\1\u02b1\11\323\1\u0137\12\323\242\0"+ + "\1\u02a9\17\323\1\u02b2\12\323\1\u0137\12\323\242\0\1\u0136"+ + "\7\323\1\u022b\22\323\1\u0137\12\323\242\0\1\u02a9\11\323"+ + "\1\u02b3\20\323\1\u0137\12\323\242\0\1\u02a9\1\u02b4\31\323"+ + "\1\u0137\12\323\242\0\1\u0136\30\323\1\u022b\1\323\1\u0137"+ + "\12\323\242\0\1\u02a9\4\323\1\u0233\25\323\1\u0137\12\323"+ + "\242\0\1\u02a9\6\323\1\u0250\10\323\1\u022b\12\323\1\u0137"+ + "\12\323\242\0\1\u02a9\13\323\1\u02b5\16\323\1\u0137\12\323"+ + "\242\0\1\u02a9\7\323\1\u02b6\22\323\1\u0137\12\323\242\0"+ + 
"\1\u02a9\13\323\1\u0233\16\323\1\u0137\12\323\242\0\1\u02a9"+ + "\24\323\1\u02b7\5\323\1\u0137\12\323\242\0\1\u0136\11\323"+ + "\1\u022b\20\323\1\u0137\12\323\242\0\1\u02a9\16\323\1\u02b8"+ + "\13\323\1\u0137\12\323\242\0\1\u02a9\12\323\1\u02b9\17\323"+ + "\1\u0137\12\323\242\0\1\u02a9\17\323\1\u022b\12\323\1\u0137"+ + "\12\323\242\0\1\u02a9\5\323\1\u022b\24\323\1\u0137\12\323"+ + "\242\0\1\u0136\16\323\1\u02ba\13\323\1\u0137\12\323\242\0"+ + "\1\u02a9\20\323\1\u02bb\11\323\1\u0137\12\323\242\0\1\u02a9"+ + "\5\323\1\u02bc\24\323\1\u0137\12\323\242\0\1\u02a9\22\323"+ + "\1\u02bd\7\323\1\u0137\12\323\242\0\1\u02a9\13\323\1\u02be"+ + "\16\323\1\u0137\12\323\242\0\1\u0136\17\323\1\u02bf\12\323"+ + "\1\u0137\12\323\242\0\1\u0136\1\323\1\u02c0\7\323\1\u022b"+ + "\20\323\1\u0137\12\323\242\0\1\u02a9\1\u02c1\31\323\1\u0137"+ + "\12\323\242\0\1\u02a9\2\323\1\u02c2\27\323\1\u0137\12\323"+ + "\242\0\1\u0136\15\323\1\u02c3\14\323\1\u0137\12\323\242\0"+ + "\1\u0136\5\323\1\u022b\24\323\1\u0137\12\323\242\0\1\u0136"+ + "\32\323\1\u02c4\12\323\242\0\1\u0136\22\323\1\u022b\7\323"+ + "\1\u0137\12\323\242\0\1\u02a9\23\323\1\u022b\2\323\1\u02b9"+ + "\3\323\1\u0137\12\323\242\0\1\u0136\11\323\1\u02c5\20\323"+ + "\1\u0137\12\323\242\0\1\u02a9\17\323\1\u02c6\12\323\1\u0137"+ + "\12\323\242\0\1\u02a9\24\323\1\u02c3\5\323\1\u0137\12\323"+ + "\242\0\1\u02a9\13\323\1\u02c7\16\323\1\u0137\12\323\242\0"+ + "\1\u0136\31\323\1\u02c8\1\u0137\12\323\276\0\12\u02c9\7\0"+ + "\1\u0228\1\u0229\1\u022a\224\0\1\u01d1\1\157\2\0\1\u025a"+ + "\45\u01d1\1\u0228\2\u01d1\1\u025b\1\u0228\1\u01d1\1\u025c\2\u01d1"+ + "\1\u01d3\2\0\1\u0228\1\u01d1\3\0\1\u01d1\1\154\213\0"+ + "\1\154\3\0\1\155\4\u02ca\2\154\1\u02ca\15\154\1\u02ca"+ + "\6\154\12\u02ca\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\u0228\3\0\51\u0228"+ + "\1\u02a6\5\u0228\1\u022a\1\324\1\0\2\u0228\3\0\1\u0228"+ + "\214\0\1\154\1\157\2\0\1\u025d\45\u01d2\1\u0229\2\u01d2"+ + 
"\1\u025e\1\0\1\154\1\u025f\1\u01d1\1\u01d2\1\u01d3\2\0"+ + "\1\u0229\1\u01d2\3\0\2\154\213\0\1\154\3\0\1\155"+ + "\4\u02cb\2\154\1\u02cb\15\154\1\u02cb\6\154\12\u02cb\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\217\0\51\u0229\1\u02a7\2\0\1\u0229\1\u0228\1\u0229"+ + "\1\u022a\1\324\1\0\2\u0229\220\0\1\u01d3\1\157\2\0"+ + "\1\u0260\45\u01d3\1\u022a\2\u01d3\1\u0261\1\u022a\1\u01d3\1\u0262"+ + "\2\u01d3\1\154\2\0\1\u022a\1\u01d3\3\0\1\u01d3\1\154"+ + "\213\0\1\154\3\0\1\155\4\u02cc\2\154\1\u02cc\15\154"+ + "\1\u02cc\6\154\12\u02cc\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\u022a\3\0"+ + "\51\u022a\1\u02a8\5\u022a\1\0\1\324\1\0\2\u022a\3\0"+ + "\1\u022a\3\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\4\43\1\353\25\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\17\43\1\u02cd\12\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\4\43\1\u02ce\25\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + 
"\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\25\43\1\u02cf\4\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\5\43\1\u02d0\24\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\1\43\1\u02d1"+ + "\30\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\4\43"+ + "\1\u02d2\25\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\15\43\1\u02d3\14\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\17\43\1\u01e3\12\43\1\203\12\204\1\76\1\154"+ + 
"\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\3\43\1\u02d4\26\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\25\43\1\u02d5\4\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\17\43\1\u02cf\12\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\20\43\1\u02d6\11\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\24\43\1\u02cf"+ + "\5\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + 
"\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\5\43"+ + "\1\u02d7\24\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\11\43\1\u02d8\20\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\5\43\1\u0166\24\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\13\43\1\u02d9\16\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\3\43\1\u0157\26\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\155"+ + "\1\371\1\u02da\3\371\1\u02db\1\u02dc\1\u02dd\1\371\1\u02de"+ + "\1\u02df\1\u02e0\1\u02e1\1\u02e2\1\u02e3\1\371\1\u02e4\1\u02e5"+ + "\1\u02e6\2\371\1\u02e7\1\u02e8\1\u02e9\1\371\1\u02ea\1\203"+ + 
"\1\u02eb\2\371\1\u02ec\1\371\1\u02ed\1\u02ee\3\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\10\43\1\u02ef\21\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\25\43\1\u02f0\4\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\20\43\1\u02f1\11\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\7\43\1\u01e3\22\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\275\0\1\u02f2\225\0"+ + "\1\u02f3\3\0\1\u02f3\32\u02f4\1\u02f3\12\u02f4\1\u02f5\2\u02f3"+ + "\1\u02f6\2\u02f3\1\u02f7\3\0\1\u02f8\1\0\2\u02f3\3\0"+ + "\1\u02f3\3\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ 
+ "\1\202\32\43\1\203\12\204\1\u02f9\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\275\0\1\u02fa\225\0\1\154\3\0\1\u016f\4\371\1\u010f"+ + "\25\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\17\371\1\u02fb\12\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\4\371\1\u02fc\25\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\25\371"+ + "\1\u02fd\4\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\5\371\1\u02fe\24\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\371\1\u02ff\30\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\4\371\1\u0300\25\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\15\371\1\u0301\14\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\17\371\1\u0209"+ + "\12\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\3\371\1\u0302\26\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\25\371\1\u0303\4\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\17\371"+ + "\1\u02fd\12\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\20\371\1\u0304\11\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\24\371\1\u02fd\5\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\5\371\1\u0305\24\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + 
"\1\154\3\0\1\u016f\11\371\1\u0306\20\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\5\371\1\u0199"+ + "\24\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\13\371\1\u0307\16\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\3\371\1\u018a\26\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\10\371"+ + "\1\u0308\21\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\25\371\1\u0309\4\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\20\371\1\u030a\11\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\7\371\1\u0209\22\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\7\0"+ + "\1\265\1\0\1\266\17\0\1\267\2\0\1\270\4\0"+ + "\1\271\3\0\1\272\22\0\1\273\21\0\1\274\2\0"+ + "\1\275\62\0\1\111\1\31\2\0\3\243\1\0\1\111"+ + "\2\0\1\372\1\157\2\0\1\u0102\1\u0103\1\u0104\1\u0105"+ + "\1\u0106\1\u0107\1\u0108\1\u0109\1\u010a\1\u010b\1\u010c\1\u010d"+ + "\1\u010e\1\u010f\1\u0110\1\u0111\1\u0112\1\u0113\1\u0114\1\u0115"+ + "\1\u0116\1\u0117\1\u0118\1\u0119\1\u011a\1\u011b\1\u011c\1\154"+ + "\1\u030b\1\u030c\5\u030b\1\u030d\1\u030c\1\u030b\1\0\1\154"+ + "\1\372\1\154\1\0\1\372\1\156\3\154\2\0\1\243"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\100\1\0\1\101\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\102\2\0\1\103\4\0"+ + "\1\104\3\0\1\105\17\0\1\71\2\0\1\106\21\0"+ + "\1\107\2\0\1\110\61\0\1\30\2\31\2\0\2\111"+ + "\1\112\1\0\1\31\2\0\1\212\1\0\1\41\1\30"+ + "\1\u0296\32\43\1\203\12\u0120\1\0\1\154\1\215\1\154"+ + "\1\0\1\212\1\156\3\154\2\0\1\111\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\100\1\0"+ + "\1\101\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + 
"\1\64\3\0\1\102\2\0\1\103\4\0\1\104\3\0"+ + "\1\105\17\0\1\71\2\0\1\106\21\0\1\107\2\0"+ + "\1\110\61\0\1\30\2\31\2\0\2\111\1\112\1\0"+ + "\1\31\2\0\1\212\1\0\1\41\1\30\1\u0296\32\43"+ + "\1\203\2\u0297\1\u0120\2\u0297\2\u0120\2\u0297\1\u0120\1\0"+ + "\1\154\1\215\1\154\1\0\1\212\1\156\3\154\2\0"+ + "\1\111\1\154\3\0\2\154\220\0\4\u030e\2\0\1\u030e"+ + "\15\0\1\u030e\6\0\12\u030e\1\u021d\242\0\4\u030f\2\0"+ + "\1\u030f\15\0\1\u030f\6\0\12\u030f\1\u0310\242\0\4\u0311"+ + "\2\0\1\u0311\15\0\1\u0311\6\0\1\u0312\1\u0313\5\u0312"+ + "\1\u0314\1\u0313\1\u0312\13\0\1\u01ad\227\0\4\u0315\2\0"+ + "\1\u0315\15\0\1\u0315\6\0\12\u0315\1\u029d\12\0\1\u01ad"+ + "\227\0\4\u0311\2\0\1\u0311\15\0\1\u0311\6\0\1\u0312"+ + "\1\u0313\5\u0312\1\u0314\1\u0313\1\u0312\242\0\1\u0224\4\u0315"+ + "\2\0\1\u0315\15\0\1\u0315\6\0\12\u0315\1\u029d\12\0"+ + "\1\u01ad\226\0\1\u0224\4\u0315\2\0\1\u0315\15\0\1\u0315"+ + "\6\0\12\u0316\1\u029d\12\0\1\u01ad\226\0\1\u0224\4\u0315"+ + "\2\0\1\u0315\15\0\1\u0315\6\0\2\u0316\1\u0315\2\u0316"+ + "\2\u0315\2\u0316\1\u0315\1\u029d\12\0\1\u01ad\227\0\4\u0317"+ + "\2\0\1\u0317\15\0\1\u0317\6\0\12\u0317\1\u0223\12\0"+ + "\1\u01ad\226\0\1\u0318\33\0\12\u0319\242\0\1\u0318\33\0"+ + "\12\u02a2\242\0\1\u0318\33\0\2\u02a2\1\u0319\1\u02a2\1\u031a"+ + "\2\u0319\2\u02a2\1\u0319\242\0\1\u0224\4\u0317\2\0\1\u0317"+ + "\15\0\1\u0317\6\0\12\u0317\1\u0223\12\0\1\u01ad\227\0"+ + "\4\u031b\2\0\1\u031b\15\0\1\u031b\6\0\12\u031b\243\0"+ + "\4\u031c\2\0\1\u031c\15\0\1\u031c\6\0\12\u031c\243\0"+ + "\4\u031d\2\0\1\u031d\15\0\1\u031d\6\0\12\u031d\242\0"+ + "\1\u0136\3\323\1\u031e\26\323\1\u0137\12\323\242\0\1\u0136"+ + "\15\323\1\u022b\14\323\1\u0137\12\323\242\0\1\u0136\16\323"+ + "\1\u031f\1\u0320\12\323\1\u0137\12\323\242\0\1\u0136\17\323"+ + "\1\u0321\12\323\1\u0137\12\323\242\0\1\u0136\12\323\1\u0322"+ + "\17\323\1\u0137\12\323\242\0\1\u0136\3\323\1\u0323\26\323"+ + "\1\u0137\12\323\242\0\1\u0136\3\323\1\u0324\26\323\1\u0137"+ + 
"\12\323\242\0\1\u0136\10\323\1\u0325\21\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u0326\31\323\1\u0137\12\323\242\0\1\u0136"+ + "\11\323\1\u0327\20\323\1\u0137\12\323\242\0\1\u0136\15\323"+ + "\1\u0328\14\323\1\u0137\12\323\242\0\1\u0136\2\323\1\u022b"+ + "\27\323\1\u0137\12\323\242\0\1\u0136\25\323\1\u0329\4\323"+ + "\1\u0137\12\323\242\0\1\u0136\10\323\1\u022b\21\323\1\u0137"+ + "\12\323\242\0\1\u0136\3\323\1\u032a\26\323\1\u0137\12\323"+ + "\242\0\1\u0136\3\323\1\u022b\26\323\1\u0137\12\323\242\0"+ + "\1\u0136\17\323\1\u022b\12\323\1\u0137\12\323\242\0\1\u0136"+ + "\12\323\1\u032b\17\323\1\u0137\12\323\242\0\1\u0136\17\323"+ + "\1\u032c\12\323\1\u0137\12\323\242\0\1\u0136\31\323\1\u022b"+ + "\1\u0137\12\323\242\0\1\u0136\7\323\1\u032d\22\323\1\u0137"+ + "\12\323\242\0\1\u0136\17\323\1\u032e\12\323\1\u0137\12\323"+ + "\242\0\1\u0136\25\323\1\u032f\4\323\1\u0137\12\323\242\0"+ + "\1\u0136\30\323\1\u0330\1\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\u02bd\31\323\1\u0137\12\323\242\0\1\u0136\16\323\1\u022b"+ + "\13\323\1\u0137\12\323\243\0\32\323\1\u0331\12\323\242\0"+ + "\1\u0136\2\323\1\u0332\27\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\323\1\u0333\30\323\1\u0137\12\323\242\0\1\u0136\17\323"+ + "\1\u0334\12\323\1\u0137\12\323\242\0\1\u0136\1\u0335\31\323"+ + "\1\u0137\12\323\276\0\12\u0336\7\0\1\u0228\1\u0229\1\u022a"+ + "\224\0\1\154\3\0\1\155\4\u01d1\2\154\1\u01d1\15\154"+ + "\1\u01d1\6\154\12\u01d1\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\155\4\u01d2\2\154\1\u01d2\15\154\1\u01d2\6\154\12\u01d2"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\155\4\u01d3\2\154"+ + "\1\u01d3\15\154\1\u01d3\6\154\12\u01d3\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + 
"\1\30\2\0\1\201\1\0\1\41\1\30\1\202\4\43"+ + "\1\u0337\25\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\1\u0338\31\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\10\43\1\u0339\21\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\13\43\1\u033a\16\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\17\43\1\u033b\12\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\15\43\1\u033c\14\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + 
"\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\12\43\1\u033d\17\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\4\43\1\u01e7\25\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\10\43\1\u033e"+ + "\21\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\12\43"+ + "\1\u0141\17\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\7\43\1\u033f\22\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + 
"\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\3\43\1\u01ed\26\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\5\43\1\u0340\24\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\11\371"+ + "\1\u0341\20\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\7\371\1\u0342\22\371\1\203\1\u0343\11\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\10\371\1\u0344"+ + "\4\371\1\u0345\5\371\1\u0346\6\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u0347\26\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\7\371\1\u0348\22\371\1\203\10\371\1\u0349\1\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\7\371\1\u034a\22\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\7\371\1\u034b\22\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\5\371\1\u034c\4\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\7\371\1\u034d"+ + "\22\371\1\203\10\371\1\u034e\1\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\5\371\1\u034f\4\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\13\371\1\u0350"+ + 
"\16\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\7\371\1\u0351\22\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\26\371\1\u0352\3\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\7\371\1\u034f\2\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\15\371\1\u0353\14\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\10\371"+ + "\1\u0354\1\u0355\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\6\371\1\u0356\1\u0357\22\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\3\371\1\u0358\26\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\4\371\1\u034f\5\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\32\371\1\203\1\371\1\u0359\10\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\1\371"+ + "\1\u035a\10\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\13\43\1\u035b\16\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\3\43\1\u035c\26\43"+ + 
"\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\4\43\1\u02d8"+ + "\25\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\220\0"+ + "\32\u035d\1\0\12\u035d\10\0\1\u035e\1\0\1\u035f\223\0"+ + "\1\u02f3\3\0\46\u02f3\1\u02f5\2\u02f3\1\u02f6\2\u02f3\1\u02f7"+ + "\5\0\2\u02f3\3\0\1\u02f3\214\0\1\u02f3\3\0\1\u0360"+ + "\32\u02f4\1\u0361\12\u02f4\1\u0362\2\u02f3\1\u02f6\2\u02f3\1\u02f7"+ + "\1\0\1\u0363\3\0\2\u02f3\3\0\1\u02f3\214\0\1\u02f5"+ + "\3\0\46\u02f5\1\0\2\u02f5\1\u0364\2\u02f5\1\u02f7\5\0"+ + "\2\u02f5\3\0\1\u02f5\221\0\4\u0365\2\0\1\u0365\15\0"+ + "\1\u0365\6\0\12\u0365\243\0\32\u0366\1\0\12\u0366\12\0"+ + "\1\u02f8\230\0\4\u0367\2\0\1\u0367\15\0\1\u0367\6\0"+ + "\12\u0367\1\u0368\24\0\1\55\1\0\1\56\2\0\1\245"+ + "\1\0\1\246\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\247\2\0\1\250\4\0\1\251"+ + "\3\0\1\252\17\0\1\71\2\0\1\253\21\0\1\254"+ + "\2\0\1\255\61\0\1\30\1\76\2\0\1\76\1\0"+ + "\2\76\1\0\1\76\2\0\1\u0369\1\u036a\2\30\1\u0369"+ + "\32\u036b\13\u036c\1\76\1\u036c\1\u0369\1\u036c\1\0\1\u036c"+ + "\1\0\3\u036c\3\0\1\u036c\3\0\2\u036c\213\0\1\u036d"+ + "\3\0\1\u036d\32\u036e\1\u036d\12\u036e\1\u036f\2\u036d\1\u0370"+ + "\2\u036d\1\u0371\3\0\1\u0372\1\0\2\u036d\3\0\1\u036d"+ + "\214\0\1\154\3\0\1\u016f\4\371\1\u0373\25\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u0374"+ + "\31\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\10\371\1\u0375\21\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\13\371\1\u0376\16\371\1\203"+ + 
"\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\17\371"+ + "\1\u0377\12\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\15\371\1\u0378\14\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\12\371\1\u0379\17\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\4\371\1\u020d\25\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\10\371\1\u037a\21\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\12\371\1\u0174"+ + "\17\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\7\371\1\u037b\22\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\3\371\1\u0213\26\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\5\371"+ + "\1\u037c\24\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\13\371\1\u037d\16\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u037e\26\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\4\371\1\u0306\25\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\100\1\0\1\101\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\102\2\0\1\103\4\0\1\104\3\0\1\105\17\0"+ + "\1\71\2\0\1\106\21\0\1\107\2\0\1\110\61\0"+ + "\1\30\2\31\2\0\2\111\1\112\1\0\1\31\2\0"+ + "\1\212\1\0\1\41\1\30\1\u01a5\32\43\1\203\12\u037f"+ + "\1\u01f9\1\154\1\215\1\154\1\0\1\212\1\156\1\u01d1"+ + "\1\u01d2\1\u01d3\2\0\1\111\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\100\1\0\1\101\4\0"+ 
+ "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\102\2\0\1\103\4\0\1\104\3\0\1\105\17\0"+ + "\1\71\2\0\1\106\21\0\1\107\2\0\1\110\61\0"+ + "\1\30\2\31\2\0\2\111\1\112\1\0\1\31\2\0"+ + "\1\212\1\0\1\41\1\30\1\u01a5\32\43\1\203\12\u030b"+ + "\1\u01f9\1\154\1\215\1\154\1\0\1\212\1\156\1\u01d1"+ + "\1\u01d2\1\u01d3\2\0\1\111\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\100\1\0\1\101\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\102\2\0\1\103\4\0\1\104\3\0\1\105\17\0"+ + "\1\71\2\0\1\106\21\0\1\107\2\0\1\110\61\0"+ + "\1\30\2\31\2\0\2\111\1\112\1\0\1\31\2\0"+ + "\1\212\1\0\1\41\1\30\1\u01a5\32\43\1\203\2\u030b"+ + "\1\u037f\1\u030b\1\u0380\2\u037f\2\u030b\1\u037f\1\u01f9\1\154"+ + "\1\215\1\154\1\0\1\212\1\156\1\u01d1\1\u01d2\1\u01d3"+ + "\2\0\1\111\1\154\3\0\2\154\265\0\1\u021d\242\0"+ + "\4\u0381\2\0\1\u0381\15\0\1\u0381\6\0\12\u0381\1\u0310"+ + "\242\0\4\u0382\2\0\1\u0382\15\0\1\u0382\6\0\12\u0382"+ + "\1\u0383\242\0\4\u0384\2\0\1\u0384\15\0\1\u0384\6\0"+ + "\12\u0384\1\u0385\12\0\1\u01ad\226\0\1\u0224\4\u0384\2\0"+ + "\1\u0384\15\0\1\u0384\6\0\12\u0386\1\u0385\12\0\1\u01ad"+ + "\226\0\1\u0224\4\u0384\2\0\1\u0384\15\0\1\u0384\6\0"+ + "\12\u0387\1\u0385\12\0\1\u01ad\226\0\1\u0224\4\u0384\2\0"+ + "\1\u0384\15\0\1\u0384\6\0\2\u0387\1\u0386\1\u0387\1\u0388"+ + "\2\u0386\2\u0387\1\u0386\1\u0385\12\0\1\u01ad\227\0\4\u0389"+ + "\2\0\1\u0389\15\0\1\u0389\6\0\12\u0389\1\u029d\12\0"+ + "\1\u01ad\226\0\1\u0224\4\u0389\2\0\1\u0389\15\0\1\u0389"+ + "\6\0\12\u0389\1\u029d\12\0\1\u01ad\274\0\1\u0223\12\0"+ + "\1\u01ad\262\0\1\u038a\1\u038b\5\u038a\1\u038c\1\u038b\1\u038a"+ + "\242\0\1\u0318\307\0\1\u0318\33\0\2\u0319\1\0\2\u0319"+ + "\2\0\2\u0319\244\0\4\u0228\2\0\1\u0228\15\0\1\u0228"+ + "\6\0\12\u0228\243\0\4\u0229\2\0\1\u0229\15\0\1\u0229"+ + "\6\0\12\u0229\243\0\4\u022a\2\0\1\u022a\15\0\1\u022a"+ + "\6\0\12\u022a\242\0\1\u0136\4\323\1\u01c1\25\323\1\u0137"+ + "\12\323\242\0\1\u0136\17\323\1\u038d\12\323\1\u0137\12\323"+ + 
"\242\0\1\u0136\4\323\1\u038e\25\323\1\u0137\12\323\242\0"+ + "\1\u0136\25\323\1\u038f\4\323\1\u0137\12\323\242\0\1\u0136"+ + "\5\323\1\u0390\24\323\1\u0137\12\323\242\0\1\u0136\1\323"+ + "\1\u0391\30\323\1\u0137\12\323\242\0\1\u0136\4\323\1\u0392"+ + "\25\323\1\u0137\12\323\242\0\1\u0136\15\323\1\u0393\14\323"+ + "\1\u0137\12\323\242\0\1\u0136\17\323\1\u02b9\12\323\1\u0137"+ + "\12\323\242\0\1\u0136\3\323\1\u0394\26\323\1\u0137\12\323"+ + "\242\0\1\u0136\25\323\1\u0395\4\323\1\u0137\12\323\242\0"+ + "\1\u0136\17\323\1\u038f\12\323\1\u0137\12\323\242\0\1\u0136"+ + "\20\323\1\u0396\11\323\1\u0137\12\323\242\0\1\u0136\24\323"+ + "\1\u038f\5\323\1\u0137\12\323\242\0\1\u0136\5\323\1\u0397"+ + "\24\323\1\u0137\12\323\242\0\1\u0136\11\323\1\u0398\20\323"+ + "\1\u0137\12\323\242\0\1\u0136\5\323\1\u0250\24\323\1\u0137"+ + "\12\323\242\0\1\u0136\13\323\1\u0399\16\323\1\u0137\12\323"+ + "\242\0\1\u0136\3\323\1\u0241\26\323\1\u0137\12\323\243\0"+ + "\1\323\1\u039a\3\323\1\u039b\1\u039c\1\u039d\1\323\1\u039e"+ + "\1\u039f\1\u03a0\1\u03a1\1\u03a2\1\u03a3\1\323\1\u03a4\1\u03a5"+ + "\1\u03a6\2\323\1\u03a7\1\u03a8\1\u03a9\1\323\1\u03aa\1\u0137"+ + "\1\u03ab\2\323\1\u03ac\1\323\1\u03ad\1\u03ae\3\323\242\0"+ + "\1\u0136\10\323\1\u03af\21\323\1\u0137\12\323\242\0\1\u0136"+ + "\25\323\1\u03b0\4\323\1\u0137\12\323\242\0\1\u0136\20\323"+ + "\1\u03b1\11\323\1\u0137\12\323\242\0\1\u0136\7\323\1\u02b9"+ + "\22\323\1\u0137\12\323\276\0\12\u03b2\7\0\1\u0228\1\u0229"+ + "\1\u022a\13\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\20\43\1\u03b3\11\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + 
"\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\1\43\1\u03b4\30\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\13\43\1\u014d\16\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\2\43\1\u01ed\27\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\5\43\1\u02d4\24\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\2\0\1\55"+ + "\1\0\1\56\2\0\1\57\1\0\1\60\4\0\1\61"+ + "\1\0\1\62\1\0\1\63\2\0\1\64\3\0\1\65"+ + "\2\0\1\66\4\0\1\67\3\0\1\70\17\0\1\71"+ + "\2\0\1\72\21\0\1\73\2\0\1\74\61\0\2\30"+ + "\1\75\1\0\1\76\1\0\1\76\1\77\1\0\1\30"+ + "\2\0\1\201\1\0\1\41\1\30\1\202\4\43\1\u03b5"+ + "\25\43\1\203\12\204\1\76\1\154\1\205\1\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\2\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\3\43"+ + 
"\1\u03b6\26\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\1\43\1\u01ed\30\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\4\43\1\u03b7\25\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\11\43\1\u03b8\20\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\1\371"+ + "\1\u03b9\30\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\24\371\1\u03ba\5\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\371\1\u03bb\30\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\14\371\1\u03bc\15\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\1\371\1\u03bd\30\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\371\1\u03be"+ + "\30\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + 
"\1\u016f\1\371\1\u03bf\30\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\24\371\1\u03c0\5\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u03c1"+ + "\31\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\24\371\1\u03c2\5\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\24\371\1\u03c3\5\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\27\371"+ + "\1\u03c4\2\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\24\371\1\u03c5\5\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\u0207\31\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\24\371"+ + "\1\u03bf\5\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\20\371\1\u03c6\11\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\24\371\1\u03c7\5\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\371\1\u03c8\30\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\4\371\1\u03c9\25\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u03ca\31\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\21\371\1\u03cb\10\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\4\371\1\u03cc\25\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\24\371\1\u03cd"+ + 
"\5\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\1\371\1\u03ce\10\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\1\u03cf\31\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u03d0\31\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\7\43\1\u01ed\22\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"; private static final String ZZ_TRANS_PACKED_1 = - "\3\270\1\u059c\26\270\1\u010a\12\270\237\0\1\u0109\1\u059d"+ - "\31\270\1\u010a\12\270\237\0\1\u0109\26\270\1\u059e\3\270"+ - "\1\u010a\12\270\237\0\1\u0121\7\324\1\u059f\22\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\1\u05a0\31\324\1\164"+ - "\12\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\32\324\1\164\1\u0338"+ - "\11\324\1\0\3\161\1\0\2\161\1\162\3\161\3\0"+ - "\1\161\4\0\2\161\212\0\1\u0121\24\324\1\u05a1\5\324"+ - "\1\164\12\324\1\0\3\161\1\0\2\161\1\162\3\161"+ - "\3\0\1\161\4\0\2\161\212\0\1\u0121\1\324\1\u05a2"+ - "\30\324\1\164\12\324\1\0\3\161\1\0\2\161\1\162"+ - "\3\161\3\0\1\161\4\0\2\161\212\0\1\u0121\32\324"+ - "\1\164\2\324\1\u012e\7\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\213\0\4\u05a3"+ - "\2\0\1\u05a3\15\0\1\u05a3\6\0\12\u05a3\1\u051d\237\0"+ - "\4\u05a4\2\0\1\u05a4\15\0\1\u05a4\6\0\12\u05a4\1\u05a5"+ - "\237\0\4\u05a6\2\0\1\u05a6\15\0\1\u05a6\6\0\1\u05a7"+ - "\2\u05a8\1\u05a7\5\u05a8\1\u05a9\14\0\1\u035e\223\0\4\u05aa"+ - "\2\0\1\u05aa\15\0\1\u05aa\6\0\12\u05aa\1\u0562\13\0"+ - "\1\u035e\223\0\4\u05a6\2\0\1\u05a6\15\0\1\u05a6\6\0"+ 
- "\1\u05a7\2\u05a8\1\u05a7\5\u05a8\1\u05a9\237\0\1\u03c5\4\u05aa"+ - "\2\0\1\u05aa\15\0\1\u05aa\6\0\12\u05ab\1\u0562\13\0"+ - "\1\u035e\222\0\1\u03c5\4\u05aa\2\0\1\u05aa\15\0\1\u05aa"+ - "\6\0\12\u05aa\1\u0562\13\0\1\u035e\222\0\1\u03c5\4\u05aa"+ - "\2\0\1\u05aa\15\0\1\u05aa\6\0\2\u05ab\1\u05aa\2\u05ab"+ - "\2\u05aa\1\u05ab\1\u05aa\1\u05ab\1\u0562\13\0\1\u035e\270\0"+ - "\1\u04e3\13\0\1\u035e\256\0\12\u0568\14\0\1\u035e\256\0"+ - "\12\u05ac\14\0\1\u035e\256\0\1\u0568\1\u05ad\1\u05ac\2\u0568"+ - "\2\u05ac\1\u0568\1\u05ac\1\u0568\14\0\1\u035e\270\0\1\u04ee"+ - "\237\0\4\u05ae\2\0\1\u05ae\15\0\1\u05ae\6\0\12\u05ae"+ - "\1\u056c\237\0\4\u05af\2\0\1\u05af\15\0\1\u05af\6\0"+ - "\12\u05af\1\u05b0\237\0\4\u05b1\2\0\1\u05b1\15\0\1\u05b1"+ - "\6\0\12\u05b1\1\u05b2\13\0\1\u0302\222\0\1\u036b\4\u05b1"+ - "\2\0\1\u05b1\15\0\1\u05b1\6\0\12\u05b3\1\u05b2\13\0"+ - "\1\u0302\222\0\1\u036b\4\u05b1\2\0\1\u05b1\15\0\1\u05b1"+ - "\6\0\12\u05b4\1\u05b2\13\0\1\u0302\222\0\1\u036b\4\u05b1"+ - "\2\0\1\u05b1\15\0\1\u05b1\6\0\1\u05b3\1\u05b5\1\u05b4"+ - "\2\u05b3\2\u05b4\1\u05b3\1\u05b4\1\u05b3\1\u05b2\13\0\1\u0302"+ - "\223\0\4\u05b6\2\0\1\u05b6\15\0\1\u05b6\6\0\12\u05b6"+ - "\1\u052b\13\0\1\u0302\222\0\1\u036b\4\u05b6\2\0\1\u05b6"+ - "\15\0\1\u05b6\6\0\12\u05b6\1\u052b\13\0\1\u0302\304\0"+ - "\1\u0302\256\0\2\u0573\1\0\2\u0573\2\0\1\u0573\1\0"+ - "\1\u0573\14\0\1\u0302\222\0\1\u036f\1\u0303\1\u05b7\30\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\24\u0303\1\u05b8\5\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\24\u0303\1\u05b9\5\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\1\u0303\1\u05ba\30\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\14\u0303\1\u05bb\15\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\1\u0303\1\u05bc\30\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u0303"+ - "\1\u05bd\30\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u0303\1\u05be"+ - "\30\u0303\1\u0370\12\u0303\237\0\1\u036f\21\u0303\1\u05bf\10\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\24\u0303\1\u05c0\5\u0303\1\u0370"+ - 
"\12\u0303\237\0\1\u036f\24\u0303\1\u05c1\5\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\24\u0303\1\u05c2\5\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\1\u04b0\31\u0303\1\u0370\12\u0303\237\0\1\u036f\24\u0303"+ - "\1\u05be\5\u0303\1\u0370\12\u0303\237\0\1\u036f\24\u0303\1\u05c3"+ - "\5\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u0303\1\u05c4\30\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\31\u0303\1\u05c5\1\u0370\12\u0303"+ - "\237\0\1\u036f\24\u0303\1\u05c6\5\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\1\u0303\1\u05c7\30\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\1\u05c8\31\u0303\1\u0370\12\u0303\237\0\1\u036f\21\u0303\1\u05c9"+ - "\10\u0303\1\u0370\12\u0303\237\0\1\u036f\4\u0303\1\u05ca\25\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\24\u0303\1\u05cb\5\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\24\u0303\1\u05cc\5\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\4\u0303\1\u05cd\25\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\21\u0303\1\u05ce\10\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\24\u0303\1\u05cf\5\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\1\u05d0\11\u0303\237\0\1\u036f\32\u0303\1\u0370\7\u0303"+ - "\1\u05d1\2\u0303\237\0\1\u036f\1\u05d2\31\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\1\u05d3\31\u0303\1\u0370\12\u0303\305\0\1\u04fd"+ - "\237\0\4\u037d\2\0\1\u037d\15\0\1\u037d\6\0\12\u037d"+ - "\14\0\1\u0146\222\0\1\u0109\32\270\1\u010a\11\270\1\u05d4"+ - "\237\0\1\u0109\26\270\1\u018f\3\270\1\u010a\12\270\237\0"+ - "\1\u0109\32\270\1\u010a\7\270\1\u05d5\2\270\237\0\1\u0109"+ - "\32\270\1\u010a\11\270\1\u0153\237\0\1\u0109\3\270\1\u05d6"+ - "\26\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a\4\270"+ - "\1\u05d7\5\270\237\0\1\u0109\16\270\1\u05d8\13\270\1\u010a"+ - "\12\270\237\0\1\u0109\26\270\1\u05d9\3\270\1\u010a\12\270"+ - "\237\0\1\u0109\32\270\1\u010a\7\270\1\u04cb\2\270\237\0"+ - "\1\u0121\32\324\1\164\11\324\1\u05da\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\212\0"+ - "\1\u0121\4\324\1\u0127\25\324\1\164\12\324\1\0\3\161"+ - 
"\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\212\0\1\u0121\24\324\1\350\5\324\1\164\12\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\212\0\1\u0121\32\324\1\164\6\324\1\350\3\324"+ - "\1\0\3\161\1\0\2\161\1\162\3\161\3\0\1\161"+ - "\4\0\2\161\260\0\1\u051d\237\0\4\u05db\2\0\1\u05db"+ - "\15\0\1\u05db\6\0\12\u05db\1\u05a5\237\0\4\u05dc\2\0"+ - "\1\u05dc\15\0\1\u05dc\6\0\12\u05dc\1\u05dd\237\0\4\u05de"+ - "\2\0\1\u05de\15\0\1\u05de\6\0\12\u05de\1\u05df\13\0"+ - "\1\u035e\222\0\1\u03c5\4\u05de\2\0\1\u05de\15\0\1\u05de"+ - "\6\0\12\u05e0\1\u05df\13\0\1\u035e\222\0\1\u03c5\4\u05de"+ - "\2\0\1\u05de\15\0\1\u05de\6\0\12\u05e1\1\u05df\13\0"+ - "\1\u035e\222\0\1\u03c5\4\u05de\2\0\1\u05de\15\0\1\u05de"+ - "\6\0\1\u05e0\1\u05e2\1\u05e1\2\u05e0\2\u05e1\1\u05e0\1\u05e1"+ - "\1\u05e0\1\u05df\13\0\1\u035e\223\0\4\u05e3\2\0\1\u05e3"+ - "\15\0\1\u05e3\6\0\12\u05e3\1\u0562\13\0\1\u035e\222\0"+ - "\1\u03c5\4\u05e3\2\0\1\u05e3\15\0\1\u05e3\6\0\12\u05e3"+ - "\1\u0562\13\0\1\u035e\304\0\1\u035e\256\0\2\u05ac\1\0"+ - "\2\u05ac\2\0\1\u05ac\1\0\1\u05ac\14\0\1\u035e\223\0"+ - "\4\u05e4\2\0\1\u05e4\15\0\1\u05e4\6\0\12\u05e4\1\u056c"+ - "\237\0\4\u05e5\2\0\1\u05e5\15\0\1\u05e5\6\0\12\u05e5"+ - "\1\u05e6\237\0\4\u05e7\2\0\1\u05e7\15\0\1\u05e7\6\0"+ - "\1\u05e8\2\u05e9\1\u05e8\5\u05e9\1\u05ea\14\0\1\u0302\223\0"+ - "\4\u05eb\2\0\1\u05eb\15\0\1\u05eb\6\0\12\u05eb\1\u05b2"+ - "\13\0\1\u0302\223\0\4\u05e7\2\0\1\u05e7\15\0\1\u05e7"+ - "\6\0\1\u05e8\2\u05e9\1\u05e8\5\u05e9\1\u05ea\237\0\1\u036b"+ - "\4\u05eb\2\0\1\u05eb\15\0\1\u05eb\6\0\12\u05ec\1\u05b2"+ - "\13\0\1\u0302\222\0\1\u036b\4\u05eb\2\0\1\u05eb\15\0"+ - "\1\u05eb\6\0\12\u05eb\1\u05b2\13\0\1\u0302\222\0\1\u036b"+ - "\4\u05eb\2\0\1\u05eb\15\0\1\u05eb\6\0\2\u05ec\1\u05eb"+ - "\2\u05ec\2\u05eb\1\u05ec\1\u05eb\1\u05ec\1\u05b2\13\0\1\u0302"+ - "\270\0\1\u052b\13\0\1\u0302\222\0\1\u036f\25\u0303\1\u05ed"+ - "\4\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u05ee\31\u0303\1\u0370"+ - 
"\12\u0303\237\0\1\u036f\15\u0303\1\u05ef\14\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\21\u0303\1\u05f0\10\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\16\u0303\1\u05f1\4\u0303\1\u05f2\6\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\4\u0303\1\u05f3\25\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\11\u0303\1\u05f4\237\0\1\u036f\4\u0303"+ - "\1\u05f5\25\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\11\u0303\1\u05f6\237\0\1\u036f\24\u0303\1\u05f7\5\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\1\u05f8\1\u05f9\1\u0303\1\u05fa\20\u0303"+ - "\1\u05fb\5\u0303\1\u0370\5\u0303\1\u05fc\4\u0303\237\0\1\u036f"+ - "\16\u0303\1\u05fd\13\u0303\1\u0370\12\u0303\237\0\1\u036f\11\u0303"+ - "\1\u05fe\13\u0303\1\u05ff\4\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\32\u0303\1\u0370\11\u0303\1\u0600\237\0\1\u036f\23\u0303\1\u0601"+ - "\6\u0303\1\u0370\12\u0303\237\0\1\u036f\31\u0303\1\u0602\1\u0370"+ - "\12\u0303\237\0\1\u036f\26\u0303\1\u0603\3\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\11\u0303\1\u0604\20\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\3\u0303\1\u0605\6\u0303\237\0\1\u036f"+ - "\5\u0303\1\u0606\24\u0303\1\u0370\12\u0303\237\0\1\u036f\10\u0303"+ - "\1\u0607\21\u0303\1\u0370\12\u0303\237\0\1\u036f\3\u0303\1\u0608"+ - "\26\u0303\1\u0370\12\u0303\237\0\1\u036f\21\u0303\1\u0609\6\u0303"+ - "\1\u060a\1\u0303\1\u0370\12\u0303\237\0\1\u036f\12\u0303\1\u060b"+ - "\17\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370\1\u0303"+ - "\1\u060c\10\u0303\237\0\1\u036f\24\u0303\1\u060d\5\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\24\u0303\1\u060e\5\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\4\u0303\1\u060f\5\u0303\237\0"+ - "\1\u036f\5\u0303\1\u0610\23\u0303\1\u0611\1\u0370\12\u0303\237\0"+ - "\1\u0109\7\270\1\u0612\22\270\1\u010a\12\270\237\0\1\u0109"+ - "\1\u0613\31\270\1\u010a\12\270\237\0\1\u0109\32\270\1\u010a"+ - "\1\u0400\11\270\237\0\1\u0109\24\270\1\u0614\5\270\1\u010a"+ - 
"\12\270\237\0\1\u0109\1\270\1\u0615\30\270\1\u010a\12\270"+ - "\237\0\1\u0109\32\270\1\u010a\2\270\1\u0196\7\270\237\0"+ - "\1\u0121\1\u0616\31\324\1\164\12\324\1\0\3\161\1\0"+ - "\2\161\1\162\3\161\3\0\1\161\4\0\2\161\213\0"+ - "\4\u0617\2\0\1\u0617\15\0\1\u0617\6\0\12\u0617\1\u05a5"+ - "\237\0\4\u0618\2\0\1\u0618\15\0\1\u0618\6\0\12\u0618"+ - "\1\u0619\237\0\4\u061a\2\0\1\u061a\15\0\1\u061a\6\0"+ - "\1\u061b\2\u061c\1\u061b\5\u061c\1\u061d\14\0\1\u035e\223\0"+ - "\4\u061e\2\0\1\u061e\15\0\1\u061e\6\0\12\u061e\1\u05df"+ - "\13\0\1\u035e\223\0\4\u061a\2\0\1\u061a\15\0\1\u061a"+ - "\6\0\1\u061b\2\u061c\1\u061b\5\u061c\1\u061d\237\0\1\u03c5"+ - "\4\u061e\2\0\1\u061e\15\0\1\u061e\6\0\12\u061f\1\u05df"+ - "\13\0\1\u035e\222\0\1\u03c5\4\u061e\2\0\1\u061e\15\0"+ - "\1\u061e\6\0\12\u061e\1\u05df\13\0\1\u035e\222\0\1\u03c5"+ - "\4\u061e\2\0\1\u061e\15\0\1\u061e\6\0\2\u061f\1\u061e"+ - "\2\u061f\2\u061e\1\u061f\1\u061e\1\u061f\1\u05df\13\0\1\u035e"+ - "\270\0\1\u0562\13\0\1\u035e\270\0\1\u056c\237\0\4\u0620"+ - "\2\0\1\u0620\15\0\1\u0620\6\0\12\u0620\1\u05e6\237\0"+ - "\4\u0621\2\0\1\u0621\15\0\1\u0621\6\0\1\u0622\2\u0623"+ - "\1\u0622\5\u0623\1\u0624\1\u0625\237\0\4\u0626\2\0\1\u0626"+ - "\15\0\1\u0626\6\0\12\u0626\1\u0627\13\0\1\u0302\222\0"+ - "\1\u036b\4\u0626\2\0\1\u0626\15\0\1\u0626\6\0\12\u0628"+ - "\1\u0627\13\0\1\u0302\222\0\1\u036b\4\u0626\2\0\1\u0626"+ - "\15\0\1\u0626\6\0\12\u0629\1\u0627\13\0\1\u0302\222\0"+ - "\1\u036b\4\u0626\2\0\1\u0626\15\0\1\u0626\6\0\1\u0628"+ - "\1\u062a\1\u0629\2\u0628\2\u0629\1\u0628\1\u0629\1\u0628\1\u0627"+ - "\13\0\1\u0302\223\0\4\u062b\2\0\1\u062b\15\0\1\u062b"+ - "\6\0\12\u062b\1\u05b2\13\0\1\u0302\222\0\1\u036b\4\u062b"+ - "\2\0\1\u062b\15\0\1\u062b\6\0\12\u062b\1\u05b2\13\0"+ - "\1\u0302\222\0\1\u036f\1\u0303\1\u062c\30\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\1\u062d\11\u0303\237\0\1\u036f"+ - "\6\u0303\1\u062e\23\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - 
"\1\u0370\7\u0303\1\u062f\2\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\10\u0303\1\u04b5\1\u0303\237\0\1\u036f\32\u0303\1\u0370\5\u0303"+ - "\1\u04b5\4\u0303\237\0\1\u036f\26\u0303\1\u0630\3\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\1\u0303\1\u0631\30\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\26\u0303\1\u0632\3\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\1\u0303\1\u0633\10\u0303\237\0\1\u036f"+ - "\1\u0634\31\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u0635\27\u0303"+ - "\1\u0636\1\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\1\u0637\11\u0303\237\0\1\u036f\4\u0303\1\u0638\25\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\25\u0303\1\u0639\4\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\1\u063a\31\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\32\u0303\1\u0370\1\u063b\11\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\2\u0303\1\u03e5\7\u0303\237\0\1\u036f\32\u0303\1\u0370\3\u0303"+ - "\1\u063c\6\u0303\237\0\1\u036f\1\u063d\1\u0303\1\u063e\27\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\1\u062f\31\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\2\u0303\1\u063f\7\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\2\u0303\1\u0640\7\u0303\237\0\1\u036f"+ - "\15\u0303\1\u0641\14\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\5\u0303\1\u0642\4\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\7\u0303\1\u0643\2\u0303\237\0\1\u036f\32\u0303\1\u0370\11\u0303"+ - "\1\u0644\237\0\1\u036f\1\u0303\1\u0645\30\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\3\u0303\1\u0646\6\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\1\u0303\1\u0647\10\u0303\237\0\1\u036f"+ - "\32\u0303\1\u0370\1\u0303\1\u0648\10\u0303\237\0\1\u036f\24\u0303"+ - "\1\u0649\5\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\6\u0303\1\u064a\3\u0303\237\0\1\u036f\32\u0303\1\u0370\3\u0303"+ - "\1\u064b\6\u0303\237\0\1\u036f\1\u063c\31\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\11\u0303\1\u064c\237\0\1\u036f"+ - 
"\25\u0303\1\u064d\4\u0303\1\u0370\12\u0303\237\0\1\u0109\32\270"+ - "\1\u010a\11\270\1\u064e\237\0\1\u0109\4\270\1\u018f\25\270"+ - "\1\u010a\12\270\237\0\1\u0109\24\270\1\u0153\5\270\1\u010a"+ - "\12\270\237\0\1\u0109\32\270\1\u010a\6\270\1\u0153\3\270"+ - "\237\0\1\u0121\32\324\1\164\5\324\1\u064f\4\324\1\0"+ - "\3\161\1\0\2\161\1\162\3\161\3\0\1\161\4\0"+ - "\2\161\260\0\1\u05a5\237\0\4\u0650\2\0\1\u0650\15\0"+ - "\1\u0650\6\0\12\u0650\1\u0619\237\0\4\u0651\2\0\1\u0651"+ - "\15\0\1\u0651\6\0\1\u0652\2\u0653\1\u0652\5\u0653\1\u0654"+ - "\1\u0655\237\0\4\u0656\2\0\1\u0656\15\0\1\u0656\6\0"+ - "\12\u0656\1\u0657\13\0\1\u035e\222\0\1\u03c5\4\u0656\2\0"+ - "\1\u0656\15\0\1\u0656\6\0\12\u0658\1\u0657\13\0\1\u035e"+ - "\222\0\1\u03c5\4\u0656\2\0\1\u0656\15\0\1\u0656\6\0"+ - "\12\u0659\1\u0657\13\0\1\u035e\222\0\1\u03c5\4\u0656\2\0"+ - "\1\u0656\15\0\1\u0656\6\0\1\u0658\1\u065a\1\u0659\2\u0658"+ - "\2\u0659\1\u0658\1\u0659\1\u0658\1\u0657\13\0\1\u035e\223\0"+ - "\4\u065b\2\0\1\u065b\15\0\1\u065b\6\0\12\u065b\1\u05df"+ - "\13\0\1\u035e\222\0\1\u03c5\4\u065b\2\0\1\u065b\15\0"+ - "\1\u065b\6\0\12\u065b\1\u05df\13\0\1\u035e\223\0\4\u065c"+ - "\2\0\1\u065c\15\0\1\u065c\6\0\12\u065c\1\u05e6\237\0"+ - "\4\u065d\2\0\1\u065d\15\0\1\u065d\6\0\12\u065d\1\u065e"+ - "\236\0\1\u036b\4\u065d\2\0\1\u065d\15\0\1\u065d\6\0"+ - "\12\u065f\1\u065e\236\0\1\u036b\4\u065d\2\0\1\u065d\15\0"+ - "\1\u065d\6\0\12\u0660\1\u065e\236\0\1\u036b\4\u065d\2\0"+ - "\1\u065d\15\0\1\u065d\6\0\1\u065f\1\u0661\1\u0660\2\u065f"+ - "\2\u0660\1\u065f\1\u0660\1\u065f\1\u065e\237\0\4\u0662\2\0"+ - "\1\u0662\15\0\1\u0662\6\0\12\u0662\14\0\1\u0302\223\0"+ - "\4\u0663\2\0\1\u0663\15\0\1\u0663\6\0\12\u0663\1\u0627"+ - "\13\0\1\u0302\223\0\4\u0662\2\0\1\u0662\15\0\1\u0662"+ - "\6\0\12\u0662\237\0\1\u036b\4\u0663\2\0\1\u0663\15\0"+ - "\1\u0663\6\0\12\u0664\1\u0627\13\0\1\u0302\222\0\1\u036b"+ - "\4\u0663\2\0\1\u0663\15\0\1\u0663\6\0\12\u0663\1\u0627"+ - "\13\0\1\u0302\222\0\1\u036b\4\u0663\2\0\1\u0663\15\0"+ - 
"\1\u0663\6\0\2\u0664\1\u0663\2\u0664\2\u0663\1\u0664\1\u0663"+ - "\1\u0664\1\u0627\13\0\1\u0302\270\0\1\u05b2\13\0\1\u0302"+ - "\222\0\1\u036f\32\u0303\1\u0370\1\u0665\11\u0303\237\0\1\u036f"+ - "\1\u0666\31\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\10\u0303\1\u0667\1\u0303\237\0\1\u036f\25\u0303\1\u044e\4\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370\5\u0303\1\u0668"+ - "\4\u0303\237\0\1\u036f\32\u0303\1\u0370\5\u0303\1\u0669\4\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\5\u0303\1\u063c\4\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\3\u0303\1\u0666\6\u0303\237\0\1\u036f"+ - "\17\u0303\1\u066a\12\u0303\1\u0370\12\u0303\237\0\1\u036f\12\u0303"+ - "\1\u066b\17\u0303\1\u0370\12\u0303\237\0\1\u036f\25\u0303\1\u066c"+ - "\4\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u066d\31\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\15\u0303\1\u066e\14\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\3\u0303\1\u066f\6\u0303\237\0"+ - "\1\u036f\21\u0303\1\u0670\10\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\2\u0303\1\u062f\27\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u0303"+ - "\1\u044e\30\u0303\1\u0370\12\u0303\237\0\1\u036f\11\u0303\1\u0671"+ - "\20\u0303\1\u0370\12\u0303\237\0\1\u036f\11\u0303\1\u0672\20\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\1\u0673\31\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\1\u0674\31\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\2\u0303\1\u0675\27\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\4\u0303\1\u0455\5\u0303\237\0\1\u036f\10\u0303\1\u0676"+ - "\21\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u0677\31\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\25\u0303\1\u0678\4\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\4\u0303\1\u0666\5\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\6\u0303\1\u0666\3\u0303\237\0\1\u036f"+ - "\32\u0303\1\u0370\2\u0303\1\u0666\7\u0303\237\0\1\u036f\16\u0303"+ - "\1\u0679\13\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - 
"\1\u067a\11\u0303\237\0\1\u036f\32\u0303\1\u0370\3\u0303\1\u067b"+ - "\6\u0303\237\0\1\u036f\32\u0303\1\u0370\3\u0303\1\u03e5\6\u0303"+ - "\237\0\1\u036f\24\u0303\1\u067c\5\u0303\1\u0370\12\u0303\237\0"+ - "\1\u0109\1\u067d\31\270\1\u010a\12\270\237\0\1\u0121\7\324"+ - "\1\u067e\22\324\1\164\12\324\1\0\3\161\1\0\2\161"+ - "\1\162\3\161\3\0\1\161\4\0\2\161\213\0\4\u067f"+ - "\2\0\1\u067f\15\0\1\u067f\6\0\12\u067f\1\u0619\237\0"+ - "\4\u0680\2\0\1\u0680\15\0\1\u0680\6\0\12\u0680\1\u0681"+ - "\236\0\1\u03c5\4\u0680\2\0\1\u0680\15\0\1\u0680\6\0"+ - "\12\u0682\1\u0681\236\0\1\u03c5\4\u0680\2\0\1\u0680\15\0"+ - "\1\u0680\6\0\12\u0683\1\u0681\236\0\1\u03c5\4\u0680\2\0"+ - "\1\u0680\15\0\1\u0680\6\0\1\u0682\1\u0684\1\u0683\2\u0682"+ - "\2\u0683\1\u0682\1\u0683\1\u0682\1\u0681\237\0\4\u0685\2\0"+ - "\1\u0685\15\0\1\u0685\6\0\12\u0685\14\0\1\u035e\223\0"+ - "\4\u0686\2\0\1\u0686\15\0\1\u0686\6\0\12\u0686\1\u0657"+ - "\13\0\1\u035e\223\0\4\u0685\2\0\1\u0685\15\0\1\u0685"+ - "\6\0\12\u0685\237\0\1\u03c5\4\u0686\2\0\1\u0686\15\0"+ - "\1\u0686\6\0\12\u0687\1\u0657\13\0\1\u035e\222\0\1\u03c5"+ - "\4\u0686\2\0\1\u0686\15\0\1\u0686\6\0\12\u0686\1\u0657"+ - "\13\0\1\u035e\222\0\1\u03c5\4\u0686\2\0\1\u0686\15\0"+ - "\1\u0686\6\0\2\u0687\1\u0686\2\u0687\2\u0686\1\u0687\1\u0686"+ - "\1\u0687\1\u0657\13\0\1\u035e\270\0\1\u05df\13\0\1\u035e"+ - "\270\0\1\u05e6\237\0\4\u0688\2\0\1\u0688\15\0\1\u0688"+ - "\6\0\12\u0688\1\u065e\237\0\4\u0662\2\0\1\u0662\15\0"+ - "\1\u0662\6\0\12\u0662\1\u0573\236\0\1\u036b\4\u0688\2\0"+ - "\1\u0688\15\0\1\u0688\6\0\12\u0689\1\u065e\236\0\1\u036b"+ - "\4\u0688\2\0\1\u0688\15\0\1\u0688\6\0\12\u0688\1\u065e"+ - "\236\0\1\u036b\4\u0688\2\0\1\u0688\15\0\1\u0688\6\0"+ - "\2\u0689\1\u0688\2\u0689\2\u0688\1\u0689\1\u0688\1\u0689\1\u065e"+ - "\237\0\4\u068a\2\0\1\u068a\15\0\1\u068a\6\0\12\u068a"+ - "\14\0\1\u0302\223\0\4\u068b\2\0\1\u068b\15\0\1\u068b"+ - "\6\0\12\u068b\1\u0627\13\0\1\u0302\222\0\1\u036b\4\u068b"+ - 
"\2\0\1\u068b\15\0\1\u068b\6\0\12\u068b\1\u0627\13\0"+ - "\1\u0302\222\0\1\u036f\3\u0303\1\u068c\26\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\2\u0303\1\u044e\27\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\6\u0303\1\u0459\23\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\1\u0303\1\u0646\30\u0303\1\u0370\12\u0303\237\0\1\u036f\3\u0303"+ - "\1\u068d\26\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\3\u0303\1\u068e\6\u0303\237\0\1\u036f\32\u0303\1\u0370\6\u0303"+ - "\1\u068f\3\u0303\237\0\1\u036f\32\u0303\1\u0370\6\u0303\1\u0690"+ - "\3\u0303\237\0\1\u036f\32\u0303\1\u0370\5\u0303\1\u0691\4\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\7\u0303\1\u0692\2\u0303\237\0"+ - "\1\u036f\1\u0693\31\u0303\1\u0370\12\u0303\237\0\1\u036f\24\u0303"+ - "\1\u0694\5\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\4\u0303\1\u0695\5\u0303\237\0\1\u036f\32\u0303\1\u0370\4\u0303"+ - "\1\u0696\5\u0303\237\0\1\u036f\26\u0303\1\u0697\3\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\30\u0303\1\u0698\1\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\11\u0303\1\u04af\20\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\2\u0303\1\u0699\7\u0303\237\0\1\u036f"+ - "\12\u0303\1\u069a\17\u0303\1\u0370\12\u0303\237\0\1\u036f\17\u0303"+ - "\1\u0456\12\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\4\u0303\1\u069b\5\u0303\237\0\1\u036f\32\u0303\1\u0370\6\u0303"+ - "\1\u04b2\3\u0303\237\0\1\u036f\30\u0303\1\u069c\1\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\30\u0303\1\u069d\1\u0303\1\u0370\12\u0303"+ - "\237\0\1\u0109\32\270\1\u010a\5\270\1\u069e\4\270\237\0"+ - "\1\u0121\1\324\1\u03a5\30\324\1\164\12\324\1\0\3\161"+ - "\1\0\2\161\1\162\3\161\3\0\1\161\4\0\2\161"+ - "\260\0\1\u0619\237\0\4\u069f\2\0\1\u069f\15\0\1\u069f"+ - "\6\0\12\u069f\1\u0681\237\0\4\u0685\2\0\1\u0685\15\0"+ - "\1\u0685\6\0\12\u0685\1\u05ac\236\0\1\u03c5\4\u069f\2\0"+ - "\1\u069f\15\0\1\u069f\6\0\12\u06a0\1\u0681\236\0\1\u03c5"+ - "\4\u069f\2\0\1\u069f\15\0\1\u069f\6\0\12\u069f\1\u0681"+ - 
"\236\0\1\u03c5\4\u069f\2\0\1\u069f\15\0\1\u069f\6\0"+ - "\2\u06a0\1\u069f\2\u06a0\2\u069f\1\u06a0\1\u069f\1\u06a0\1\u0681"+ - "\237\0\4\u06a1\2\0\1\u06a1\15\0\1\u06a1\6\0\12\u06a1"+ - "\14\0\1\u035e\223\0\4\u06a2\2\0\1\u06a2\15\0\1\u06a2"+ - "\6\0\12\u06a2\1\u0657\13\0\1\u035e\222\0\1\u03c5\4\u06a2"+ - "\2\0\1\u06a2\15\0\1\u06a2\6\0\12\u06a2\1\u0657\13\0"+ - "\1\u035e\223\0\4\u06a3\2\0\1\u06a3\15\0\1\u06a3\6\0"+ - "\12\u06a3\1\u065e\236\0\1\u036b\4\u06a3\2\0\1\u06a3\15\0"+ - "\1\u06a3\6\0\12\u06a3\1\u065e\237\0\4\u06a4\2\0\1\u06a4"+ - "\15\0\1\u06a4\6\0\12\u06a4\14\0\1\u0302\270\0\1\u0627"+ - "\13\0\1\u0302\222\0\1\u036f\1\u06a5\31\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\11\u0303\1\u063c\237\0\1\u036f"+ - "\1\u06a6\31\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u06a7\31\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\7\u0303\1\u06a8\22\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\1\u06a9\31\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\1\u06aa\31\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\6\u0303\1\u06ab\3\u0303\237\0\1\u036f\6\u0303\1\u044e"+ - "\23\u0303\1\u0370\12\u0303\237\0\1\u036f\25\u0303\1\u06ac\4\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\1\u06ad\31\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\6\u0303\1\u06ae\3\u0303\237\0"+ - "\1\u036f\1\u06af\31\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\6\u0303\1\u04ae\3\u0303\237\0\1\u036f\12\u0303\1\u045f"+ - "\17\u0303\1\u0370\12\u0303\237\0\1\u036f\1\u06b0\31\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\10\u0303\1\u06b1\21\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\31\u0303\1\u06b2\1\u0370\12\u0303\237\0\1\u0109"+ - "\7\270\1\u06b3\22\270\1\u010a\12\270\240\0\4\u06b4\2\0"+ - "\1\u06b4\15\0\1\u06b4\6\0\12\u06b4\1\u0681\236\0\1\u03c5"+ - "\4\u06b4\2\0\1\u06b4\15\0\1\u06b4\6\0\12\u06b4\1\u0681"+ - "\237\0\4\u06b5\2\0\1\u06b5\15\0\1\u06b5\6\0\12\u06b5"+ - "\14\0\1\u035e\270\0\1\u0657\13\0\1\u035e\270\0\1\u065e"+ - 
"\237\0\4\u0573\2\0\1\u0573\15\0\1\u0573\6\0\12\u0573"+ - "\14\0\1\u0302\222\0\1\u036f\32\u0303\1\u0370\1\u06b6\11\u0303"+ - "\237\0\1\u036f\2\u0303\1\u06b7\27\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\10\u0303\1\u062f\1\u0303\237\0\1\u036f"+ - "\15\u0303\1\u03e5\14\u0303\1\u0370\12\u0303\237\0\1\u036f\23\u0303"+ - "\1\u06b8\6\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\1\u0303\1\u06b9\10\u0303\237\0\1\u036f\32\u0303\1\u0370\3\u0303"+ - "\1\u04b2\6\u0303\237\0\1\u036f\30\u0303\1\u06ba\1\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\32\u0303\1\u0370\1\u0303\1\u06bb\10\u0303"+ - "\237\0\1\u036f\6\u0303\1\u06bc\23\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\5\u0303\1\u06bd\4\u0303\237\0\1\u036f"+ - "\32\u0303\1\u0370\5\u0303\1\u06be\4\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\1\u0303\1\u03e5\10\u0303\237\0\1\u036f\13\u0303\1\u06bf"+ - "\16\u0303\1\u0370\12\u0303\237\0\1\u0109\1\270\1\u046d\30\270"+ - "\1\u010a\12\270\305\0\1\u0681\237\0\4\u05ac\2\0\1\u05ac"+ - "\15\0\1\u05ac\6\0\12\u05ac\14\0\1\u035e\222\0\1\u036f"+ - "\24\u0303\1\u06c0\5\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\10\u0303\1\u06c1\1\u0303\237\0\1\u036f\1\u0303\1\u0455"+ - "\30\u0303\1\u0370\12\u0303\237\0\1\u036f\2\u0303\1\u06c2\27\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\3\u0303\1\u06c3\26\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\3\u0303\1\u06c4\26\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\1\u0303\1\u06c5\10\u0303\237\0"+ - "\1\u036f\3\u0303\1\u06c6\26\u0303\1\u0370\12\u0303\237\0\1\u036f"+ - "\1\u06c7\31\u0303\1\u0370\12\u0303\237\0\1\u036f\26\u0303\1\u06c8"+ - "\3\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370\11\u0303"+ - "\1\u06c9\237\0\1\u036f\26\u0303\1\u044e\3\u0303\1\u0370\12\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\7\u0303\1\u06ca\2\u0303\237\0"+ - "\1\u036f\32\u0303\1\u0370\11\u0303\1\u03e5\237\0\1\u036f\3\u0303"+ - "\1\u06cb\26\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - 
"\4\u0303\1\u06cc\5\u0303\237\0\1\u036f\16\u0303\1\u06cd\13\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\26\u0303\1\u06ce\3\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\32\u0303\1\u0370\7\u0303\1\u0694\2\u0303"+ - "\237\0\1\u036f\7\u0303\1\u06cf\22\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\1\u06d0\31\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303"+ - "\1\u0370\1\u062f\11\u0303\237\0\1\u036f\24\u0303\1\u06d1\5\u0303"+ - "\1\u0370\12\u0303\237\0\1\u036f\1\u0303\1\u06d2\30\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\32\u0303\1\u0370\2\u0303\1\u0455\7\u0303"+ - "\237\0\1\u036f\32\u0303\1\u0370\11\u0303\1\u06d3\237\0\1\u036f"+ - "\4\u0303\1\u044e\25\u0303\1\u0370\12\u0303\237\0\1\u036f\24\u0303"+ - "\1\u03e5\5\u0303\1\u0370\12\u0303\237\0\1\u036f\32\u0303\1\u0370"+ - "\6\u0303\1\u03e5\3\u0303\237\0\1\u036f\1\u06d4\31\u0303\1\u0370"+ - "\12\u0303\237\0\1\u036f\32\u0303\1\u0370\5\u0303\1\u06d5\4\u0303"+ - "\237\0\1\u036f\7\u0303\1\u06d6\22\u0303\1\u0370\12\u0303\237\0"+ - "\1\u036f\1\u0303\1\u0666\30\u0303\1\u0370\12\u0303\26\0"; + "\3\154\3\0\1\154\3\0\2\154\2\0\1\55\1\0"+ + "\1\56\2\0\1\57\1\0\1\60\4\0\1\61\1\0"+ + "\1\62\1\0\1\63\2\0\1\64\3\0\1\65\2\0"+ + "\1\66\4\0\1\67\3\0\1\70\17\0\1\71\2\0"+ + "\1\72\21\0\1\73\2\0\1\74\61\0\2\30\1\75"+ + "\1\0\1\76\1\0\1\76\1\77\1\0\1\30\2\0"+ + "\1\201\1\0\1\41\1\30\1\202\13\43\1\u0141\16\43"+ + "\1\203\12\204\1\76\1\154\1\205\1\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\217\0\1\u03d1"+ + "\32\u035d\1\u03d2\12\u035d\10\0\1\u035e\231\0\51\u035e\1\u03d3"+ + "\2\0\3\u035e\1\u022a\3\0\1\u035e\225\0\4\u03d4\2\0"+ + "\1\u03d4\15\0\1\u03d4\6\0\12\u03d4\1\u03d5\235\0\1\u02f3"+ + "\3\0\1\u02f3\32\u02f4\1\u02f3\12\u02f4\1\u02f5\2\u02f3\1\u02f6"+ + "\2\u02f3\1\u02f7\5\0\2\u02f3\3\0\1\u02f3\214\0\1\u02f3"+ + "\3\0\1\u02f3\32\u02f4\1\u0361\12\u02f4\1\u02f5\2\u02f3\1\u02f6"+ + "\2\u02f3\1\u02f7\5\0\2\u02f3\3\0\1\u02f3\214\0\1\u02f5"+ + "\3\0\34\u02f5\12\u03d6\1\0\2\u02f5\1\u0364\2\u02f5\1\u02f7"+ + 
"\5\0\2\u02f5\3\0\1\u02f5\220\0\51\u0363\1\u03d7\2\0"+ + "\3\u0363\1\u022a\2\0\1\u03d8\1\u0363\225\0\4\u03d9\2\0"+ + "\1\u03d9\15\0\1\u03d9\6\0\12\u03d9\243\0\4\u02f3\2\0"+ + "\1\u02f3\15\0\1\u02f3\6\0\12\u02f3\242\0\1\u03da\32\u0366"+ + "\1\u03db\12\u0366\1\u03dc\7\0\1\u0363\232\0\4\u03dd\2\0"+ + "\1\u03dd\15\0\1\u03dd\6\0\12\u03dd\1\u03de\307\0\1\u03df"+ + "\24\0\1\55\1\0\1\56\2\0\1\245\1\0\1\246"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\247\2\0\1\250\4\0\1\251\3\0\1\252"+ + "\17\0\1\71\2\0\1\253\21\0\1\254\2\0\1\255"+ + "\61\0\1\30\1\76\2\0\1\76\1\0\2\76\1\0"+ + "\1\76\2\0\1\u0369\1\0\2\30\1\u03e0\32\u036b\13\u036c"+ + "\1\76\1\u036c\1\u0369\1\u036c\1\0\1\u036c\1\u03e1\3\u036c"+ + "\3\0\1\u036c\3\0\2\u036c\213\0\1\u036a\1\u03e2\2\0"+ + "\65\u036a\1\u03e3\1\0\2\u036a\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\u0369"+ + "\1\0\1\41\1\30\1\u03e0\32\u036b\1\u036c\12\u03e4\1\76"+ + "\1\u036c\1\u03e5\1\u036c\1\0\1\u036c\1\u03e1\3\u036c\3\0"+ + "\1\u036c\3\0\2\u036c\213\0\1\u036c\3\0\1\u03e6\45\u036c"+ + "\1\0\3\u036c\1\0\1\u036c\1\u03e1\3\u036c\3\0\1\u036c"+ + "\3\0\2\u036c\213\0\1\u036d\3\0\46\u036d\1\u036f\2\u036d"+ + "\1\u0370\2\u036d\1\u0371\5\0\2\u036d\3\0\1\u036d\214\0"+ + "\1\u036d\3\0\1\u03e7\32\u036e\1\u03e8\12\u036e\1\u03e9\2\u036d"+ + "\1\u0370\2\u036d\1\u0371\1\u0228\1\u0229\1\u022a\2\0\2\u036d"+ + "\3\0\1\u036d\214\0\1\u036f\3\0\46\u036f\1\0\2\u036f"+ + "\1\u03ea\2\u036f\1\u0371\5\0\2\u036f\3\0\1\u036f\221\0"+ + "\4\u03eb\2\0\1\u03eb\15\0\1\u03eb\6\0\12\u03eb\243\0"+ + "\32\u03ec\1\0\12\u03ec\12\0\1\u0372\223\0\1\154\3\0"+ + "\1\u016f\20\371\1\u03ed\11\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\1\371\1\u03ee\30\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + 
"\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\13\371"+ + "\1\u0180\16\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\2\371\1\u0213\27\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\5\371\1\u0302\24\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\4\371\1\u03ef\25\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\3\371\1\u03f0\26\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\371\1\u0213"+ + "\30\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\4\371\1\u03f1\25\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\11\371\1\u03f2\20\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\7\371"+ + "\1\u0213\22\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\13\371\1\u0174\16\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\100\1\0"+ + "\1\101\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\102\2\0\1\103\4\0\1\104\3\0"+ + "\1\105\17\0\1\71\2\0\1\106\21\0\1\107\2\0"+ + "\1\110\61\0\1\30\2\31\2\0\2\111\1\112\1\0"+ + "\1\31\2\0\1\212\1\0\1\41\1\30\1\u01a5\32\43"+ + "\1\203\12\u0120\1\u01f9\1\154\1\215\1\154\1\0\1\212"+ + "\1\156\1\u01d1\1\u01d2\1\u01d3\2\0\1\111\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\100\1\0"+ + "\1\101\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\102\2\0\1\103\4\0\1\104\3\0"+ + "\1\105\17\0\1\71\2\0\1\106\21\0\1\107\2\0"+ + "\1\110\61\0\1\30\2\31\2\0\2\111\1\112\1\0"+ + "\1\31\2\0\1\212\1\0\1\41\1\30\1\u01a5\32\43"+ + "\1\203\2\u037f\1\u0120\2\u037f\2\u0120\2\u037f\1\u0120\1\u01f9"+ + 
"\1\154\1\215\1\154\1\0\1\212\1\156\1\u01d1\1\u01d2"+ + "\1\u01d3\2\0\1\111\1\154\3\0\2\154\220\0\4\u03f3"+ + "\2\0\1\u03f3\15\0\1\u03f3\6\0\12\u03f3\1\u0310\242\0"+ + "\4\u03f4\2\0\1\u03f4\15\0\1\u03f4\6\0\12\u03f4\1\u03f5"+ + "\242\0\4\u03f6\2\0\1\u03f6\15\0\1\u03f6\6\0\1\u03f7"+ + "\1\u03f8\5\u03f7\1\u03f9\1\u03f8\1\u03f7\13\0\1\u01ad\227\0"+ + "\4\u03fa\2\0\1\u03fa\15\0\1\u03fa\6\0\12\u03fa\1\u0385"+ + "\12\0\1\u01ad\227\0\4\u03f6\2\0\1\u03f6\15\0\1\u03f6"+ + "\6\0\1\u03f7\1\u03f8\5\u03f7\1\u03f9\1\u03f8\1\u03f7\242\0"+ + "\1\u0224\4\u03fa\2\0\1\u03fa\15\0\1\u03fa\6\0\12\u03fa"+ + "\1\u0385\12\0\1\u01ad\226\0\1\u0224\4\u03fa\2\0\1\u03fa"+ + "\15\0\1\u03fa\6\0\12\u03fb\1\u0385\12\0\1\u01ad\226\0"+ + "\1\u0224\4\u03fa\2\0\1\u03fa\15\0\1\u03fa\6\0\2\u03fb"+ + "\1\u03fa\2\u03fb\2\u03fa\2\u03fb\1\u03fa\1\u0385\12\0\1\u01ad"+ + "\274\0\1\u029d\12\0\1\u01ad\226\0\1\u03fc\33\0\12\u03fd"+ + "\242\0\1\u03fc\33\0\12\u038a\242\0\1\u03fc\33\0\2\u038a"+ + "\1\u03fd\1\u038a\1\u03fe\2\u03fd\2\u038a\1\u03fd\242\0\1\u0136"+ + "\4\323\1\u03ff\25\323\1\u0137\12\323\242\0\1\u0136\1\u0400"+ + "\31\323\1\u0137\12\323\242\0\1\u0136\10\323\1\u0401\21\323"+ + "\1\u0137\12\323\242\0\1\u0136\13\323\1\u0402\16\323\1\u0137"+ + "\12\323\242\0\1\u0136\17\323\1\u0403\12\323\1\u0137\12\323"+ + "\242\0\1\u0136\15\323\1\u0404\14\323\1\u0137\12\323\242\0"+ + "\1\u0136\12\323\1\u0405\17\323\1\u0137\12\323\242\0\1\u0136"+ + "\4\323\1\u02bd\25\323\1\u0137\12\323\242\0\1\u0136\10\323"+ + "\1\u0406\21\323\1\u0137\12\323\242\0\1\u0136\12\323\1\u022b"+ + "\17\323\1\u0137\12\323\242\0\1\u0136\7\323\1\u0407\22\323"+ + "\1\u0137\12\323\242\0\1\u0136\3\323\1\u02c3\26\323\1\u0137"+ + "\12\323\242\0\1\u0136\5\323\1\u0408\24\323\1\u0137\12\323"+ + "\242\0\1\u0136\11\323\1\u0409\20\323\1\u0137\12\323\242\0"+ + "\1\u0136\7\323\1\u040a\22\323\1\u0137\1\u040b\11\323\242\0"+ + "\1\u0136\10\323\1\u040c\4\323\1\u040d\5\323\1\u040e\6\323"+ + "\1\u0137\12\323\242\0\1\u0136\3\323\1\u040f\26\323\1\u0137"+ + 
"\12\323\242\0\1\u0136\7\323\1\u0410\22\323\1\u0137\10\323"+ + "\1\u0411\1\323\242\0\1\u0136\7\323\1\u0412\22\323\1\u0137"+ + "\12\323\242\0\1\u0136\7\323\1\u0413\22\323\1\u0137\12\323"+ + "\242\0\1\u0136\32\323\1\u0137\5\323\1\u0414\4\323\242\0"+ + "\1\u0136\7\323\1\u0415\22\323\1\u0137\10\323\1\u0416\1\323"+ + "\242\0\1\u0136\32\323\1\u0137\5\323\1\u0417\4\323\242\0"+ + "\1\u0136\13\323\1\u0418\16\323\1\u0137\12\323\242\0\1\u0136"+ + "\7\323\1\u0419\22\323\1\u0137\12\323\242\0\1\u0136\26\323"+ + "\1\u041a\3\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137"+ + "\7\323\1\u0417\2\323\242\0\1\u0136\15\323\1\u041b\14\323"+ + "\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\10\323\1\u041c"+ + "\1\u041d\242\0\1\u0136\6\323\1\u041e\1\u041f\22\323\1\u0137"+ + "\12\323\242\0\1\u0136\3\323\1\u0420\26\323\1\u0137\12\323"+ + "\242\0\1\u0136\32\323\1\u0137\4\323\1\u0417\5\323\242\0"+ + "\1\u0136\32\323\1\u0137\1\323\1\u0421\10\323\242\0\1\u0136"+ + "\32\323\1\u0137\1\323\1\u0422\10\323\242\0\1\u0136\13\323"+ + "\1\u0423\16\323\1\u0137\12\323\242\0\1\u0136\3\323\1\u0424"+ + "\26\323\1\u0137\12\323\242\0\1\u0136\4\323\1\u0398\25\323"+ + "\1\u0137\12\323\276\0\12\u0425\7\0\1\u0228\1\u0229\1\u022a"+ + "\13\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\1\43\1\u0426\30\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\17\43\1\u0427\12\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + 
"\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\10\43\1\u0428\21\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\13\43\1\u01e4\16\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\1\u0429\31\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\2\0\1\55\1\0\1\56"+ + "\2\0\1\57\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\65\2\0\1\66"+ + "\4\0\1\67\3\0\1\70\17\0\1\71\2\0\1\72"+ + "\21\0\1\73\2\0\1\74\61\0\2\30\1\75\1\0"+ + "\1\76\1\0\1\76\1\77\1\0\1\30\2\0\1\201"+ + "\1\0\1\41\1\30\1\202\5\43\1\u042a\24\43\1\203"+ + "\12\204\1\76\1\154\1\205\1\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\25\371\1\u042b\4\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\15\371\1\u042c\14\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\21\371"+ + "\1\u042d\10\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\16\371\1\u042e\4\371\1\u042f\6\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\4\371"+ + 
"\1\u0430\25\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\32\371\1\203\7\371\1\u0431\2\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\4\371\1\u0432\25\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\24\371\1\u0433\5\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\1\371\1\u0434\30\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u0435\1\u0436"+ + "\1\371\1\u0437\16\371\1\u0438\1\371\1\u0439\5\371\1\203"+ + "\5\371\1\u043a\4\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\1\371\1\u043b\30\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\31\371\1\u043c\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\16\371\1\u043d"+ + "\13\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\15\371\1\u043e\14\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\11\371\1\u043f\13\371\1\u0440"+ + "\4\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\7\371\1\u0441\2\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\21\371\1\u0442\7\371\1\u0443"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\12\371\1\u0444\17\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\10\371\1\u0445\1\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\5\371\1\u0446"+ + 
"\24\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\10\371\1\u0447\21\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\24\371\1\u0448\5\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\1\u0449\11\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\5\371\1\u044a\10\371\1\u044b\13\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\220\0\32\u035d\1\0\12\u035d\243\0\32\u035d"+ + "\1\u03d2\12\u035d\243\0\4\u044c\2\0\1\u044c\15\0\1\u044c"+ + "\6\0\12\u044c\243\0\4\u044d\2\0\1\u044d\15\0\1\u044d"+ + "\6\0\12\u044d\1\u044e\307\0\1\u044f\235\0\1\u02f5\3\0"+ + "\34\u02f5\12\u0450\1\0\2\u02f5\1\u0364\2\u02f5\1\u02f7\1\0"+ + "\1\u0363\3\0\2\u02f5\3\0\1\u02f5\221\0\4\u0451\2\0"+ + "\1\u0451\15\0\1\u0451\6\0\12\u0451\262\0\1\u0452\270\0"+ + "\4\u02f5\2\0\1\u02f5\15\0\1\u02f5\6\0\12\u02f5\243\0"+ + "\32\u0366\1\0\12\u0366\243\0\32\u0366\1\u03db\12\u0366\276\0"+ + "\12\u0453\243\0\4\u0454\2\0\1\u0454\15\0\1\u0454\6\0"+ + "\12\u0454\1\u03de\242\0\4\u0455\2\0\1\u0455\15\0\1\u0455"+ + "\6\0\12\u0455\1\u0456\242\0\4\u0457\2\0\1\u0457\15\0"+ + "\1\u0457\6\0\1\u0458\1\u0459\5\u0458\1\u045a\1\u0459\1\u0458"+ + "\13\0\1\u045b\11\0\1\55\1\0\1\56\2\0\1\245"+ + "\1\0\1\246\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\247\2\0\1\250\4\0\1\251"+ + "\3\0\1\252\17\0\1\71\2\0\1\253\21\0\1\254"+ + "\2\0\1\255\61\0\1\30\1\76\2\0\1\76\1\0"+ + "\2\76\1\0\1\76\2\0\1\u0369\1\u036a\2\30\1\u03e0"+ + "\32\u036b\13\u036c\1\76\1\u036c\1\u0369\1\u036c\1\0\1\u036c"+ + "\1\u03e1\3\u036c\3\0\1\u036c\3\0\2\u036c\220\0\32\u045c"+ + "\1\0\12\u045c\12\0\1\u045d\227\0\1\u045e\53\0\1\u03e1"+ + "\227\0\2\u036a\2\0\72\u036a\1\0\1\55\1\0\1\56"+ + "\2\0\1\234\1\0\1\60\4\0\1\61\1\0\1\62"+ + "\1\0\1\63\2\0\1\64\3\0\1\235\2\0\1\236"+ + 
"\4\0\1\67\3\0\1\237\17\0\1\71\2\0\1\240"+ + "\21\0\1\241\2\0\1\242\61\0\1\30\2\75\2\0"+ + "\2\243\1\244\1\0\1\75\2\0\1\u045f\1\0\1\41"+ + "\1\30\1\u0460\32\u036b\1\u036c\12\u03e4\1\0\1\u036c\1\u0461"+ + "\1\u036c\1\0\1\u045f\1\u03e1\3\u036c\2\0\1\243\1\u036c"+ + "\3\0\2\u036c\2\0\1\55\1\0\1\56\2\0\1\256"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\257\2\0\1\260\4\0\1\67"+ + "\3\0\1\261\17\0\1\71\2\0\1\262\21\0\1\263"+ + "\2\0\1\264\41\0\1\133\17\0\1\30\1\77\1\75"+ + "\1\135\1\76\1\0\1\76\1\77\1\0\1\77\2\0"+ + "\1\u0369\1\0\1\41\1\30\1\u03e0\32\u036b\1\u036c\12\u03e4"+ + "\1\76\1\u036c\1\u03e5\1\u036c\1\0\1\u036c\1\u03e1\3\u036c"+ + "\3\0\1\u036c\3\0\2\u036c\213\0\1\u036c\1\u036a\2\0"+ + "\1\u03e6\45\u036c\1\0\3\u036c\1\0\1\u036c\1\u03e1\3\u036c"+ + "\3\0\1\u036c\3\0\2\u036c\213\0\1\u036d\3\0\1\u036d"+ + "\32\u036e\1\u036d\12\u036e\1\u036f\2\u036d\1\u0370\2\u036d\1\u0371"+ + "\5\0\2\u036d\3\0\1\u036d\214\0\1\u036d\3\0\1\u036d"+ + "\32\u036e\1\u03e8\12\u036e\1\u036f\2\u036d\1\u0370\2\u036d\1\u0371"+ + "\5\0\2\u036d\3\0\1\u036d\214\0\1\u036f\3\0\34\u036f"+ + "\12\u0462\1\0\2\u036f\1\u03ea\2\u036f\1\u0371\5\0\2\u036f"+ + "\3\0\1\u036f\221\0\4\u0463\2\0\1\u0463\15\0\1\u0463"+ + "\6\0\12\u0463\243\0\4\u036d\2\0\1\u036d\15\0\1\u036d"+ + "\6\0\12\u036d\242\0\1\u0464\32\u03ec\1\u0465\12\u03ec\1\u01f9"+ + "\6\0\1\u0228\1\u0229\1\u022a\224\0\1\154\3\0\1\u016f"+ + "\1\371\1\u0466\30\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\17\371\1\u0467\12\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\10\371\1\u0468"+ + "\21\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\13\371\1\u020a\16\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\1\u0469\31\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + 
"\3\0\2\154\213\0\1\154\3\0\1\u016f\5\371\1\u046a"+ + "\24\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\265\0\1\u0310\242\0"+ + "\4\u046b\2\0\1\u046b\15\0\1\u046b\6\0\12\u046b\1\u03f5"+ + "\242\0\4\u046c\2\0\1\u046c\15\0\1\u046c\6\0\12\u046c"+ + "\1\u046d\242\0\4\u046e\2\0\1\u046e\15\0\1\u046e\6\0"+ + "\12\u046e\1\u046f\12\0\1\u01ad\226\0\1\u0224\4\u046e\2\0"+ + "\1\u046e\15\0\1\u046e\6\0\12\u0470\1\u046f\12\0\1\u01ad"+ + "\226\0\1\u0224\4\u046e\2\0\1\u046e\15\0\1\u046e\6\0"+ + "\12\u0471\1\u046f\12\0\1\u01ad\226\0\1\u0224\4\u046e\2\0"+ + "\1\u046e\15\0\1\u046e\6\0\2\u0471\1\u0470\1\u0471\1\u0472"+ + "\2\u0470\2\u0471\1\u0470\1\u046f\12\0\1\u01ad\227\0\4\u0473"+ + "\2\0\1\u0473\15\0\1\u0473\6\0\12\u0473\1\u0385\12\0"+ + "\1\u01ad\226\0\1\u0224\4\u0473\2\0\1\u0473\15\0\1\u0473"+ + "\6\0\12\u0473\1\u0385\12\0\1\u01ad\262\0\1\u0474\1\u0475"+ + "\5\u0474\1\u0476\1\u0475\1\u0474\242\0\1\u03fc\307\0\1\u03fc"+ + "\33\0\2\u03fd\1\0\2\u03fd\2\0\2\u03fd\243\0\1\u0136"+ + "\20\323\1\u0477\11\323\1\u0137\12\323\242\0\1\u0136\1\323"+ + "\1\u0478\30\323\1\u0137\12\323\242\0\1\u0136\13\323\1\u0237"+ + "\16\323\1\u0137\12\323\242\0\1\u0136\2\323\1\u02c3\27\323"+ + "\1\u0137\12\323\242\0\1\u0136\5\323\1\u0394\24\323\1\u0137"+ + "\12\323\242\0\1\u0136\4\323\1\u0479\25\323\1\u0137\12\323"+ + "\242\0\1\u0136\3\323\1\u047a\26\323\1\u0137\12\323\242\0"+ + "\1\u0136\1\323\1\u02c3\30\323\1\u0137\12\323\242\0\1\u0136"+ + "\4\323\1\u047b\25\323\1\u0137\12\323\242\0\1\u0136\11\323"+ + "\1\u047c\20\323\1\u0137\12\323\242\0\1\u0136\1\323\1\u047d"+ + "\30\323\1\u0137\12\323\242\0\1\u0136\24\323\1\u047e\5\323"+ + "\1\u0137\12\323\242\0\1\u0136\1\323\1\u047f\30\323\1\u0137"+ + "\12\323\242\0\1\u0136\14\323\1\u0480\15\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\323\1\u0481\30\323\1\u0137\12\323\242\0"+ + "\1\u0136\1\323\1\u0482\30\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\323\1\u0483\30\323\1\u0137\12\323\242\0\1\u0136\24\323"+ + 
"\1\u0484\5\323\1\u0137\12\323\242\0\1\u0136\1\u0485\31\323"+ + "\1\u0137\12\323\242\0\1\u0136\24\323\1\u0486\5\323\1\u0137"+ + "\12\323\242\0\1\u0136\24\323\1\u0487\5\323\1\u0137\12\323"+ + "\242\0\1\u0136\27\323\1\u0488\2\323\1\u0137\12\323\242\0"+ + "\1\u0136\24\323\1\u0489\5\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\u02b7\31\323\1\u0137\12\323\242\0\1\u0136\24\323\1\u0483"+ + "\5\323\1\u0137\12\323\242\0\1\u0136\20\323\1\u048a\11\323"+ + "\1\u0137\12\323\242\0\1\u0136\24\323\1\u048b\5\323\1\u0137"+ + "\12\323\242\0\1\u0136\1\323\1\u048c\30\323\1\u0137\12\323"+ + "\242\0\1\u0136\4\323\1\u048d\25\323\1\u0137\12\323\242\0"+ + "\1\u0136\1\u048e\31\323\1\u0137\12\323\242\0\1\u0136\21\323"+ + "\1\u048f\10\323\1\u0137\12\323\242\0\1\u0136\4\323\1\u0490"+ + "\25\323\1\u0137\12\323\242\0\1\u0136\24\323\1\u0491\5\323"+ + "\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\1\323\1\u0492"+ + "\10\323\242\0\1\u0136\1\u0493\31\323\1\u0137\12\323\242\0"+ + "\1\u0136\1\u0494\31\323\1\u0137\12\323\242\0\1\u0136\7\323"+ + "\1\u02c3\22\323\1\u0137\12\323\242\0\1\u0136\13\323\1\u022b"+ + "\16\323\1\u0137\12\323\317\0\1\u0228\1\u0229\1\u022a\13\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\17\43"+ + "\1\u0495\12\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\5\43\1\u0496\24\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + 
"\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\16\43\1\u02d8\13\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\2\0\1\55\1\0\1\56\2\0\1\57"+ + "\1\0\1\60\4\0\1\61\1\0\1\62\1\0\1\63"+ + "\2\0\1\64\3\0\1\65\2\0\1\66\4\0\1\67"+ + "\3\0\1\70\17\0\1\71\2\0\1\72\21\0\1\73"+ + "\2\0\1\74\61\0\2\30\1\75\1\0\1\76\1\0"+ + "\1\76\1\77\1\0\1\30\2\0\1\201\1\0\1\41"+ + "\1\30\1\202\15\43\1\u0497\14\43\1\203\12\204\1\76"+ + "\1\154\1\205\1\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\2\0\1\55\1\0\1\56\2\0"+ + "\1\57\1\0\1\60\4\0\1\61\1\0\1\62\1\0"+ + "\1\63\2\0\1\64\3\0\1\65\2\0\1\66\4\0"+ + "\1\67\3\0\1\70\17\0\1\71\2\0\1\72\21\0"+ + "\1\73\2\0\1\74\61\0\2\30\1\75\1\0\1\76"+ + "\1\0\1\76\1\77\1\0\1\30\2\0\1\201\1\0"+ + "\1\41\1\30\1\202\7\43\1\u01e7\22\43\1\203\12\204"+ + "\1\76\1\154\1\205\1\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\371\1\u0498\30\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\6\371\1\u0499\23\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\3\371\1\u0444\6\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\6\371\1\u0213\3\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\5\371\1\u0213"+ + "\4\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\27\371"+ + "\1\u049a\2\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\371\1\u049b\30\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\27\371\1\u049c\2\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + 
"\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\u049d\31\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\371\1\u0174\30\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\u049e\30\371\1\u049f"+ + "\1\203\1\u04a0\11\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\1\371\1\u04a1\10\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\4\371\1\u04a2\25\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\3\371\1\u04a3\6\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\25\371\1\u04a4\4\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\u04a5\31\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\4\371\1\u04a6\5\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\24\371\1\u04a7\5\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\1\371"+ + "\1\u04a8\10\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\3\371\1\u020d\6\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\11\371\1\u010f\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\10\371"+ + "\1\u0434\1\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\u04a9\1\371\1\u04aa\27\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\10\371\1\u04ab"+ + "\1\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + 
"\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\4\371\1\u04ac\5\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\25\371\1\u0174\4\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\5\371"+ + "\1\u04ad\4\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\3\371\1\u04ae\6\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\7\371\1\u04af\2\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\2\371\1\u04b0\7\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\1\u0434\31\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\7\371\1\u04b1\2\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u04b2"+ + "\15\371\1\u0180\10\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\220\0"+ + "\4\u035e\2\0\1\u035e\15\0\1\u035e\6\0\12\u035e\243\0"+ + "\4\u04b3\2\0\1\u04b3\15\0\1\u04b3\6\0\12\u04b3\1\u044e"+ + "\242\0\4\u04b4\2\0\1\u04b4\15\0\1\u04b4\6\0\12\u04b4"+ + "\1\u04b5\242\0\4\u04b6\2\0\1\u04b6\15\0\1\u04b6\6\0"+ + "\1\u04b7\1\u04b8\5\u04b7\1\u04b9\1\u04b8\1\u04b7\13\0\1\u04ba"+ + "\222\0\1\u02f5\3\0\34\u02f5\12\u04bb\1\0\2\u02f5\1\u0364"+ + "\2\u02f5\1\u02f7\1\0\1\u0363\3\0\2\u02f5\3\0\1\u02f5"+ + "\221\0\4\u0363\2\0\1\u0363\15\0\1\u0363\6\0\12\u0363"+ + "\274\0\1\u04bc\311\0\12\u04bd\10\0\1\u0363\232\0\4\u04be"+ + "\2\0\1\u04be\15\0\1\u04be\6\0\12\u04be\1\u03de\242\0"+ + "\4\u04bf\2\0\1\u04bf\15\0\1\u04bf\6\0\12\u04bf\1\u04c0"+ + "\242\0\4\u04c1\2\0\1\u04c1\15\0\1\u04c1\6\0\1\u04c2"+ + "\1\u04c3\5\u04c2\1\u04c4\1\u04c3\1\u04c2\13\0\1\u045b\227\0"+ + "\4\u04c5\2\0\1\u04c5\15\0\1\u04c5\6\0\12\u04c5\1\u04c6"+ + 
"\12\0\1\u045b\226\0\1\u04c7\4\u04c5\2\0\1\u04c5\15\0"+ + "\1\u04c5\6\0\12\u04c8\1\u04c6\12\0\1\u045b\226\0\1\u04c7"+ + "\4\u04c5\2\0\1\u04c5\15\0\1\u04c5\6\0\12\u04c9\1\u04c6"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u04c5\2\0\1\u04c5\15\0"+ + "\1\u04c5\6\0\2\u04c9\1\u04c8\1\u04c9\1\u04ca\2\u04c8\2\u04c9"+ + "\1\u04c8\1\u04c6\12\0\1\u045b\274\0\1\u03dc\7\0\1\u0363"+ + "\231\0\1\u04cb\32\u045c\1\u04cc\12\u045c\236\0\2\u045d\2\0"+ + "\60\u045d\1\0\1\u04cd\3\u045d\1\u04ce\1\0\3\u045d\212\0"+ + "\1\u036c\1\u036a\2\0\46\u036c\1\0\3\u036c\1\0\1\u036c"+ + "\1\0\3\u036c\3\0\1\u036c\3\0\2\u036c\7\0\1\u0126"+ + "\1\0\1\u0127\17\0\1\u0128\2\0\1\u0129\4\0\1\u012a"+ + "\3\0\1\u012b\22\0\1\u012c\21\0\1\u012d\2\0\1\u012e"+ + "\62\0\1\243\1\75\2\0\3\243\1\0\1\243\2\0"+ + "\1\u045f\3\0\1\u0460\33\u036c\12\u03e4\1\0\1\u036c\1\u045f"+ + "\1\u036c\1\0\1\u045f\1\u03e1\3\u036c\2\0\1\243\1\u036c"+ + "\3\0\2\u036c\7\0\1\u0126\1\0\1\u0127\17\0\1\u0128"+ + "\2\0\1\u0129\4\0\1\u012a\3\0\1\u012b\22\0\1\u012c"+ + "\21\0\1\u012d\2\0\1\u012e\62\0\1\243\1\75\2\0"+ + "\3\243\1\0\1\243\2\0\1\u045f\1\u036a\2\0\1\u0460"+ + "\33\u036c\12\u03e4\1\0\1\u036c\1\u045f\1\u036c\1\0\1\u045f"+ + "\1\u03e1\3\u036c\2\0\1\243\1\u036c\3\0\2\u036c\2\0"+ + "\1\55\1\0\1\56\2\0\1\u012f\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\u0130\2\0\1\u0131\4\0\1\67\3\0\1\u0132\17\0"+ + "\1\71\2\0\1\u0133\21\0\1\u0134\2\0\1\u0135\41\0"+ + "\1\133\17\0\1\30\1\244\1\75\1\135\1\0\2\243"+ + "\1\244\1\0\1\244\2\0\1\u045f\1\0\1\41\1\30"+ + "\1\u0460\32\u036b\1\u036c\12\u03e4\1\0\1\u036c\1\u0461\1\u036c"+ + "\1\0\1\u045f\1\u03e1\3\u036c\2\0\1\243\1\u036c\3\0"+ + "\2\u036c\213\0\1\u036f\3\0\34\u036f\12\u04cf\1\0\2\u036f"+ + "\1\u03ea\2\u036f\1\u0371\1\u0228\1\u0229\1\u022a\2\0\2\u036f"+ + "\3\0\1\u036f\221\0\4\u036f\2\0\1\u036f\15\0\1\u036f"+ + "\6\0\12\u036f\243\0\32\u03ec\1\0\12\u03ec\243\0\32\u03ec"+ + "\1\u0465\12\u03ec\236\0\1\154\3\0\1\u016f\17\371\1\u04d0"+ + "\12\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + 
"\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\5\371\1\u04d1\24\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\16\371\1\u0306\13\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\15\371"+ + "\1\u04d2\14\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\7\371\1\u020d\22\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\220\0\4\u04d3\2\0\1\u04d3\15\0\1\u04d3\6\0"+ + "\12\u04d3\1\u03f5\242\0\4\u04d4\2\0\1\u04d4\15\0\1\u04d4"+ + "\6\0\12\u04d4\1\u04d5\242\0\4\u04d6\2\0\1\u04d6\15\0"+ + "\1\u04d6\6\0\1\u04d7\1\u04d8\5\u04d7\1\u04d9\1\u04d8\1\u04d7"+ + "\13\0\1\u01ad\227\0\4\u04da\2\0\1\u04da\15\0\1\u04da"+ + "\6\0\12\u04da\1\u046f\12\0\1\u01ad\227\0\4\u04d6\2\0"+ + "\1\u04d6\15\0\1\u04d6\6\0\1\u04d7\1\u04d8\5\u04d7\1\u04d9"+ + "\1\u04d8\1\u04d7\242\0\1\u0224\4\u04da\2\0\1\u04da\15\0"+ + "\1\u04da\6\0\12\u04da\1\u046f\12\0\1\u01ad\226\0\1\u0224"+ + "\4\u04da\2\0\1\u04da\15\0\1\u04da\6\0\12\u04db\1\u046f"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u04da\2\0\1\u04da\15\0"+ + "\1\u04da\6\0\2\u04db\1\u04da\2\u04db\2\u04da\2\u04db\1\u04da"+ + "\1\u046f\12\0\1\u01ad\274\0\1\u0385\12\0\1\u01ad\262\0"+ + "\12\u04dc\13\0\1\u01ad\262\0\12\u0474\13\0\1\u01ad\262\0"+ + "\2\u0474\1\u04dc\1\u0474\1\u04dd\2\u04dc\2\u0474\1\u04dc\13\0"+ + "\1\u01ad\226\0\1\u0136\1\323\1\u04de\30\323\1\u0137\12\323"+ + "\242\0\1\u0136\17\323\1\u04df\12\323\1\u0137\12\323\242\0"+ + "\1\u0136\10\323\1\u04e0\21\323\1\u0137\12\323\242\0\1\u0136"+ + "\13\323\1\u02ba\16\323\1\u0137\12\323\242\0\1\u0136\1\u04e1"+ + "\31\323\1\u0137\12\323\242\0\1\u0136\5\323\1\u04e2\24\323"+ + "\1\u0137\12\323\242\0\1\u0136\25\323\1\u04e3\4\323\1\u0137"+ + "\12\323\242\0\1\u0136\15\323\1\u04e4\14\323\1\u0137\12\323"+ + "\242\0\1\u0136\21\323\1\u04e5\10\323\1\u0137\12\323\242\0"+ + 
"\1\u0136\16\323\1\u04e6\4\323\1\u04e7\6\323\1\u0137\12\323"+ + "\242\0\1\u0136\4\323\1\u04e8\25\323\1\u0137\12\323\242\0"+ + "\1\u0136\32\323\1\u0137\7\323\1\u04e9\2\323\242\0\1\u0136"+ + "\4\323\1\u04ea\25\323\1\u0137\12\323\242\0\1\u0136\24\323"+ + "\1\u04eb\5\323\1\u0137\12\323\242\0\1\u0136\1\323\1\u04ec"+ + "\30\323\1\u0137\12\323\242\0\1\u0136\1\u04ed\1\u04ee\1\323"+ + "\1\u04ef\16\323\1\u04f0\1\323\1\u04f1\5\323\1\u0137\5\323"+ + "\1\u04f2\4\323\242\0\1\u0136\1\323\1\u04f3\30\323\1\u0137"+ + "\12\323\242\0\1\u0136\31\323\1\u04f4\1\u0137\12\323\242\0"+ + "\1\u0136\16\323\1\u04f5\13\323\1\u0137\12\323\242\0\1\u0136"+ + "\15\323\1\u04f6\14\323\1\u0137\12\323\242\0\1\u0136\11\323"+ + "\1\u04f7\13\323\1\u04f8\4\323\1\u0137\12\323\242\0\1\u0136"+ + "\32\323\1\u0137\7\323\1\u04f9\2\323\242\0\1\u0136\21\323"+ + "\1\u04fa\7\323\1\u04fb\1\u0137\12\323\242\0\1\u0136\12\323"+ + "\1\u04fc\17\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137"+ + "\10\323\1\u04fd\1\323\242\0\1\u0136\5\323\1\u04fe\24\323"+ + "\1\u0137\12\323\242\0\1\u0136\10\323\1\u04ff\21\323\1\u0137"+ + "\12\323\242\0\1\u0136\24\323\1\u0500\5\323\1\u0137\12\323"+ + "\242\0\1\u0136\32\323\1\u0137\1\u0501\11\323\242\0\1\u0136"+ + "\5\323\1\u0502\10\323\1\u0503\13\323\1\u0137\12\323\25\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\10\43"+ + "\1\u0504\21\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\2\0\1\55\1\0\1\56\2\0\1\57\1\0\1\60"+ + "\4\0\1\61\1\0\1\62\1\0\1\63\2\0\1\64"+ + "\3\0\1\65\2\0\1\66\4\0\1\67\3\0\1\70"+ + "\17\0\1\71\2\0\1\72\21\0\1\73\2\0\1\74"+ + "\61\0\2\30\1\75\1\0\1\76\1\0\1\76\1\77"+ + "\1\0\1\30\2\0\1\201\1\0\1\41\1\30\1\202"+ + "\4\43\1\u01ed\25\43\1\203\12\204\1\76\1\154\1\205"+ + "\1\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + 
"\2\154\2\0\1\55\1\0\1\56\2\0\1\57\1\0"+ + "\1\60\4\0\1\61\1\0\1\62\1\0\1\63\2\0"+ + "\1\64\3\0\1\65\2\0\1\66\4\0\1\67\3\0"+ + "\1\70\17\0\1\71\2\0\1\72\21\0\1\73\2\0"+ + "\1\74\61\0\2\30\1\75\1\0\1\76\1\0\1\76"+ + "\1\77\1\0\1\30\2\0\1\201\1\0\1\41\1\30"+ + "\1\202\25\43\1\u01e7\4\43\1\203\12\204\1\76\1\154"+ + "\1\205\1\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\1\371\1\u0505\10\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\6\371\1\u0506\3\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\5\371\1\u0507"+ + "\4\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\5\371\1\u0508\4\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\32\371\1\203\5\371\1\u0434\4\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\17\371\1\u0509\12\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\12\371\1\u050a\17\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\25\371\1\u050b\4\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u050c\31\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\u050d\31\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\15\371\1\u050e\14\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\371\1\u050f\30\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\10\371\1\u0510\1\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + 
"\1\154\3\0\1\u016f\21\371\1\u0511\10\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u0512\31\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\3\371\1\u0434\6\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\2\371\1\u0444\27\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\11\371\1\u0513"+ + "\20\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\11\371\1\u0514\20\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\1\u0205\11\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\2\371\1\u0205\7\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\1\u0180\11\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\10\371\1\u0515\21\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\1\u0516\31\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\1\371\1\u0517\10\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\10\371\1\u010f\1\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\25\371\1\u0518"+ + "\4\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\220\0\4\u0519\2\0"+ + "\1\u0519\15\0\1\u0519\6\0\12\u0519\1\u044e\242\0\4\u051a"+ + "\2\0\1\u051a\15\0\1\u051a\6\0\12\u051a\1\u051b\242\0"+ + "\4\u051c\2\0\1\u051c\15\0\1\u051c\6\0\1\u051d\1\u051e"+ + "\5\u051d\1\u051f\1\u051e\1\u051d\13\0\1\u04ba\227\0\4\u0520"+ + 
"\2\0\1\u0520\15\0\1\u0520\6\0\12\u0520\1\u0521\12\0"+ + "\1\u04ba\226\0\1\u0522\4\u0520\2\0\1\u0520\15\0\1\u0520"+ + "\6\0\12\u0523\1\u0521\12\0\1\u04ba\226\0\1\u0522\4\u0520"+ + "\2\0\1\u0520\15\0\1\u0520\6\0\12\u0524\1\u0521\12\0"+ + "\1\u04ba\226\0\1\u0522\4\u0520\2\0\1\u0520\15\0\1\u0520"+ + "\6\0\2\u0524\1\u0523\1\u0524\1\u0525\2\u0523\2\u0524\1\u0523"+ + "\1\u0521\12\0\1\u04ba\304\0\1\u035e\225\0\1\u02f5\3\0"+ + "\34\u02f5\12\u0526\1\0\2\u02f5\1\u0364\2\u02f5\1\u02f7\1\0"+ + "\1\u0363\3\0\2\u02f5\3\0\1\u02f5\236\0\1\u0527\325\0"+ + "\12\u0528\10\0\1\u0363\277\0\1\u03de\242\0\4\u0529\2\0"+ + "\1\u0529\15\0\1\u0529\6\0\12\u0529\1\u04c0\242\0\4\u052a"+ + "\2\0\1\u052a\15\0\1\u052a\6\0\12\u052a\1\u052b\242\0"+ + "\4\u052c\2\0\1\u052c\15\0\1\u052c\6\0\12\u052c\1\u052d"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u052c\2\0\1\u052c\15\0"+ + "\1\u052c\6\0\12\u052e\1\u052d\12\0\1\u045b\226\0\1\u04c7"+ + "\4\u052c\2\0\1\u052c\15\0\1\u052c\6\0\12\u052f\1\u052d"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u052c\2\0\1\u052c\15\0"+ + "\1\u052c\6\0\2\u052f\1\u052e\1\u052f\1\u0530\2\u052e\2\u052f"+ + "\1\u052e\1\u052d\12\0\1\u045b\227\0\4\u0531\2\0\1\u0531"+ + "\15\0\1\u0531\6\0\12\u0531\1\u04c6\12\0\1\u045b\227\0"+ + "\4\u04c1\2\0\1\u04c1\15\0\1\u04c1\6\0\1\u04c2\1\u04c3"+ + "\5\u04c2\1\u04c4\1\u04c3\1\u04c2\276\0\1\u0532\1\u0533\5\u0532"+ + "\1\u0534\1\u0533\1\u0532\242\0\1\u04c7\4\u0531\2\0\1\u0531"+ + "\15\0\1\u0531\6\0\12\u0531\1\u04c6\12\0\1\u045b\226\0"+ + "\1\u04c7\4\u0531\2\0\1\u0531\15\0\1\u0531\6\0\12\u0535"+ + "\1\u04c6\12\0\1\u045b\226\0\1\u04c7\4\u0531\2\0\1\u0531"+ + "\15\0\1\u0531\6\0\2\u0535\1\u0531\2\u0535\2\u0531\2\u0535"+ + "\1\u0531\1\u04c6\12\0\1\u045b\227\0\1\u0536\1\u0537\1\u0538"+ + "\1\u0539\1\u053a\1\u053b\1\u053c\1\u053d\1\u053e\1\u053f\1\u0540"+ + "\1\u0541\1\u0542\1\u0543\1\u0544\1\u0545\1\u0546\1\u0547\1\u0548"+ + "\1\u0549\1\u054a\1\u054b\1\u054c\1\u054d\1\u054e\1\u054f\1\0"+ + "\12\u045c\243\0\32\u045c\1\u04cc\12\u045c\236\0\2\u045d\2\0"+ + 
"\72\u045d\212\0\1\u036f\3\0\34\u036f\12\u0550\1\0\2\u036f"+ + "\1\u03ea\2\u036f\1\u0371\1\u0228\1\u0229\1\u022a\2\0\2\u036f"+ + "\3\0\1\u036f\214\0\1\154\3\0\1\u016f\10\371\1\u0551"+ + "\21\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\4\371\1\u0213\25\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\25\371\1\u020d\4\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\265\0\1\u03f5\242\0\4\u0552\2\0"+ + "\1\u0552\15\0\1\u0552\6\0\12\u0552\1\u04d5\242\0\4\u0553"+ + "\2\0\1\u0553\15\0\1\u0553\6\0\12\u0553\1\u0554\242\0"+ + "\4\u0555\2\0\1\u0555\15\0\1\u0555\6\0\12\u0555\1\u0556"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u0555\2\0\1\u0555\15\0"+ + "\1\u0555\6\0\12\u0557\1\u0556\12\0\1\u01ad\226\0\1\u0224"+ + "\4\u0555\2\0\1\u0555\15\0\1\u0555\6\0\12\u0558\1\u0556"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u0555\2\0\1\u0555\15\0"+ + "\1\u0555\6\0\2\u0558\1\u0557\1\u0558\1\u0559\2\u0557\2\u0558"+ + "\1\u0557\1\u0556\12\0\1\u01ad\227\0\4\u055a\2\0\1\u055a"+ + "\15\0\1\u055a\6\0\12\u055a\1\u046f\12\0\1\u01ad\226\0"+ + "\1\u0224\4\u055a\2\0\1\u055a\15\0\1\u055a\6\0\12\u055a"+ + "\1\u046f\12\0\1\u01ad\307\0\1\u01ad\262\0\2\u04dc\1\0"+ + "\2\u04dc\2\0\2\u04dc\14\0\1\u01ad\226\0\1\u0136\17\323"+ + "\1\u055b\12\323\1\u0137\12\323\242\0\1\u0136\5\323\1\u055c"+ + "\24\323\1\u0137\12\323\242\0\1\u0136\16\323\1\u0398\13\323"+ + "\1\u0137\12\323\242\0\1\u0136\15\323\1\u055d\14\323\1\u0137"+ + "\12\323\242\0\1\u0136\7\323\1\u02bd\22\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\323\1\u055e\30\323\1\u0137\12\323\242\0"+ + "\1\u0136\6\323\1\u055f\23\323\1\u0137\12\323\242\0\1\u0136"+ + "\32\323\1\u0137\3\323\1\u04fc\6\323\242\0\1\u0136\32\323"+ + "\1\u0137\6\323\1\u02c3\3\323\242\0\1\u0136\32\323\1\u0137"+ + "\5\323\1\u02c3\4\323\242\0\1\u0136\27\323\1\u0560\2\323"+ + "\1\u0137\12\323\242\0\1\u0136\1\323\1\u0561\30\323\1\u0137"+ + 
"\12\323\242\0\1\u0136\27\323\1\u0562\2\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u0563\31\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\323\1\u022b\30\323\1\u0137\12\323\242\0\1\u0136\1\u0564"+ + "\30\323\1\u0565\1\u0137\1\u0566\11\323\242\0\1\u0136\32\323"+ + "\1\u0137\1\323\1\u0567\10\323\242\0\1\u0136\4\323\1\u0568"+ + "\25\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\3\323"+ + "\1\u0569\6\323\242\0\1\u0136\25\323\1\u056a\4\323\1\u0137"+ + "\12\323\242\0\1\u0136\1\u056b\31\323\1\u0137\12\323\242\0"+ + "\1\u0136\32\323\1\u0137\4\323\1\u056c\5\323\242\0\1\u0136"+ + "\24\323\1\u056d\5\323\1\u0137\12\323\242\0\1\u0136\32\323"+ + "\1\u0137\1\323\1\u056e\10\323\242\0\1\u0136\32\323\1\u0137"+ + "\3\323\1\u02bd\6\323\242\0\1\u0136\32\323\1\u0137\11\323"+ + "\1\u01c1\242\0\1\u0136\32\323\1\u0137\10\323\1\u04ec\1\323"+ + "\242\0\1\u0136\1\u056f\1\323\1\u0570\27\323\1\u0137\12\323"+ + "\242\0\1\u0136\32\323\1\u0137\10\323\1\u0571\1\323\242\0"+ + "\1\u0136\32\323\1\u0137\4\323\1\u0572\5\323\242\0\1\u0136"+ + "\25\323\1\u022b\4\323\1\u0137\12\323\242\0\1\u0136\32\323"+ + "\1\u0137\5\323\1\u0573\4\323\242\0\1\u0136\32\323\1\u0137"+ + "\3\323\1\u0574\6\323\242\0\1\u0136\32\323\1\u0137\7\323"+ + "\1\u0575\2\323\242\0\1\u0136\32\323\1\u0137\2\323\1\u0576"+ + "\7\323\242\0\1\u0136\1\u04ec\31\323\1\u0137\12\323\242\0"+ + "\1\u0136\32\323\1\u0137\7\323\1\u0577\2\323\242\0\1\u0136"+ + "\3\323\1\u0578\15\323\1\u0237\10\323\1\u0137\12\323\25\0"+ + "\1\55\1\0\1\56\2\0\1\57\1\0\1\60\4\0"+ + "\1\61\1\0\1\62\1\0\1\63\2\0\1\64\3\0"+ + "\1\65\2\0\1\66\4\0\1\67\3\0\1\70\17\0"+ + "\1\71\2\0\1\72\21\0\1\73\2\0\1\74\61\0"+ + "\2\30\1\75\1\0\1\76\1\0\1\76\1\77\1\0"+ + "\1\30\2\0\1\201\1\0\1\41\1\30\1\202\5\43"+ + "\1\u035c\24\43\1\203\12\204\1\76\1\154\1\205\1\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\3\371\1\u0579\26\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\6\371"+ + 
"\1\u018a\23\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\371\1\u04ab\30\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u057a\26\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\10\371\1\u057b\1\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\32\371\1\203\2\371\1\u057c\7\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\2\371\1\u057d\7\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\3\371\1\u057e\6\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\5\371\1\u057f"+ + "\4\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\3\371\1\u0580\6\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\2\371\1\u0581\27\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\u0582\31\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\24\371"+ + "\1\u0583\5\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\23\371\1\u0205\6\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\1\u0584"+ + "\11\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371"+ + "\1\203\1\u0585\11\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\11\371\1\u0586\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\12\371\1\u0587\17\371\1\203\12\371"+ + 
"\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\2\371\1\u0209\7\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\2\371\1\u0588\27\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\265\0\1\u044e\242\0\4\u0589\2\0\1\u0589\15\0\1\u0589"+ + "\6\0\12\u0589\1\u051b\242\0\4\u058a\2\0\1\u058a\15\0"+ + "\1\u058a\6\0\12\u058a\1\u058b\242\0\4\u058c\2\0\1\u058c"+ + "\15\0\1\u058c\6\0\12\u058c\1\u058d\12\0\1\u04ba\226\0"+ + "\1\u0522\4\u058c\2\0\1\u058c\15\0\1\u058c\6\0\12\u058e"+ + "\1\u058d\12\0\1\u04ba\226\0\1\u0522\4\u058c\2\0\1\u058c"+ + "\15\0\1\u058c\6\0\12\u058f\1\u058d\12\0\1\u04ba\226\0"+ + "\1\u0522\4\u058c\2\0\1\u058c\15\0\1\u058c\6\0\2\u058f"+ + "\1\u058e\1\u058f\1\u0590\2\u058e\2\u058f\1\u058e\1\u058d\12\0"+ + "\1\u04ba\227\0\4\u0591\2\0\1\u0591\15\0\1\u0591\6\0"+ + "\12\u0591\1\u0521\12\0\1\u04ba\227\0\4\u051c\2\0\1\u051c"+ + "\15\0\1\u051c\6\0\1\u051d\1\u051e\5\u051d\1\u051f\1\u051e"+ + "\1\u051d\276\0\1\u0592\1\u0593\5\u0592\1\u0594\1\u0593\1\u0592"+ + "\242\0\1\u0522\4\u0591\2\0\1\u0591\15\0\1\u0591\6\0"+ + "\12\u0591\1\u0521\12\0\1\u04ba\226\0\1\u0522\4\u0591\2\0"+ + "\1\u0591\15\0\1\u0591\6\0\12\u0595\1\u0521\12\0\1\u04ba"+ + "\226\0\1\u0522\4\u0591\2\0\1\u0591\15\0\1\u0591\6\0"+ + "\2\u0595\1\u0591\2\u0595\2\u0591\2\u0595\1\u0591\1\u0521\12\0"+ + "\1\u04ba\222\0\1\u02f5\3\0\34\u02f5\12\u0596\1\0\2\u02f5"+ + "\1\u0364\2\u02f5\1\u02f7\1\0\1\u0363\3\0\2\u02f5\3\0"+ + "\1\u02f5\224\0\1\u0597\337\0\12\u0598\10\0\1\u0363\232\0"+ + "\4\u0599\2\0\1\u0599\15\0\1\u0599\6\0\12\u0599\1\u04c0"+ + "\242\0\4\u059a\2\0\1\u059a\15\0\1\u059a\6\0\12\u059a"+ + "\1\u059b\242\0\4\u059c\2\0\1\u059c\15\0\1\u059c\6\0"+ + "\1\u059d\1\u059e\5\u059d\1\u059f\1\u059e\1\u059d\13\0\1\u045b"+ + "\227\0\4\u05a0\2\0\1\u05a0\15\0\1\u05a0\6\0\12\u05a0"+ + "\1\u052d\12\0\1\u045b\227\0\4\u059c\2\0\1\u059c\15\0"+ + 
"\1\u059c\6\0\1\u059d\1\u059e\5\u059d\1\u059f\1\u059e\1\u059d"+ + "\242\0\1\u04c7\4\u05a0\2\0\1\u05a0\15\0\1\u05a0\6\0"+ + "\12\u05a0\1\u052d\12\0\1\u045b\226\0\1\u04c7\4\u05a0\2\0"+ + "\1\u05a0\15\0\1\u05a0\6\0\12\u05a1\1\u052d\12\0\1\u045b"+ + "\226\0\1\u04c7\4\u05a0\2\0\1\u05a0\15\0\1\u05a0\6\0"+ + "\2\u05a1\1\u05a0\2\u05a1\2\u05a0\2\u05a1\1\u05a0\1\u052d\12\0"+ + "\1\u045b\227\0\4\u05a2\2\0\1\u05a2\15\0\1\u05a2\6\0"+ + "\12\u05a2\1\u04c6\12\0\1\u045b\226\0\1\u05a3\33\0\12\u05a4"+ + "\242\0\1\u05a3\33\0\12\u0532\242\0\1\u05a3\33\0\2\u0532"+ + "\1\u05a4\1\u0532\1\u05a5\2\u05a4\2\u0532\1\u05a4\242\0\1\u04c7"+ + "\4\u05a2\2\0\1\u05a2\15\0\1\u05a2\6\0\12\u05a2\1\u04c6"+ + "\12\0\1\u045b\226\0\1\u04cb\1\u045c\2\u05a6\1\u05a7\1\u05a8"+ + "\10\u05a6\1\u045c\1\u05a9\5\u05a6\6\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u05aa\2\u05a6\1\u045c\1\u05a6\1\u05ab\3\u05a6\1\u05ac"+ + "\2\u05a6\4\u045c\4\u05a6\1\u045c\2\u05a6\1\u045c\2\u05a6\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\3\u045c\1\u05a6\1\u045c\1\u05a6\2\u045c"+ + "\1\u05ad\1\u045c\1\u05a6\10\u045c\1\u05a6\2\u045c\2\u05a6\2\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u05a6\1\u05ae\2\u05a6"+ + "\2\u045c\1\u05a6\3\u045c\1\u05af\1\u05b0\1\u045c\1\u05b1\2\u05a6"+ + "\11\u045c\1\u04cc\12\u045c\242\0\1\u04cb\3\u045c\1\u05a6\1\u045c"+ + "\1\u05a6\10\u045c\1\u05a6\1\u045c\2\u05a6\10\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\4\u045c\1\u05b2\5\u045c\1\u05a6\17\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\4\u045c\2\u05a6\2\u045c\1\u05a6\1\u045c"+ + "\1\u05a6\13\u045c\2\u05a6\2\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\1\u05b3\1\u045c\2\u05a6\1\u05b4\1\u05b5\12\u05a6\1\u05b6\1\u05a6"+ + "\2\u045c\2\u05a6\3\u045c\1\u05a6\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\2\u045c\4\u05a6\3\u045c\2\u05a6\1\u05b7\1\u05a6\1\u045c\2\u05a6"+ + "\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u05b8\1\u05a6\2\u045c"+ + "\1\u05a6\3\u045c\1\u05b9\5\u045c\3\u05a6\3\u045c\1\u05a6\1\u045c"+ + 
"\1\u05a6\1\u045c\2\u05a6\1\u04cc\12\u045c\242\0\1\u04cb\3\u05a6"+ + "\1\u05ba\1\u05a6\1\u05bb\1\u045c\1\u05a6\1\u05bc\7\u05a6\1\u05bd"+ + "\3\u05a6\1\u045c\2\u05a6\1\u045c\2\u05a6\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u05be\1\u05a6\1\u045c\1\u05bf\6\u05a6\3\u045c\1\u05a6"+ + "\2\u045c\1\u05a6\2\u045c\1\u05a6\6\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u05a6\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u05a6"+ + "\2\u045c\1\u05a6\1\u05c0\1\u05c1\2\u05a6\1\u045c\1\u05c2\2\u05a6"+ + "\2\u045c\2\u05a6\1\u045c\1\u05a6\3\u045c\1\u05c3\1\u05a6\2\u045c"+ + "\1\u05a6\1\u04cc\12\u045c\242\0\1\u04cb\3\u05a6\1\u05c4\2\u05a6"+ + "\1\u045c\1\u05a6\1\u05c5\3\u05a6\3\u045c\2\u05a6\1\u045c\10\u05a6"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\1\u05c6\2\u05a6\1\u05c7\1\u05c8"+ + "\1\u05c9\2\u05a6\1\u05ca\3\u05a6\1\u045c\1\u05a6\1\u045c\1\u05a6"+ + "\1\u045c\1\u05a6\1\u045c\1\u05a6\1\u045c\4\u05a6\1\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\1\u05a6\6\u045c\1\u05a6\3\u045c\1\u05cb"+ + "\2\u045c\1\u05a6\4\u045c\1\u05a6\2\u045c\1\u05a6\2\u045c\1\u05a6"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\6\u045c\1\u05a6\7\u045c\1\u05a6"+ + "\13\u045c\1\u04cc\12\u045c\242\0\1\u04cb\13\u045c\1\u05cc\6\u045c"+ + "\1\u05cd\7\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u05a6\11\u045c"+ + "\1\u05a6\6\u045c\1\u05a6\10\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\1\u05a6\1\u045c\6\u05a6\1\u05ce\1\u045c\2\u05a6\2\u045c\2\u05a6"+ + "\1\u045c\1\u05a6\1\u045c\3\u05a6\1\u045c\3\u05a6\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\4\u045c\1\u05a6\1\u05cf\4\u045c\2\u05a6\3\u045c"+ + "\2\u05a6\5\u045c\1\u05a6\3\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\3\u045c\2\u05a6\2\u045c\1\u05a6\1\u05d0\1\u045c\2\u05a6\1\u045c"+ + "\1\u05a6\3\u045c\1\u05a6\1\u045c\1\u05a6\1\u045c\1\u05a6\3\u045c"+ + "\1\u05a6\1\u04cc\12\u045c\242\0\1\u04cb\3\u045c\1\u05a6\1\u045c"+ + "\1\u05d1\4\u045c\1\u05a6\2\u045c\1\u05a6\14\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\2\u05a6\1\u045c\1\u05d2\1\u045c\1\u05d3\1\u045c"+ + 
"\2\u05a6\2\u045c\1\u05a6\4\u045c\1\u05a6\11\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\3\u045c\1\u05a6\13\u045c\1\u05a6\12\u045c\1\u04cc"+ + "\12\u045c\236\0\1\u036f\3\0\34\u036f\12\u05d4\1\0\2\u036f"+ + "\1\u03ea\2\u036f\1\u0371\1\u0228\1\u0229\1\u022a\2\0\2\u036f"+ + "\3\0\1\u036f\214\0\1\154\3\0\1\u016f\5\371\1\u037e"+ + "\24\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\220\0\4\u05d5\2\0"+ + "\1\u05d5\15\0\1\u05d5\6\0\12\u05d5\1\u04d5\242\0\4\u05d6"+ + "\2\0\1\u05d6\15\0\1\u05d6\6\0\12\u05d6\1\u05d7\242\0"+ + "\4\u05d8\2\0\1\u05d8\15\0\1\u05d8\6\0\1\u05d9\1\u05da"+ + "\5\u05d9\1\u05db\1\u05da\1\u05d9\13\0\1\u01ad\227\0\4\u05dc"+ + "\2\0\1\u05dc\15\0\1\u05dc\6\0\12\u05dc\1\u0556\12\0"+ + "\1\u01ad\227\0\4\u05d8\2\0\1\u05d8\15\0\1\u05d8\6\0"+ + "\1\u05d9\1\u05da\5\u05d9\1\u05db\1\u05da\1\u05d9\242\0\1\u0224"+ + "\4\u05dc\2\0\1\u05dc\15\0\1\u05dc\6\0\12\u05dc\1\u0556"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u05dc\2\0\1\u05dc\15\0"+ + "\1\u05dc\6\0\12\u05dd\1\u0556\12\0\1\u01ad\226\0\1\u0224"+ + "\4\u05dc\2\0\1\u05dc\15\0\1\u05dc\6\0\2\u05dd\1\u05dc"+ + "\2\u05dd\2\u05dc\2\u05dd\1\u05dc\1\u0556\12\0\1\u01ad\274\0"+ + "\1\u046f\12\0\1\u01ad\226\0\1\u0136\10\323\1\u05de\21\323"+ + "\1\u0137\12\323\242\0\1\u0136\4\323\1\u02c3\25\323\1\u0137"+ + "\12\323\242\0\1\u0136\25\323\1\u02bd\4\323\1\u0137\12\323"+ + "\242\0\1\u0136\32\323\1\u0137\1\323\1\u05df\10\323\242\0"+ + "\1\u0136\32\323\1\u0137\6\323\1\u05e0\3\323\242\0\1\u0136"+ + "\32\323\1\u0137\5\323\1\u05e1\4\323\242\0\1\u0136\32\323"+ + "\1\u0137\5\323\1\u05e2\4\323\242\0\1\u0136\32\323\1\u0137"+ + "\5\323\1\u04ec\4\323\242\0\1\u0136\17\323\1\u05e3\12\323"+ + "\1\u0137\12\323\242\0\1\u0136\12\323\1\u05e4\17\323\1\u0137"+ + "\12\323\242\0\1\u0136\25\323\1\u05e5\4\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u05e6\31\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\u05e7\31\323\1\u0137\12\323\242\0\1\u0136\15\323\1\u05e8"+ + "\14\323\1\u0137\12\323\242\0\1\u0136\1\323\1\u05e9\30\323"+ + 
"\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\10\323\1\u05ea"+ + "\1\323\242\0\1\u0136\21\323\1\u05eb\10\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u05ec\31\323\1\u0137\12\323\242\0\1\u0136"+ + "\32\323\1\u0137\3\323\1\u04ec\6\323\242\0\1\u0136\2\323"+ + "\1\u04fc\27\323\1\u0137\12\323\242\0\1\u0136\11\323\1\u05ed"+ + "\20\323\1\u0137\12\323\242\0\1\u0136\11\323\1\u05ee\20\323"+ + "\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\1\u02b5\11\323"+ + "\242\0\1\u0136\32\323\1\u0137\2\323\1\u02b5\7\323\242\0"+ + "\1\u0136\32\323\1\u0137\1\u0237\11\323\242\0\1\u0136\10\323"+ + "\1\u05ef\21\323\1\u0137\12\323\242\0\1\u0136\1\u05f0\31\323"+ + "\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\1\323\1\u05f1"+ + "\10\323\242\0\1\u0136\32\323\1\u0137\10\323\1\u01c1\1\323"+ + "\242\0\1\u0136\25\323\1\u05f2\4\323\1\u0137\12\323\236\0"+ + "\1\154\3\0\1\u016f\1\u05f3\31\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\7\371"+ + "\1\u0434\2\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\1\u05f4\31\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\u05f5\31\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\7\371\1\u05f6\22\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\6\371"+ + "\1\u05f7\23\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\u05f8\31\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\1\u05f9\31\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\1\371\1\u05fa\10\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\2\371\1\u05fb\7\371\1\0\3\154"+ + 
"\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\6\371\1\u0174\23\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\25\371"+ + "\1\u05fc\4\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\u05fd\31\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\2\371\1\u0199"+ + "\7\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\12\371"+ + "\1\u019b\17\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\24\371\1\u0174\5\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\220\0\4\u05fe\2\0\1\u05fe\15\0\1\u05fe\6\0"+ + "\12\u05fe\1\u051b\242\0\4\u05ff\2\0\1\u05ff\15\0\1\u05ff"+ + "\6\0\12\u05ff\1\u0600\242\0\4\u0601\2\0\1\u0601\15\0"+ + "\1\u0601\6\0\1\u0602\1\u0603\5\u0602\1\u0604\1\u0603\1\u0602"+ + "\13\0\1\u04ba\227\0\4\u0605\2\0\1\u0605\15\0\1\u0605"+ + "\6\0\12\u0605\1\u058d\12\0\1\u04ba\227\0\4\u0601\2\0"+ + "\1\u0601\15\0\1\u0601\6\0\1\u0602\1\u0603\5\u0602\1\u0604"+ + "\1\u0603\1\u0602\242\0\1\u0522\4\u0605\2\0\1\u0605\15\0"+ + "\1\u0605\6\0\12\u0605\1\u058d\12\0\1\u04ba\226\0\1\u0522"+ + "\4\u0605\2\0\1\u0605\15\0\1\u0605\6\0\12\u0606\1\u058d"+ + "\12\0\1\u04ba\226\0\1\u0522\4\u0605\2\0\1\u0605\15\0"+ + "\1\u0605\6\0\2\u0606\1\u0605\2\u0606\2\u0605\2\u0606\1\u0605"+ + "\1\u058d\12\0\1\u04ba\227\0\4\u0607\2\0\1\u0607\15\0"+ + "\1\u0607\6\0\12\u0607\1\u0521\12\0\1\u04ba\226\0\1\u0608"+ + "\33\0\12\u0609\242\0\1\u0608\33\0\12\u0592\242\0\1\u0608"+ + "\33\0\2\u0592\1\u0609\1\u0592\1\u060a\2\u0609\2\u0592\1\u0609"+ + "\242\0\1\u0522\4\u0607\2\0\1\u0607\15\0\1\u0607\6\0"+ + "\12\u0607\1\u0521\12\0\1\u04ba\222\0\1\u02f5\3\0\46\u02f5"+ + "\1\0\2\u02f5\1\u0364\2\u02f5\1\u02f7\1\0\1\u0363\3\0"+ + "\2\u02f5\3\0\1\u02f5\303\0\1\u060b\260\0\12\u060c\10\0"+ + 
"\1\u0363\277\0\1\u04c0\242\0\4\u060d\2\0\1\u060d\15\0"+ + "\1\u060d\6\0\12\u060d\1\u059b\242\0\4\u060e\2\0\1\u060e"+ + "\15\0\1\u060e\6\0\12\u060e\1\u060f\242\0\4\u0610\2\0"+ + "\1\u0610\15\0\1\u0610\6\0\12\u0610\1\u0611\12\0\1\u045b"+ + "\226\0\1\u04c7\4\u0610\2\0\1\u0610\15\0\1\u0610\6\0"+ + "\12\u0612\1\u0611\12\0\1\u045b\226\0\1\u04c7\4\u0610\2\0"+ + "\1\u0610\15\0\1\u0610\6\0\12\u0613\1\u0611\12\0\1\u045b"+ + "\226\0\1\u04c7\4\u0610\2\0\1\u0610\15\0\1\u0610\6\0"+ + "\2\u0613\1\u0612\1\u0613\1\u0614\2\u0612\2\u0613\1\u0612\1\u0611"+ + "\12\0\1\u045b\227\0\4\u0615\2\0\1\u0615\15\0\1\u0615"+ + "\6\0\12\u0615\1\u052d\12\0\1\u045b\226\0\1\u04c7\4\u0615"+ + "\2\0\1\u0615\15\0\1\u0615\6\0\12\u0615\1\u052d\12\0"+ + "\1\u045b\274\0\1\u04c6\12\0\1\u045b\262\0\1\u0616\1\u0617"+ + "\5\u0616\1\u0618\1\u0617\1\u0616\242\0\1\u05a3\307\0\1\u05a3"+ + "\33\0\2\u05a4\1\0\2\u05a4\2\0\2\u05a4\243\0\1\u0619"+ + "\32\u045c\1\u04cc\12\u045c\242\0\1\u0619\4\u045c\1\u05cb\25\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u0619\15\u045c\1\u0542\14\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u0619\10\u045c\1\u0542\21\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u0619\12\u045c\1\u061a\4\u045c\1\u05a6\12\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u0619\5\u045c\1\u061b\4\u045c\1\u05a6\1\u061c"+ + "\16\u045c\1\u04cc\12\u045c\242\0\1\u0619\5\u045c\1\u061d\24\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\1\u061e\3\u045c\1\u061f\25\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\20\u045c\1\u05a6\11\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\17\u045c\1\u0620\12\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\20\u045c\1\u0621\11\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u0619\17\u045c\1\u0622\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\7\u045c\1\u05a6\22\u045c\1\u04cc\12\u045c\242\0\1\u0619\11\u045c"+ + "\1\u0623\20\u045c\1\u04cc\12\u045c\242\0\1\u0619\1\u0624\31\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\30\u045c\1\u05a6\1\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u0619\4\u045c\1\u05ae\25\u045c\1\u04cc\12\u045c"+ + 
"\242\0\1\u0619\6\u045c\1\u05cb\10\u045c\1\u05a6\12\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u0619\13\u045c\1\u0625\16\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u0619\7\u045c\1\u0626\22\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u0619\13\u045c\1\u05ae\16\u045c\1\u04cc\12\u045c\242\0\1\u0619"+ + "\24\u045c\1\u0627\5\u045c\1\u04cc\12\u045c\242\0\1\u04cb\11\u045c"+ + "\1\u05a6\20\u045c\1\u04cc\12\u045c\242\0\1\u0619\16\u045c\1\u0628"+ + "\13\u045c\1\u04cc\12\u045c\242\0\1\u0619\12\u045c\1\u0629\17\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u0619\17\u045c\1\u05a6\12\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u0619\5\u045c\1\u05a6\24\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\16\u045c\1\u062a\13\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u0619\20\u045c\1\u062b\11\u045c\1\u04cc\12\u045c\242\0\1\u0619"+ + "\5\u045c\1\u062c\24\u045c\1\u04cc\12\u045c\242\0\1\u0619\22\u045c"+ + "\1\u062d\7\u045c\1\u04cc\12\u045c\242\0\1\u0619\13\u045c\1\u062e"+ + "\16\u045c\1\u04cc\12\u045c\242\0\1\u04cb\17\u045c\1\u062f\12\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u0630\7\u045c\1\u05a6"+ + "\20\u045c\1\u04cc\12\u045c\242\0\1\u0619\1\u0631\31\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u0619\2\u045c\1\u0632\27\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\15\u045c\1\u0633\14\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\5\u045c\1\u05a6\24\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u0634\12\u045c\242\0\1\u04cb\22\u045c\1\u05a6\7\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u0619\23\u045c\1\u05a6\2\u045c\1\u0629"+ + "\3\u045c\1\u04cc\12\u045c\242\0\1\u04cb\11\u045c\1\u0635\20\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u0619\17\u045c\1\u0636\12\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u0619\24\u045c\1\u0633\5\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u0619\13\u045c\1\u0637\16\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\31\u045c\1\u0638\1\u04cc\12\u045c\236\0\1\u036f\3\0"+ + "\34\u036f\12\u0639\1\0\2\u036f\1\u03ea\2\u036f\1\u0371\1\u0228"+ + "\1\u0229\1\u022a\2\0\2\u036f\3\0\1\u036f\266\0\1\u04d5"+ + 
"\242\0\4\u063a\2\0\1\u063a\15\0\1\u063a\6\0\12\u063a"+ + "\1\u05d7\242\0\4\u063b\2\0\1\u063b\15\0\1\u063b\6\0"+ + "\1\u063c\1\u063d\5\u063c\1\u063e\1\u063d\1\u063c\1\u063f\242\0"+ + "\4\u0640\2\0\1\u0640\15\0\1\u0640\6\0\12\u0640\1\u0641"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u0640\2\0\1\u0640\15\0"+ + "\1\u0640\6\0\12\u0642\1\u0641\12\0\1\u01ad\226\0\1\u0224"+ + "\4\u0640\2\0\1\u0640\15\0\1\u0640\6\0\12\u0643\1\u0641"+ + "\12\0\1\u01ad\226\0\1\u0224\4\u0640\2\0\1\u0640\15\0"+ + "\1\u0640\6\0\2\u0643\1\u0642\1\u0643\1\u0644\2\u0642\2\u0643"+ + "\1\u0642\1\u0641\12\0\1\u01ad\227\0\4\u0645\2\0\1\u0645"+ + "\15\0\1\u0645\6\0\12\u0645\1\u0556\12\0\1\u01ad\226\0"+ + "\1\u0224\4\u0645\2\0\1\u0645\15\0\1\u0645\6\0\12\u0645"+ + "\1\u0556\12\0\1\u01ad\226\0\1\u0136\5\323\1\u0424\24\323"+ + "\1\u0137\12\323\242\0\1\u0136\3\323\1\u0646\26\323\1\u0137"+ + "\12\323\242\0\1\u0136\6\323\1\u0241\23\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\323\1\u0571\30\323\1\u0137\12\323\242\0"+ + "\1\u0136\3\323\1\u0647\26\323\1\u0137\12\323\242\0\1\u0136"+ + "\32\323\1\u0137\10\323\1\u0648\1\323\242\0\1\u0136\32\323"+ + "\1\u0137\2\323\1\u0649\7\323\242\0\1\u0136\32\323\1\u0137"+ + "\2\323\1\u064a\7\323\242\0\1\u0136\32\323\1\u0137\3\323"+ + "\1\u064b\6\323\242\0\1\u0136\32\323\1\u0137\5\323\1\u064c"+ + "\4\323\242\0\1\u0136\32\323\1\u0137\3\323\1\u064d\6\323"+ + "\242\0\1\u0136\2\323\1\u064e\27\323\1\u0137\12\323\242\0"+ + "\1\u0136\1\u064f\31\323\1\u0137\12\323\242\0\1\u0136\24\323"+ + "\1\u0650\5\323\1\u0137\12\323\242\0\1\u0136\23\323\1\u02b5"+ + "\6\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\1\u0651"+ + "\11\323\242\0\1\u0136\32\323\1\u0137\1\u0652\11\323\242\0"+ + "\1\u0136\32\323\1\u0137\11\323\1\u0653\242\0\1\u0136\12\323"+ + "\1\u0654\17\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137"+ + "\2\323\1\u02b9\7\323\242\0\1\u0136\2\323\1\u0655\27\323"+ + "\1\u0137\12\323\236\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\1\371\1\u0656\10\371\1\0\3\154\1\0\1\154\1\156"+ + 
"\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\2\371\1\u0657\27\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\6\371\1\u0444"+ + "\3\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f\15\371"+ + "\1\u010f\14\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\32\371\1\203\10\371\1\u043f\1\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\23\371\1\u0658\6\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\4\371\1\u0659\5\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\1\u0588\31\371\1\203\12\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203\10\371"+ + "\1\u0209\1\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\31\371\1\u065a\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\32\371\1\203\4\371\1\u065b\5\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\265\0\1\u051b\242\0\4\u065c\2\0\1\u065c\15\0"+ + "\1\u065c\6\0\12\u065c\1\u0600\242\0\4\u065d\2\0\1\u065d"+ + "\15\0\1\u065d\6\0\12\u065d\1\u065e\242\0\4\u065f\2\0"+ + "\1\u065f\15\0\1\u065f\6\0\12\u065f\1\u0660\12\0\1\u04ba"+ + "\226\0\1\u0522\4\u065f\2\0\1\u065f\15\0\1\u065f\6\0"+ + "\12\u0661\1\u0660\12\0\1\u04ba\226\0\1\u0522\4\u065f\2\0"+ + "\1\u065f\15\0\1\u065f\6\0\12\u0662\1\u0660\12\0\1\u04ba"+ + "\226\0\1\u0522\4\u065f\2\0\1\u065f\15\0\1\u065f\6\0"+ + "\2\u0662\1\u0661\1\u0662\1\u0663\2\u0661\2\u0662\1\u0661\1\u0660"+ + "\12\0\1\u04ba\227\0\4\u0664\2\0\1\u0664\15\0\1\u0664"+ + "\6\0\12\u0664\1\u058d\12\0\1\u04ba\226\0\1\u0522\4\u0664"+ + "\2\0\1\u0664\15\0\1\u0664\6\0\12\u0664\1\u058d\12\0"+ + 
"\1\u04ba\274\0\1\u0521\12\0\1\u04ba\262\0\1\u0665\1\u0666"+ + "\5\u0665\1\u0667\1\u0666\1\u0665\242\0\1\u0608\307\0\1\u0608"+ + "\33\0\2\u0609\1\0\2\u0609\2\0\2\u0609\244\0\1\u0668"+ + "\1\0\1\u0668\5\0\1\u0668\354\0\1\u0363\232\0\4\u0669"+ + "\2\0\1\u0669\15\0\1\u0669\6\0\12\u0669\1\u059b\242\0"+ + "\4\u066a\2\0\1\u066a\15\0\1\u066a\6\0\12\u066a\1\u066b"+ + "\242\0\4\u066c\2\0\1\u066c\15\0\1\u066c\6\0\1\u066d"+ + "\1\u066e\5\u066d\1\u066f\1\u066e\1\u066d\13\0\1\u045b\227\0"+ + "\4\u0670\2\0\1\u0670\15\0\1\u0670\6\0\12\u0670\1\u0611"+ + "\12\0\1\u045b\227\0\4\u066c\2\0\1\u066c\15\0\1\u066c"+ + "\6\0\1\u066d\1\u066e\5\u066d\1\u066f\1\u066e\1\u066d\242\0"+ + "\1\u04c7\4\u0670\2\0\1\u0670\15\0\1\u0670\6\0\12\u0670"+ + "\1\u0611\12\0\1\u045b\226\0\1\u04c7\4\u0670\2\0\1\u0670"+ + "\15\0\1\u0670\6\0\12\u0671\1\u0611\12\0\1\u045b\226\0"+ + "\1\u04c7\4\u0670\2\0\1\u0670\15\0\1\u0670\6\0\2\u0671"+ + "\1\u0670\2\u0671\2\u0670\2\u0671\1\u0670\1\u0611\12\0\1\u045b"+ + "\274\0\1\u052d\12\0\1\u045b\226\0\1\u0672\33\0\12\u0673"+ + "\242\0\1\u0672\33\0\12\u0616\242\0\1\u0672\33\0\2\u0616"+ + "\1\u0673\1\u0616\1\u0674\2\u0673\2\u0616\1\u0673\242\0\1\u04cb"+ + "\3\u045c\1\u0675\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb\15\u045c"+ + "\1\u05a6\14\u045c\1\u04cc\12\u045c\242\0\1\u04cb\16\u045c\1\u0676"+ + "\1\u0677\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\17\u045c\1\u0678"+ + "\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\12\u045c\1\u0679\17\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\3\u045c\1\u067a\26\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\3\u045c\1\u067b\26\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\10\u045c\1\u067c\21\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u067d\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\11\u045c"+ + "\1\u067e\20\u045c\1\u04cc\12\u045c\242\0\1\u04cb\15\u045c\1\u067f"+ + "\14\u045c\1\u04cc\12\u045c\242\0\1\u04cb\2\u045c\1\u05a6\27\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\25\u045c\1\u0680\4\u045c\1\u04cc"+ + 
"\12\u045c\242\0\1\u04cb\10\u045c\1\u05a6\21\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\3\u045c\1\u0681\26\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\3\u045c\1\u05a6\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\17\u045c\1\u05a6\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\12\u045c"+ + "\1\u0682\17\u045c\1\u04cc\12\u045c\242\0\1\u04cb\17\u045c\1\u0683"+ + "\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\31\u045c\1\u05a6\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\7\u045c\1\u0684\22\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\17\u045c\1\u0685\12\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\25\u045c\1\u0686\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\30\u045c\1\u0687\1\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u062d"+ + "\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\16\u045c\1\u05a6\13\u045c"+ + "\1\u04cc\12\u045c\243\0\32\u045c\1\u0688\12\u045c\242\0\1\u04cb"+ + "\2\u045c\1\u0689\27\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c"+ + "\1\u068a\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb\17\u045c\1\u068b"+ + "\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u068c\31\u045c\1\u04cc"+ + "\12\u045c\236\0\1\u036f\3\0\46\u036f\1\0\2\u036f\1\u03ea"+ + "\2\u036f\1\u0371\1\u0228\1\u0229\1\u022a\2\0\2\u036f\3\0"+ + "\1\u036f\221\0\4\u068d\2\0\1\u068d\15\0\1\u068d\6\0"+ + "\12\u068d\1\u05d7\242\0\4\u068e\2\0\1\u068e\15\0\1\u068e"+ + "\6\0\12\u068e\1\u068f\241\0\1\u0224\4\u068e\2\0\1\u068e"+ + "\15\0\1\u068e\6\0\12\u0690\1\u068f\241\0\1\u0224\4\u068e"+ + "\2\0\1\u068e\15\0\1\u068e\6\0\12\u0691\1\u068f\241\0"+ + "\1\u0224\4\u068e\2\0\1\u068e\15\0\1\u068e\6\0\2\u0691"+ + "\1\u0690\1\u0691\1\u0692\2\u0690\2\u0691\1\u0690\1\u068f\242\0"+ + "\4\u0693\2\0\1\u0693\15\0\1\u0693\6\0\12\u0693\13\0"+ + "\1\u01ad\227\0\4\u0694\2\0\1\u0694\15\0\1\u0694\6\0"+ + "\12\u0694\1\u0641\12\0\1\u01ad\227\0\4\u0693\2\0\1\u0693"+ + "\15\0\1\u0693\6\0\12\u0693\242\0\1\u0224\4\u0694\2\0"+ + "\1\u0694\15\0\1\u0694\6\0\12\u0694\1\u0641\12\0\1\u01ad"+ + "\226\0\1\u0224\4\u0694\2\0\1\u0694\15\0\1\u0694\6\0"+ + 
"\12\u0695\1\u0641\12\0\1\u01ad\226\0\1\u0224\4\u0694\2\0"+ + "\1\u0694\15\0\1\u0694\6\0\2\u0695\1\u0694\2\u0695\2\u0694"+ + "\2\u0695\1\u0694\1\u0641\12\0\1\u01ad\274\0\1\u0556\12\0"+ + "\1\u01ad\226\0\1\u0136\1\u0696\31\323\1\u0137\12\323\242\0"+ + "\1\u0136\32\323\1\u0137\7\323\1\u04ec\2\323\242\0\1\u0136"+ + "\1\u0697\31\323\1\u0137\12\323\242\0\1\u0136\1\u0698\31\323"+ + "\1\u0137\12\323\242\0\1\u0136\7\323\1\u0699\22\323\1\u0137"+ + "\12\323\242\0\1\u0136\6\323\1\u069a\23\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u069b\31\323\1\u0137\12\323\242\0\1\u0136"+ + "\1\u069c\31\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137"+ + "\1\323\1\u069d\10\323\242\0\1\u0136\32\323\1\u0137\2\323"+ + "\1\u069e\7\323\242\0\1\u0136\6\323\1\u022b\23\323\1\u0137"+ + "\12\323\242\0\1\u0136\25\323\1\u069f\4\323\1\u0137\12\323"+ + "\242\0\1\u0136\1\u06a0\31\323\1\u0137\12\323\242\0\1\u0136"+ + "\32\323\1\u0137\2\323\1\u0250\7\323\242\0\1\u0136\12\323"+ + "\1\u0252\17\323\1\u0137\12\323\242\0\1\u0136\24\323\1\u022b"+ + "\5\323\1\u0137\12\323\236\0\1\154\3\0\1\u016f\24\371"+ + "\1\u06a1\5\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\32\371\1\203\6\371\1\u06a2\3\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\213\0\1\154\3\0\1\u016f\1\371\1\u0180\30\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\2\371\1\u06a3\27\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\3\371\1\u06a4\26\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\3\371\1\u06a5"+ + "\26\371\1\203\12\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\220\0\4\u06a6\2\0"+ + "\1\u06a6\15\0\1\u06a6\6\0\12\u06a6\1\u0600\242\0\4\u06a7"+ + "\2\0\1\u06a7\15\0\1\u06a7\6\0\12\u06a7\1\u06a8\242\0"+ + "\4\u06a9\2\0\1\u06a9\15\0\1\u06a9\6\0\1\u06aa\1\u06ab"+ + 
"\5\u06aa\1\u06ac\1\u06ab\1\u06aa\13\0\1\u04ba\227\0\4\u06ad"+ + "\2\0\1\u06ad\15\0\1\u06ad\6\0\12\u06ad\1\u0660\12\0"+ + "\1\u04ba\227\0\4\u06a9\2\0\1\u06a9\15\0\1\u06a9\6\0"+ + "\1\u06aa\1\u06ab\5\u06aa\1\u06ac\1\u06ab\1\u06aa\242\0\1\u0522"+ + "\4\u06ad\2\0\1\u06ad\15\0\1\u06ad\6\0\12\u06ad\1\u0660"+ + "\12\0\1\u04ba\226\0\1\u0522\4\u06ad\2\0\1\u06ad\15\0"+ + "\1\u06ad\6\0\12\u06ae\1\u0660\12\0\1\u04ba\226\0\1\u0522"+ + "\4\u06ad\2\0\1\u06ad\15\0\1\u06ad\6\0\2\u06ae\1\u06ad"+ + "\2\u06ae\2\u06ad\2\u06ae\1\u06ad\1\u0660\12\0\1\u04ba\274\0"+ + "\1\u058d\12\0\1\u04ba\226\0\1\u06af\33\0\12\u06b0\242\0"+ + "\1\u06af\33\0\12\u0665\242\0\1\u06af\33\0\2\u0665\1\u06b0"+ + "\1\u0665\1\u06b1\2\u06b0\2\u0665\1\u06b0\321\0\1\u022a\276\0"+ + "\1\u059b\242\0\4\u06b2\2\0\1\u06b2\15\0\1\u06b2\6\0"+ + "\12\u06b2\1\u066b\242\0\4\u06b3\2\0\1\u06b3\15\0\1\u06b3"+ + "\6\0\12\u06b3\1\u06b4\242\0\4\u06b5\2\0\1\u06b5\15\0"+ + "\1\u06b5\6\0\12\u06b5\1\u06b6\12\0\1\u045b\226\0\1\u04c7"+ + "\4\u06b5\2\0\1\u06b5\15\0\1\u06b5\6\0\12\u06b7\1\u06b6"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u06b5\2\0\1\u06b5\15\0"+ + "\1\u06b5\6\0\12\u06b8\1\u06b6\12\0\1\u045b\226\0\1\u04c7"+ + "\4\u06b5\2\0\1\u06b5\15\0\1\u06b5\6\0\2\u06b8\1\u06b7"+ + "\1\u06b8\1\u06b9\2\u06b7\2\u06b8\1\u06b7\1\u06b6\12\0\1\u045b"+ + "\227\0\4\u06ba\2\0\1\u06ba\15\0\1\u06ba\6\0\12\u06ba"+ + "\1\u0611\12\0\1\u045b\226\0\1\u04c7\4\u06ba\2\0\1\u06ba"+ + "\15\0\1\u06ba\6\0\12\u06ba\1\u0611\12\0\1\u045b\262\0"+ + "\1\u06bb\1\u06bc\5\u06bb\1\u06bd\1\u06bc\1\u06bb\242\0\1\u0672"+ + "\307\0\1\u0672\33\0\2\u0673\1\0\2\u0673\2\0\2\u0673"+ + "\243\0\1\u04cb\4\u045c\1\u0542\25\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\17\u045c\1\u06be\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\4\u045c\1\u06bf\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\25\u045c"+ + "\1\u06c0\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb\5\u045c\1\u06c1"+ + "\24\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u06c2\30\u045c"+ + 
"\1\u04cc\12\u045c\242\0\1\u04cb\4\u045c\1\u06c3\25\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\15\u045c\1\u06c4\14\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\17\u045c\1\u0629\12\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\3\u045c\1\u06c5\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\25\u045c\1\u06c6\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb\17\u045c"+ + "\1\u06c0\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\20\u045c\1\u06c7"+ + "\11\u045c\1\u04cc\12\u045c\242\0\1\u04cb\24\u045c\1\u06c0\5\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\5\u045c\1\u06c8\24\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\11\u045c\1\u06c9\20\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\5\u045c\1\u05cb\24\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\13\u045c\1\u06ca\16\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\3\u045c\1\u05bc\26\u045c\1\u04cc\12\u045c\243\0\1\u045c\1\u06cb"+ + "\3\u045c\1\u06cc\1\u06cd\1\u06ce\1\u045c\1\u06cf\1\u06d0\1\u06d1"+ + "\1\u06d2\1\u06d3\1\u06d4\1\u045c\1\u06d5\1\u06d6\1\u06d7\2\u045c"+ + "\1\u06d8\1\u06d9\1\u06da\1\u045c\1\u06db\1\u04cc\1\u06dc\2\u045c"+ + "\1\u06dd\1\u045c\1\u06de\1\u06df\3\u045c\242\0\1\u04cb\10\u045c"+ + "\1\u06e0\21\u045c\1\u04cc\12\u045c\242\0\1\u04cb\25\u045c\1\u06e1"+ + "\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb\20\u045c\1\u06e2\11\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\7\u045c\1\u0629\22\u045c\1\u04cc"+ + "\12\u045c\310\0\1\u05d7\242\0\4\u06e3\2\0\1\u06e3\15\0"+ + "\1\u06e3\6\0\12\u06e3\1\u068f\242\0\4\u0693\2\0\1\u0693"+ + "\15\0\1\u0693\6\0\12\u0693\1\u04dc\241\0\1\u0224\4\u06e3"+ + "\2\0\1\u06e3\15\0\1\u06e3\6\0\12\u06e3\1\u068f\241\0"+ + "\1\u0224\4\u06e3\2\0\1\u06e3\15\0\1\u06e3\6\0\12\u06e4"+ + "\1\u068f\241\0\1\u0224\4\u06e3\2\0\1\u06e3\15\0\1\u06e3"+ + "\6\0\2\u06e4\1\u06e3\2\u06e4\2\u06e3\2\u06e4\1\u06e3\1\u068f"+ + "\242\0\4\u06e5\2\0\1\u06e5\15\0\1\u06e5\6\0\12\u06e5"+ + "\13\0\1\u01ad\227\0\4\u06e6\2\0\1\u06e6\15\0\1\u06e6"+ + "\6\0\12\u06e6\1\u0641\12\0\1\u01ad\226\0\1\u0224\4\u06e6"+ + "\2\0\1\u06e6\15\0\1\u06e6\6\0\12\u06e6\1\u0641\12\0"+ + 
"\1\u01ad\226\0\1\u0136\32\323\1\u0137\1\323\1\u06e7\10\323"+ + "\242\0\1\u0136\2\323\1\u06e8\27\323\1\u0137\12\323\242\0"+ + "\1\u0136\32\323\1\u0137\6\323\1\u04fc\3\323\242\0\1\u0136"+ + "\15\323\1\u01c1\14\323\1\u0137\12\323\242\0\1\u0136\32\323"+ + "\1\u0137\10\323\1\u04f7\1\323\242\0\1\u0136\23\323\1\u06e9"+ + "\6\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\4\323"+ + "\1\u06ea\5\323\242\0\1\u0136\1\u0655\31\323\1\u0137\12\323"+ + "\242\0\1\u0136\32\323\1\u0137\10\323\1\u02b9\1\323\242\0"+ + "\1\u0136\31\323\1\u06eb\1\u0137\12\323\242\0\1\u0136\32\323"+ + "\1\u0137\4\323\1\u06ec\5\323\236\0\1\154\3\0\1\u016f"+ + "\32\371\1\203\7\371\1\u06ed\2\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\213\0"+ + "\1\154\3\0\1\u016f\27\371\1\u0174\2\371\1\203\12\371"+ + "\1\0\3\154\1\0\1\154\1\156\3\154\3\0\1\154"+ + "\3\0\2\154\213\0\1\154\3\0\1\u016f\32\371\1\203"+ + "\3\371\1\u06ee\6\371\1\0\3\154\1\0\1\154\1\156"+ + "\3\154\3\0\1\154\3\0\2\154\213\0\1\154\3\0"+ + "\1\u016f\32\371\1\203\7\371\1\u010f\2\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\3\371\1\u06ef\26\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\265\0\1\u0600\242\0\4\u06f0\2\0"+ + "\1\u06f0\15\0\1\u06f0\6\0\12\u06f0\1\u06a8\242\0\4\u06f1"+ + "\2\0\1\u06f1\15\0\1\u06f1\6\0\12\u06f1\1\u06f2\242\0"+ + "\4\u06f3\2\0\1\u06f3\15\0\1\u06f3\6\0\12\u06f3\1\u06f4"+ + "\12\0\1\u04ba\226\0\1\u0522\4\u06f3\2\0\1\u06f3\15\0"+ + "\1\u06f3\6\0\12\u06f5\1\u06f4\12\0\1\u04ba\226\0\1\u0522"+ + "\4\u06f3\2\0\1\u06f3\15\0\1\u06f3\6\0\12\u06f6\1\u06f4"+ + "\12\0\1\u04ba\226\0\1\u0522\4\u06f3\2\0\1\u06f3\15\0"+ + "\1\u06f3\6\0\2\u06f6\1\u06f5\1\u06f6\1\u06f7\2\u06f5\2\u06f6"+ + "\1\u06f5\1\u06f4\12\0\1\u04ba\227\0\4\u06f8\2\0\1\u06f8"+ + "\15\0\1\u06f8\6\0\12\u06f8\1\u0660\12\0\1\u04ba\226\0"+ + "\1\u0522\4\u06f8\2\0\1\u06f8\15\0\1\u06f8\6\0\12\u06f8"+ + "\1\u0660\12\0\1\u04ba\262\0\1\u06f9\1\u06fa\5\u06f9\1\u06fb"+ + 
"\1\u06fa\1\u06f9\242\0\1\u06af\307\0\1\u06af\33\0\2\u06b0"+ + "\1\0\2\u06b0\2\0\2\u06b0\244\0\4\u06fc\2\0\1\u06fc"+ + "\15\0\1\u06fc\6\0\12\u06fc\1\u066b\242\0\4\u06fd\2\0"+ + "\1\u06fd\15\0\1\u06fd\6\0\12\u06fd\1\u06fe\242\0\4\u06ff"+ + "\2\0\1\u06ff\15\0\1\u06ff\6\0\1\u0700\1\u0701\5\u0700"+ + "\1\u0702\1\u0701\1\u0700\13\0\1\u045b\227\0\4\u0703\2\0"+ + "\1\u0703\15\0\1\u0703\6\0\12\u0703\1\u06b6\12\0\1\u045b"+ + "\227\0\4\u06ff\2\0\1\u06ff\15\0\1\u06ff\6\0\1\u0700"+ + "\1\u0701\5\u0700\1\u0702\1\u0701\1\u0700\242\0\1\u04c7\4\u0703"+ + "\2\0\1\u0703\15\0\1\u0703\6\0\12\u0703\1\u06b6\12\0"+ + "\1\u045b\226\0\1\u04c7\4\u0703\2\0\1\u0703\15\0\1\u0703"+ + "\6\0\12\u0704\1\u06b6\12\0\1\u045b\226\0\1\u04c7\4\u0703"+ + "\2\0\1\u0703\15\0\1\u0703\6\0\2\u0704\1\u0703\2\u0704"+ + "\2\u0703\2\u0704\1\u0703\1\u06b6\12\0\1\u045b\274\0\1\u0611"+ + "\12\0\1\u045b\262\0\12\u0705\13\0\1\u045b\262\0\12\u06bb"+ + "\13\0\1\u045b\262\0\2\u06bb\1\u0705\1\u06bb\1\u0706\2\u0705"+ + "\2\u06bb\1\u0705\13\0\1\u045b\226\0\1\u04cb\4\u045c\1\u0707"+ + "\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u0708\31\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\10\u045c\1\u0709\21\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\13\u045c\1\u070a\16\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\17\u045c\1\u070b\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\15\u045c\1\u070c\14\u045c\1\u04cc\12\u045c\242\0\1\u04cb\12\u045c"+ + "\1\u070d\17\u045c\1\u04cc\12\u045c\242\0\1\u04cb\4\u045c\1\u062d"+ + "\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\10\u045c\1\u070e\21\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\12\u045c\1\u05a6\17\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\7\u045c\1\u070f\22\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\3\u045c\1\u0633\26\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\5\u045c\1\u0710\24\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\11\u045c\1\u0711\20\u045c\1\u04cc\12\u045c\242\0\1\u04cb\7\u045c"+ + "\1\u0712\22\u045c\1\u04cc\1\u0713\11\u045c\242\0\1\u04cb\10\u045c"+ + 
"\1\u0714\4\u045c\1\u0715\5\u045c\1\u0716\6\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\3\u045c\1\u0717\26\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\7\u045c\1\u0718\22\u045c\1\u04cc\10\u045c\1\u0719\1\u045c"+ + "\242\0\1\u04cb\7\u045c\1\u071a\22\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\7\u045c\1\u071b\22\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\5\u045c\1\u071c\4\u045c\242\0\1\u04cb\7\u045c"+ + "\1\u071d\22\u045c\1\u04cc\10\u045c\1\u071e\1\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\5\u045c\1\u071f\4\u045c\242\0\1\u04cb\13\u045c"+ + "\1\u0720\16\u045c\1\u04cc\12\u045c\242\0\1\u04cb\7\u045c\1\u0721"+ + "\22\u045c\1\u04cc\12\u045c\242\0\1\u04cb\26\u045c\1\u0722\3\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\7\u045c\1\u071f"+ + "\2\u045c\242\0\1\u04cb\15\u045c\1\u0723\14\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\10\u045c\1\u0724\1\u0725\242\0"+ + "\1\u04cb\6\u045c\1\u0726\1\u0727\22\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\3\u045c\1\u0728\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\4\u045c\1\u071f\5\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\1\u045c\1\u0729\10\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\1\u045c\1\u072a\10\u045c\242\0\1\u04cb\13\u045c\1\u072b\16\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\3\u045c\1\u072c\26\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\4\u045c\1\u06c9\25\u045c\1\u04cc\12\u045c"+ + "\243\0\4\u072d\2\0\1\u072d\15\0\1\u072d\6\0\12\u072d"+ + "\1\u068f\241\0\1\u0224\4\u072d\2\0\1\u072d\15\0\1\u072d"+ + "\6\0\12\u072d\1\u068f\242\0\4\u072e\2\0\1\u072e\15\0"+ + "\1\u072e\6\0\12\u072e\13\0\1\u01ad\274\0\1\u0641\12\0"+ + "\1\u01ad\226\0\1\u0136\24\323\1\u072f\5\323\1\u0137\12\323"+ + "\242\0\1\u0136\32\323\1\u0137\6\323\1\u0730\3\323\242\0"+ + "\1\u0136\1\323\1\u0237\30\323\1\u0137\12\323\242\0\1\u0136"+ + "\2\323\1\u0731\27\323\1\u0137\12\323\242\0\1\u0136\3\323"+ + "\1\u0732\26\323\1\u0137\12\323\242\0\1\u0136\3\323\1\u0733"+ + 
"\26\323\1\u0137\12\323\236\0\1\154\3\0\1\u016f\7\371"+ + "\1\u0734\22\371\1\203\12\371\1\0\3\154\1\0\1\154"+ + "\1\156\3\154\3\0\1\154\3\0\2\154\213\0\1\154"+ + "\3\0\1\u016f\1\u0735\31\371\1\203\12\371\1\0\3\154"+ + "\1\0\1\154\1\156\3\154\3\0\1\154\3\0\2\154"+ + "\213\0\1\154\3\0\1\u016f\32\371\1\203\1\371\1\u0444"+ + "\10\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\220\0\4\u0736\2\0\1\u0736\15\0"+ + "\1\u0736\6\0\12\u0736\1\u06a8\242\0\4\u0737\2\0\1\u0737"+ + "\15\0\1\u0737\6\0\12\u0737\1\u0738\242\0\4\u0739\2\0"+ + "\1\u0739\15\0\1\u0739\6\0\1\u073a\1\u073b\5\u073a\1\u073c"+ + "\1\u073b\1\u073a\13\0\1\u04ba\227\0\4\u073d\2\0\1\u073d"+ + "\15\0\1\u073d\6\0\12\u073d\1\u06f4\12\0\1\u04ba\227\0"+ + "\4\u0739\2\0\1\u0739\15\0\1\u0739\6\0\1\u073a\1\u073b"+ + "\5\u073a\1\u073c\1\u073b\1\u073a\242\0\1\u0522\4\u073d\2\0"+ + "\1\u073d\15\0\1\u073d\6\0\12\u073d\1\u06f4\12\0\1\u04ba"+ + "\226\0\1\u0522\4\u073d\2\0\1\u073d\15\0\1\u073d\6\0"+ + "\12\u073e\1\u06f4\12\0\1\u04ba\226\0\1\u0522\4\u073d\2\0"+ + "\1\u073d\15\0\1\u073d\6\0\2\u073e\1\u073d\2\u073e\2\u073d"+ + "\2\u073e\1\u073d\1\u06f4\12\0\1\u04ba\274\0\1\u0660\12\0"+ + "\1\u04ba\262\0\12\u073f\13\0\1\u04ba\262\0\12\u06f9\13\0"+ + "\1\u04ba\262\0\2\u06f9\1\u073f\1\u06f9\1\u0740\2\u073f\2\u06f9"+ + "\1\u073f\13\0\1\u04ba\274\0\1\u066b\242\0\4\u0741\2\0"+ + "\1\u0741\15\0\1\u0741\6\0\12\u0741\1\u06fe\242\0\4\u0742"+ + "\2\0\1\u0742\15\0\1\u0742\6\0\12\u0742\1\u0743\242\0"+ + "\4\u0744\2\0\1\u0744\15\0\1\u0744\6\0\12\u0744\1\u0745"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u0744\2\0\1\u0744\15\0"+ + "\1\u0744\6\0\12\u0746\1\u0745\12\0\1\u045b\226\0\1\u04c7"+ + "\4\u0744\2\0\1\u0744\15\0\1\u0744\6\0\12\u0747\1\u0745"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u0744\2\0\1\u0744\15\0"+ + "\1\u0744\6\0\2\u0747\1\u0746\1\u0747\1\u0748\2\u0746\2\u0747"+ + "\1\u0746\1\u0745\12\0\1\u045b\227\0\4\u0749\2\0\1\u0749"+ + "\15\0\1\u0749\6\0\12\u0749\1\u06b6\12\0\1\u045b\226\0"+ + 
"\1\u04c7\4\u0749\2\0\1\u0749\15\0\1\u0749\6\0\12\u0749"+ + "\1\u06b6\12\0\1\u045b\307\0\1\u045b\262\0\2\u0705\1\0"+ + "\2\u0705\2\0\2\u0705\14\0\1\u045b\226\0\1\u04cb\20\u045c"+ + "\1\u074a\11\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u074b"+ + "\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb\13\u045c\1\u05b2\16\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\2\u045c\1\u0633\27\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\5\u045c\1\u06c5\24\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\4\u045c\1\u074c\25\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\3\u045c\1\u074d\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\1\u045c\1\u0633\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb\4\u045c"+ + "\1\u074e\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\11\u045c\1\u074f"+ + "\20\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u0750\30\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\24\u045c\1\u0751\5\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\1\u045c\1\u0752\30\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\14\u045c\1\u0753\15\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u045c\1\u0754\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\1\u045c\1\u0755\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c"+ + "\1\u0756\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb\24\u045c\1\u0757"+ + "\5\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u0758\31\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\24\u045c\1\u0759\5\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\24\u045c\1\u075a\5\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\27\u045c\1\u075b\2\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\24\u045c\1\u075c\5\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u0627"+ + "\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\24\u045c\1\u0756\5\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\20\u045c\1\u075d\11\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\24\u045c\1\u075e\5\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\1\u045c\1\u075f\30\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\4\u045c\1\u0760\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + 
"\1\u0761\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\21\u045c\1\u0762"+ + "\10\u045c\1\u04cc\12\u045c\242\0\1\u04cb\4\u045c\1\u0763\25\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\24\u045c\1\u0764\5\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\1\u045c\1\u0765\10\u045c"+ + "\242\0\1\u04cb\1\u0766\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\1\u0767\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\7\u045c\1\u0633"+ + "\22\u045c\1\u04cc\12\u045c\242\0\1\u04cb\13\u045c\1\u05a6\16\u045c"+ + "\1\u04cc\12\u045c\310\0\1\u068f\242\0\4\u04dc\2\0\1\u04dc"+ + "\15\0\1\u04dc\6\0\12\u04dc\13\0\1\u01ad\226\0\1\u0136"+ + "\32\323\1\u0137\7\323\1\u0768\2\323\242\0\1\u0136\27\323"+ + "\1\u022b\2\323\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137"+ + "\3\323\1\u0769\6\323\242\0\1\u0136\32\323\1\u0137\7\323"+ + "\1\u01c1\2\323\242\0\1\u0136\3\323\1\u076a\26\323\1\u0137"+ + "\12\323\236\0\1\154\3\0\1\u016f\32\371\1\203\7\371"+ + "\1\u076b\2\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + "\3\0\1\154\3\0\2\154\213\0\1\154\3\0\1\u016f"+ + "\4\371\1\u0174\25\371\1\203\12\371\1\0\3\154\1\0"+ + "\1\154\1\156\3\154\3\0\1\154\3\0\2\154\265\0"+ + "\1\u06a8\242\0\4\u076c\2\0\1\u076c\15\0\1\u076c\6\0"+ + "\12\u076c\1\u0738\242\0\4\u076d\2\0\1\u076d\15\0\1\u076d"+ + "\6\0\12\u076d\1\u076e\242\0\4\u076f\2\0\1\u076f\15\0"+ + "\1\u076f\6\0\12\u076f\1\u0770\12\0\1\u04ba\226\0\1\u0522"+ + "\4\u076f\2\0\1\u076f\15\0\1\u076f\6\0\12\u0771\1\u0770"+ + "\12\0\1\u04ba\226\0\1\u0522\4\u076f\2\0\1\u076f\15\0"+ + "\1\u076f\6\0\12\u0772\1\u0770\12\0\1\u04ba\226\0\1\u0522"+ + "\4\u076f\2\0\1\u076f\15\0\1\u076f\6\0\2\u0772\1\u0771"+ + "\1\u0772\1\u0773\2\u0771\2\u0772\1\u0771\1\u0770\12\0\1\u04ba"+ + "\227\0\4\u0774\2\0\1\u0774\15\0\1\u0774\6\0\12\u0774"+ + "\1\u06f4\12\0\1\u04ba\226\0\1\u0522\4\u0774\2\0\1\u0774"+ + "\15\0\1\u0774\6\0\12\u0774\1\u06f4\12\0\1\u04ba\307\0"+ + "\1\u04ba\262\0\2\u073f\1\0\2\u073f\2\0\2\u073f\14\0"+ + "\1\u04ba\227\0\4\u0775\2\0\1\u0775\15\0\1\u0775\6\0"+ + 
"\12\u0775\1\u06fe\242\0\4\u0776\2\0\1\u0776\15\0\1\u0776"+ + "\6\0\12\u0776\1\u0777\242\0\4\u0778\2\0\1\u0778\15\0"+ + "\1\u0778\6\0\1\u0779\1\u077a\5\u0779\1\u077b\1\u077a\1\u0779"+ + "\13\0\1\u045b\227\0\4\u077c\2\0\1\u077c\15\0\1\u077c"+ + "\6\0\12\u077c\1\u0745\12\0\1\u045b\227\0\4\u0778\2\0"+ + "\1\u0778\15\0\1\u0778\6\0\1\u0779\1\u077a\5\u0779\1\u077b"+ + "\1\u077a\1\u0779\242\0\1\u04c7\4\u077c\2\0\1\u077c\15\0"+ + "\1\u077c\6\0\12\u077c\1\u0745\12\0\1\u045b\226\0\1\u04c7"+ + "\4\u077c\2\0\1\u077c\15\0\1\u077c\6\0\12\u077d\1\u0745"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u077c\2\0\1\u077c\15\0"+ + "\1\u077c\6\0\2\u077d\1\u077c\2\u077d\2\u077c\2\u077d\1\u077c"+ + "\1\u0745\12\0\1\u045b\274\0\1\u06b6\12\0\1\u045b\226\0"+ + "\1\u04cb\1\u045c\1\u077e\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\17\u045c\1\u077f\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\10\u045c"+ + "\1\u0780\21\u045c\1\u04cc\12\u045c\242\0\1\u04cb\13\u045c\1\u062a"+ + "\16\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u0781\31\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\5\u045c\1\u0782\24\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\25\u045c\1\u0783\4\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\15\u045c\1\u0784\14\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\21\u045c\1\u0785\10\u045c\1\u04cc\12\u045c\242\0\1\u04cb\16\u045c"+ + "\1\u0786\4\u045c\1\u0787\6\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\4\u045c\1\u0788\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\7\u045c\1\u0789\2\u045c\242\0\1\u04cb\4\u045c\1\u078a"+ + "\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\24\u045c\1\u078b\5\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u078c\30\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\1\u078d\1\u078e\1\u045c\1\u078f\16\u045c"+ + "\1\u0790\1\u045c\1\u0791\5\u045c\1\u04cc\5\u045c\1\u0792\4\u045c"+ + "\242\0\1\u04cb\1\u045c\1\u0793\30\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\31\u045c\1\u0794\1\u04cc\12\u045c\242\0\1\u04cb\16\u045c"+ + "\1\u0795\13\u045c\1\u04cc\12\u045c\242\0\1\u04cb\15\u045c\1\u0796"+ + 
"\14\u045c\1\u04cc\12\u045c\242\0\1\u04cb\11\u045c\1\u0797\13\u045c"+ + "\1\u0798\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\7\u045c\1\u0799\2\u045c\242\0\1\u04cb\21\u045c\1\u079a\7\u045c"+ + "\1\u079b\1\u04cc\12\u045c\242\0\1\u04cb\12\u045c\1\u079c\17\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\10\u045c\1\u079d"+ + "\1\u045c\242\0\1\u04cb\5\u045c\1\u079e\24\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\10\u045c\1\u079f\21\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\24\u045c\1\u07a0\5\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\1\u07a1\11\u045c\242\0\1\u04cb\5\u045c\1\u07a2"+ + "\10\u045c\1\u07a3\13\u045c\1\u04cc\12\u045c\242\0\1\u0136\7\323"+ + "\1\u07a4\22\323\1\u0137\12\323\242\0\1\u0136\1\u07a5\31\323"+ + "\1\u0137\12\323\242\0\1\u0136\32\323\1\u0137\1\323\1\u04fc"+ + "\10\323\236\0\1\154\3\0\1\u016f\1\u07a6\31\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\220\0\4\u07a7\2\0\1\u07a7\15\0"+ + "\1\u07a7\6\0\12\u07a7\1\u0738\242\0\4\u07a8\2\0\1\u07a8"+ + "\15\0\1\u07a8\6\0\12\u07a8\1\u07a9\242\0\4\u07aa\2\0"+ + "\1\u07aa\15\0\1\u07aa\6\0\1\u07ab\1\u07ac\5\u07ab\1\u07ad"+ + "\1\u07ac\1\u07ab\13\0\1\u04ba\227\0\4\u07ae\2\0\1\u07ae"+ + "\15\0\1\u07ae\6\0\12\u07ae\1\u0770\12\0\1\u04ba\227\0"+ + "\4\u07aa\2\0\1\u07aa\15\0\1\u07aa\6\0\1\u07ab\1\u07ac"+ + "\5\u07ab\1\u07ad\1\u07ac\1\u07ab\242\0\1\u0522\4\u07ae\2\0"+ + "\1\u07ae\15\0\1\u07ae\6\0\12\u07ae\1\u0770\12\0\1\u04ba"+ + "\226\0\1\u0522\4\u07ae\2\0\1\u07ae\15\0\1\u07ae\6\0"+ + "\12\u07af\1\u0770\12\0\1\u04ba\226\0\1\u0522\4\u07ae\2\0"+ + "\1\u07ae\15\0\1\u07ae\6\0\2\u07af\1\u07ae\2\u07af\2\u07ae"+ + "\2\u07af\1\u07ae\1\u0770\12\0\1\u04ba\274\0\1\u06f4\12\0"+ + "\1\u04ba\274\0\1\u06fe\242\0\4\u07b0\2\0\1\u07b0\15\0"+ + "\1\u07b0\6\0\12\u07b0\1\u0777\242\0\4\u07b1\2\0\1\u07b1"+ + "\15\0\1\u07b1\6\0\1\u07b2\1\u07b3\5\u07b2\1\u07b4\1\u07b3"+ + "\1\u07b2\1\u07b5\242\0\4\u07b6\2\0\1\u07b6\15\0\1\u07b6"+ + 
"\6\0\12\u07b6\1\u07b7\12\0\1\u045b\226\0\1\u04c7\4\u07b6"+ + "\2\0\1\u07b6\15\0\1\u07b6\6\0\12\u07b8\1\u07b7\12\0"+ + "\1\u045b\226\0\1\u04c7\4\u07b6\2\0\1\u07b6\15\0\1\u07b6"+ + "\6\0\12\u07b9\1\u07b7\12\0\1\u045b\226\0\1\u04c7\4\u07b6"+ + "\2\0\1\u07b6\15\0\1\u07b6\6\0\2\u07b9\1\u07b8\1\u07b9"+ + "\1\u07ba\2\u07b8\2\u07b9\1\u07b8\1\u07b7\12\0\1\u045b\227\0"+ + "\4\u07bb\2\0\1\u07bb\15\0\1\u07bb\6\0\12\u07bb\1\u0745"+ + "\12\0\1\u045b\226\0\1\u04c7\4\u07bb\2\0\1\u07bb\15\0"+ + "\1\u07bb\6\0\12\u07bb\1\u0745\12\0\1\u045b\226\0\1\u04cb"+ + "\17\u045c\1\u07bc\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\5\u045c"+ + "\1\u07bd\24\u045c\1\u04cc\12\u045c\242\0\1\u04cb\16\u045c\1\u06c9"+ + "\13\u045c\1\u04cc\12\u045c\242\0\1\u04cb\15\u045c\1\u07be\14\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\7\u045c\1\u062d\22\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\1\u045c\1\u07bf\30\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\6\u045c\1\u07c0\23\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\32\u045c\1\u04cc\3\u045c\1\u079c\6\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\6\u045c\1\u0633\3\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\5\u045c\1\u0633\4\u045c\242\0\1\u04cb\27\u045c\1\u07c1"+ + "\2\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u07c2\30\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\27\u045c\1\u07c3\2\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\1\u07c4\31\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u045c\1\u05a6\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\1\u07c5\30\u045c\1\u07c6\1\u04cc\1\u07c7\11\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\1\u045c\1\u07c8\10\u045c\242\0\1\u04cb\4\u045c"+ + "\1\u07c9\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\3\u045c\1\u07ca\6\u045c\242\0\1\u04cb\25\u045c\1\u07cb\4\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\1\u07cc\31\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\4\u045c\1\u07cd\5\u045c\242\0"+ + "\1\u04cb\24\u045c\1\u07ce\5\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + 
"\32\u045c\1\u04cc\1\u045c\1\u07cf\10\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\3\u045c\1\u062d\6\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\11\u045c\1\u0542\242\0\1\u04cb\32\u045c\1\u04cc\10\u045c\1\u078c"+ + "\1\u045c\242\0\1\u04cb\1\u07d0\1\u045c\1\u07d1\27\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\10\u045c\1\u07d2\1\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\4\u045c\1\u07d3\5\u045c\242\0"+ + "\1\u04cb\25\u045c\1\u05a6\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\5\u045c\1\u07d4\4\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\3\u045c\1\u07d5\6\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\7\u045c\1\u07d6\2\u045c\242\0\1\u04cb\32\u045c\1\u04cc\2\u045c"+ + "\1\u07d7\7\u045c\242\0\1\u04cb\1\u078c\31\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\7\u045c\1\u07d8\2\u045c\242\0"+ + "\1\u04cb\3\u045c\1\u07d9\15\u045c\1\u05b2\10\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u0136\32\323\1\u0137\7\323\1\u07da\2\323\242\0"+ + "\1\u0136\4\323\1\u022b\25\323\1\u0137\12\323\236\0\1\154"+ + "\3\0\1\u016f\32\371\1\203\5\371\1\u07db\4\371\1\0"+ + "\3\154\1\0\1\154\1\156\3\154\3\0\1\154\3\0"+ + "\2\154\265\0\1\u0738\242\0\4\u07dc\2\0\1\u07dc\15\0"+ + "\1\u07dc\6\0\12\u07dc\1\u07a9\242\0\4\u07dd\2\0\1\u07dd"+ + "\15\0\1\u07dd\6\0\1\u07de\1\u07df\5\u07de\1\u07e0\1\u07df"+ + "\1\u07de\1\u07e1\242\0\4\u07e2\2\0\1\u07e2\15\0\1\u07e2"+ + "\6\0\12\u07e2\1\u07e3\12\0\1\u04ba\226\0\1\u0522\4\u07e2"+ + "\2\0\1\u07e2\15\0\1\u07e2\6\0\12\u07e4\1\u07e3\12\0"+ + "\1\u04ba\226\0\1\u0522\4\u07e2\2\0\1\u07e2\15\0\1\u07e2"+ + "\6\0\12\u07e5\1\u07e3\12\0\1\u04ba\226\0\1\u0522\4\u07e2"+ + "\2\0\1\u07e2\15\0\1\u07e2\6\0\2\u07e5\1\u07e4\1\u07e5"+ + "\1\u07e6\2\u07e4\2\u07e5\1\u07e4\1\u07e3\12\0\1\u04ba\227\0"+ + "\4\u07e7\2\0\1\u07e7\15\0\1\u07e7\6\0\12\u07e7\1\u0770"+ + "\12\0\1\u04ba\226\0\1\u0522\4\u07e7\2\0\1\u07e7\15\0"+ + "\1\u07e7\6\0\12\u07e7\1\u0770\12\0\1\u04ba\227\0\4\u07e8"+ + "\2\0\1\u07e8\15\0\1\u07e8\6\0\12\u07e8\1\u0777\242\0"+ + 
"\4\u07e9\2\0\1\u07e9\15\0\1\u07e9\6\0\12\u07e9\1\u07ea"+ + "\241\0\1\u04c7\4\u07e9\2\0\1\u07e9\15\0\1\u07e9\6\0"+ + "\12\u07eb\1\u07ea\241\0\1\u04c7\4\u07e9\2\0\1\u07e9\15\0"+ + "\1\u07e9\6\0\12\u07ec\1\u07ea\241\0\1\u04c7\4\u07e9\2\0"+ + "\1\u07e9\15\0\1\u07e9\6\0\2\u07ec\1\u07eb\1\u07ec\1\u07ed"+ + "\2\u07eb\2\u07ec\1\u07eb\1\u07ea\242\0\4\u07ee\2\0\1\u07ee"+ + "\15\0\1\u07ee\6\0\12\u07ee\13\0\1\u045b\227\0\4\u07ef"+ + "\2\0\1\u07ef\15\0\1\u07ef\6\0\12\u07ef\1\u07b7\12\0"+ + "\1\u045b\227\0\4\u07ee\2\0\1\u07ee\15\0\1\u07ee\6\0"+ + "\12\u07ee\242\0\1\u04c7\4\u07ef\2\0\1\u07ef\15\0\1\u07ef"+ + "\6\0\12\u07ef\1\u07b7\12\0\1\u045b\226\0\1\u04c7\4\u07ef"+ + "\2\0\1\u07ef\15\0\1\u07ef\6\0\12\u07f0\1\u07b7\12\0"+ + "\1\u045b\226\0\1\u04c7\4\u07ef\2\0\1\u07ef\15\0\1\u07ef"+ + "\6\0\2\u07f0\1\u07ef\2\u07f0\2\u07ef\2\u07f0\1\u07ef\1\u07b7"+ + "\12\0\1\u045b\274\0\1\u0745\12\0\1\u045b\226\0\1\u04cb"+ + "\10\u045c\1\u07f1\21\u045c\1\u04cc\12\u045c\242\0\1\u04cb\4\u045c"+ + "\1\u0633\25\u045c\1\u04cc\12\u045c\242\0\1\u04cb\25\u045c\1\u062d"+ + "\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\1\u045c"+ + "\1\u07f2\10\u045c\242\0\1\u04cb\32\u045c\1\u04cc\6\u045c\1\u07f3"+ + "\3\u045c\242\0\1\u04cb\32\u045c\1\u04cc\5\u045c\1\u07f4\4\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\5\u045c\1\u07f5\4\u045c\242\0"+ + "\1\u04cb\32\u045c\1\u04cc\5\u045c\1\u078c\4\u045c\242\0\1\u04cb"+ + "\17\u045c\1\u07f6\12\u045c\1\u04cc\12\u045c\242\0\1\u04cb\12\u045c"+ + "\1\u07f7\17\u045c\1\u04cc\12\u045c\242\0\1\u04cb\25\u045c\1\u07f8"+ + "\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u07f9\31\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\1\u07fa\31\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\15\u045c\1\u07fb\14\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\1\u045c\1\u07fc\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\10\u045c\1\u07fd\1\u045c\242\0\1\u04cb\21\u045c\1\u07fe"+ + "\10\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u07ff\31\u045c\1\u04cc"+ + 
"\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\3\u045c\1\u078c\6\u045c"+ + "\242\0\1\u04cb\2\u045c\1\u079c\27\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\11\u045c\1\u0800\20\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\11\u045c\1\u0801\20\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\1\u0625\11\u045c\242\0\1\u04cb\32\u045c\1\u04cc\2\u045c"+ + "\1\u0625\7\u045c\242\0\1\u04cb\32\u045c\1\u04cc\1\u05b2\11\u045c"+ + "\242\0\1\u04cb\10\u045c\1\u0802\21\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u0803\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\1\u045c\1\u0804\10\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\10\u045c\1\u0542\1\u045c\242\0\1\u04cb\25\u045c\1\u0805\4\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u0136\1\u0806\31\323\1\u0137\12\323"+ + "\236\0\1\154\3\0\1\u016f\7\371\1\u0807\22\371\1\203"+ + "\12\371\1\0\3\154\1\0\1\154\1\156\3\154\3\0"+ + "\1\154\3\0\2\154\220\0\4\u0808\2\0\1\u0808\15\0"+ + "\1\u0808\6\0\12\u0808\1\u07a9\242\0\4\u0809\2\0\1\u0809"+ + "\15\0\1\u0809\6\0\12\u0809\1\u080a\241\0\1\u0522\4\u0809"+ + "\2\0\1\u0809\15\0\1\u0809\6\0\12\u080b\1\u080a\241\0"+ + "\1\u0522\4\u0809\2\0\1\u0809\15\0\1\u0809\6\0\12\u080c"+ + "\1\u080a\241\0\1\u0522\4\u0809\2\0\1\u0809\15\0\1\u0809"+ + "\6\0\2\u080c\1\u080b\1\u080c\1\u080d\2\u080b\2\u080c\1\u080b"+ + "\1\u080a\242\0\4\u080e\2\0\1\u080e\15\0\1\u080e\6\0"+ + "\12\u080e\13\0\1\u04ba\227\0\4\u080f\2\0\1\u080f\15\0"+ + "\1\u080f\6\0\12\u080f\1\u07e3\12\0\1\u04ba\227\0\4\u080e"+ + "\2\0\1\u080e\15\0\1\u080e\6\0\12\u080e\242\0\1\u0522"+ + "\4\u080f\2\0\1\u080f\15\0\1\u080f\6\0\12\u080f\1\u07e3"+ + "\12\0\1\u04ba\226\0\1\u0522\4\u080f\2\0\1\u080f\15\0"+ + "\1\u080f\6\0\12\u0810\1\u07e3\12\0\1\u04ba\226\0\1\u0522"+ + "\4\u080f\2\0\1\u080f\15\0\1\u080f\6\0\2\u0810\1\u080f"+ + "\2\u0810\2\u080f\2\u0810\1\u080f\1\u07e3\12\0\1\u04ba\274\0"+ + "\1\u0770\12\0\1\u04ba\274\0\1\u0777\242\0\4\u0811\2\0"+ + "\1\u0811\15\0\1\u0811\6\0\12\u0811\1\u07ea\242\0\4\u07ee"+ + 
"\2\0\1\u07ee\15\0\1\u07ee\6\0\12\u07ee\1\u0705\241\0"+ + "\1\u04c7\4\u0811\2\0\1\u0811\15\0\1\u0811\6\0\12\u0811"+ + "\1\u07ea\241\0\1\u04c7\4\u0811\2\0\1\u0811\15\0\1\u0811"+ + "\6\0\12\u0812\1\u07ea\241\0\1\u04c7\4\u0811\2\0\1\u0811"+ + "\15\0\1\u0811\6\0\2\u0812\1\u0811\2\u0812\2\u0811\2\u0812"+ + "\1\u0811\1\u07ea\242\0\4\u0813\2\0\1\u0813\15\0\1\u0813"+ + "\6\0\12\u0813\13\0\1\u045b\227\0\4\u0814\2\0\1\u0814"+ + "\15\0\1\u0814\6\0\12\u0814\1\u07b7\12\0\1\u045b\226\0"+ + "\1\u04c7\4\u0814\2\0\1\u0814\15\0\1\u0814\6\0\12\u0814"+ + "\1\u07b7\12\0\1\u045b\226\0\1\u04cb\5\u045c\1\u072c\24\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\3\u045c\1\u0815\26\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\6\u045c\1\u05bc\23\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\1\u045c\1\u07d2\30\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\3\u045c\1\u0816\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\10\u045c\1\u0817\1\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\2\u045c\1\u0818\7\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\2\u045c\1\u0819\7\u045c\242\0\1\u04cb\32\u045c\1\u04cc\3\u045c"+ + "\1\u081a\6\u045c\242\0\1\u04cb\32\u045c\1\u04cc\5\u045c\1\u081b"+ + "\4\u045c\242\0\1\u04cb\32\u045c\1\u04cc\3\u045c\1\u081c\6\u045c"+ + "\242\0\1\u04cb\2\u045c\1\u081d\27\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\1\u081e\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\24\u045c"+ + "\1\u081f\5\u045c\1\u04cc\12\u045c\242\0\1\u04cb\23\u045c\1\u0625"+ + "\6\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\1\u0820"+ + "\11\u045c\242\0\1\u04cb\32\u045c\1\u04cc\1\u0821\11\u045c\242\0"+ + "\1\u04cb\32\u045c\1\u04cc\11\u045c\1\u0822\242\0\1\u04cb\12\u045c"+ + "\1\u0823\17\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc"+ + "\2\u045c\1\u0629\7\u045c\242\0\1\u04cb\2\u045c\1\u0824\27\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u0136\32\323\1\u0137\5\323\1\u0825"+ + "\4\323\236\0\1\154\3\0\1\u016f\1\371\1\u0205\30\371"+ + "\1\203\12\371\1\0\3\154\1\0\1\154\1\156\3\154"+ + 
"\3\0\1\154\3\0\2\154\265\0\1\u07a9\242\0\4\u0826"+ + "\2\0\1\u0826\15\0\1\u0826\6\0\12\u0826\1\u080a\242\0"+ + "\4\u080e\2\0\1\u080e\15\0\1\u080e\6\0\12\u080e\1\u073f"+ + "\241\0\1\u0522\4\u0826\2\0\1\u0826\15\0\1\u0826\6\0"+ + "\12\u0826\1\u080a\241\0\1\u0522\4\u0826\2\0\1\u0826\15\0"+ + "\1\u0826\6\0\12\u0827\1\u080a\241\0\1\u0522\4\u0826\2\0"+ + "\1\u0826\15\0\1\u0826\6\0\2\u0827\1\u0826\2\u0827\2\u0826"+ + "\2\u0827\1\u0826\1\u080a\242\0\4\u0828\2\0\1\u0828\15\0"+ + "\1\u0828\6\0\12\u0828\13\0\1\u04ba\227\0\4\u0829\2\0"+ + "\1\u0829\15\0\1\u0829\6\0\12\u0829\1\u07e3\12\0\1\u04ba"+ + "\226\0\1\u0522\4\u0829\2\0\1\u0829\15\0\1\u0829\6\0"+ + "\12\u0829\1\u07e3\12\0\1\u04ba\227\0\4\u082a\2\0\1\u082a"+ + "\15\0\1\u082a\6\0\12\u082a\1\u07ea\241\0\1\u04c7\4\u082a"+ + "\2\0\1\u082a\15\0\1\u082a\6\0\12\u082a\1\u07ea\242\0"+ + "\4\u082b\2\0\1\u082b\15\0\1\u082b\6\0\12\u082b\13\0"+ + "\1\u045b\274\0\1\u07b7\12\0\1\u045b\226\0\1\u04cb\1\u082c"+ + "\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\7\u045c"+ + "\1\u078c\2\u045c\242\0\1\u04cb\1\u082d\31\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\1\u082e\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\7\u045c\1\u082f\22\u045c\1\u04cc\12\u045c\242\0\1\u04cb\6\u045c"+ + "\1\u0830\23\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u0831\31\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\1\u0832\31\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\1\u045c\1\u0833\10\u045c\242\0"+ + "\1\u04cb\32\u045c\1\u04cc\2\u045c\1\u0834\7\u045c\242\0\1\u04cb"+ + "\6\u045c\1\u05a6\23\u045c\1\u04cc\12\u045c\242\0\1\u04cb\25\u045c"+ + "\1\u0835\4\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u0836\31\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\2\u045c\1\u05cb"+ + "\7\u045c\242\0\1\u04cb\12\u045c\1\u05cd\17\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\24\u045c\1\u05a6\5\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u0136\7\323\1\u0837\22\323\1\u0137\12\323\243\0\4\u0838"+ + "\2\0\1\u0838\15\0\1\u0838\6\0\12\u0838\1\u080a\241\0"+ + 
"\1\u0522\4\u0838\2\0\1\u0838\15\0\1\u0838\6\0\12\u0838"+ + "\1\u080a\242\0\4\u0839\2\0\1\u0839\15\0\1\u0839\6\0"+ + "\12\u0839\13\0\1\u04ba\274\0\1\u07e3\12\0\1\u04ba\274\0"+ + "\1\u07ea\242\0\4\u0705\2\0\1\u0705\15\0\1\u0705\6\0"+ + "\12\u0705\13\0\1\u045b\226\0\1\u04cb\32\u045c\1\u04cc\1\u045c"+ + "\1\u083a\10\u045c\242\0\1\u04cb\2\u045c\1\u083b\27\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\6\u045c\1\u079c\3\u045c"+ + "\242\0\1\u04cb\15\u045c\1\u0542\14\u045c\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\32\u045c\1\u04cc\10\u045c\1\u0797\1\u045c\242\0\1\u04cb"+ + "\23\u045c\1\u083c\6\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c"+ + "\1\u04cc\4\u045c\1\u083d\5\u045c\242\0\1\u04cb\1\u0824\31\u045c"+ + "\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\10\u045c\1\u0629"+ + "\1\u045c\242\0\1\u04cb\31\u045c\1\u083e\1\u04cc\12\u045c\242\0"+ + "\1\u04cb\32\u045c\1\u04cc\4\u045c\1\u083f\5\u045c\242\0\1\u0136"+ + "\1\323\1\u02b5\30\323\1\u0137\12\323\310\0\1\u080a\242\0"+ + "\4\u073f\2\0\1\u073f\15\0\1\u073f\6\0\12\u073f\13\0"+ + "\1\u04ba\226\0\1\u04cb\24\u045c\1\u0840\5\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\6\u045c\1\u0841\3\u045c\242\0"+ + "\1\u04cb\1\u045c\1\u05b2\30\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\2\u045c\1\u0842\27\u045c\1\u04cc\12\u045c\242\0\1\u04cb\3\u045c"+ + "\1\u0843\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb\3\u045c\1\u0844"+ + "\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\7\u045c"+ + "\1\u0845\2\u045c\242\0\1\u04cb\27\u045c\1\u05a6\2\u045c\1\u04cc"+ + "\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\3\u045c\1\u0846\6\u045c"+ + "\242\0\1\u04cb\32\u045c\1\u04cc\7\u045c\1\u0542\2\u045c\242\0"+ + "\1\u04cb\3\u045c\1\u0847\26\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\7\u045c\1\u0848\22\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u0849"+ + "\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb\32\u045c\1\u04cc\1\u045c"+ + "\1\u079c\10\u045c\242\0\1\u04cb\32\u045c\1\u04cc\7\u045c\1\u084a"+ + 
"\2\u045c\242\0\1\u04cb\4\u045c\1\u05a6\25\u045c\1\u04cc\12\u045c"+ + "\242\0\1\u04cb\1\u084b\31\u045c\1\u04cc\12\u045c\242\0\1\u04cb"+ + "\32\u045c\1\u04cc\5\u045c\1\u084c\4\u045c\242\0\1\u04cb\7\u045c"+ + "\1\u084d\22\u045c\1\u04cc\12\u045c\242\0\1\u04cb\1\u045c\1\u0625"+ + "\30\u045c\1\u04cc\12\u045c\24\0"; private static int [] zzUnpackTrans() { - int [] result = new int[341204]; + int [] result = new int[421400]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); offset = zzUnpackTrans(ZZ_TRANS_PACKED_1, offset, result); @@ -3919,25 +5512,31 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\1\0\1\11\50\1\20\0\1\1\1\0\1\1\12\0"+ - "\1\1\21\0\1\1\32\0\2\1\1\0\4\1\1\0"+ - "\1\1\1\0\4\1\67\0\32\1\3\0\5\1\32\0"+ - "\4\1\21\0\1\11\1\0\24\1\2\0\1\1\1\0"+ - "\10\1\3\0\2\1\1\0\4\1\2\0\2\1\1\0"+ - "\2\1\10\0\1\1\32\0\1\1\1\0\11\1\1\0"+ - "\1\1\2\0\2\1\1\0\1\1\10\0\3\1\15\0"+ - "\11\1\3\0\2\1\1\0\4\1\2\0\4\1\1\0"+ - "\2\1\1\0\2\1\1\0\3\1\3\0\1\1\4\0"+ - "\2\1\20\0\1\1\10\0\1\1\3\0\1\1\40\0"+ - "\3\1\23\0\1\1\40\0\1\1\4\0\1\1\6\0"+ - "\1\1\2\0\1\1\4\0\2\1\43\0\1\1\57\0"+ - "\2\1\10\0\1\1\53\0\1\1\72\0\1\1\150\0"+ - "\1\11\1\0\1\1\177\0\1\1\132\0\6\1\3\0"+ - "\2\1\1\0\4\1\2\0\3\1\112\0\1\1\10\0"+ - "\1\1\64\0\1\1\u01eb\0"; + "\1\0\1\11\51\1\21\0\1\1\1\0\1\1\12\0"+ + "\1\1\10\0\1\1\11\0\1\1\46\0\6\1\2\0"+ + "\5\1\23\0\1\1\70\0\1\1\1\0\32\1\3\0"+ + "\6\1\33\0\4\1\4\0\1\1\22\0\1\11\10\0"+ + "\56\1\1\0\1\1\1\0\11\1\4\0\1\1\1\0"+ + "\2\1\1\0\6\1\1\0\4\1\1\0\4\1\2\0"+ + "\2\1\4\0\1\1\1\0\3\1\2\0\2\1\10\0"+ + "\1\1\41\0\1\1\1\0\35\1\1\0\4\1\2\0"+ + "\2\1\1\0\1\1\37\0\3\1\15\0\12\1\4\0"+ + "\1\1\1\0\2\1\1\0\6\1\1\0\4\1\1\0"+ + "\4\1\2\0\2\1\4\0\1\1\1\0\3\1\1\0"+ + "\2\1\1\0\2\1\1\0\2\1\1\0\24\1\1\0"+ + "\4\1\2\0\1\1\31\0\2\1\20\0\1\1\37\0"+ + "\1\1\3\0\15\1\25\0\3\1\31\0\3\1\50\0"+ + 
"\13\1\32\0\2\1\1\0\1\1\4\0\1\1\7\0"+ + "\1\1\2\0\1\1\20\0\2\1\61\0\7\1\53\0"+ + "\2\1\6\0\1\1\70\0\6\1\66\0\2\1\62\0"+ + "\3\1\65\0\1\11\1\0\1\1\64\0\1\1\113\0"+ + "\1\1\125\0\7\1\4\0\1\1\1\0\2\1\1\0"+ + "\6\1\1\0\4\1\1\0\4\1\2\0\2\1\4\0"+ + "\1\1\1\0\3\1\1\0\1\1\104\0\1\1\37\0"+ + "\1\1\56\0\1\1\u01e5\0"; private static int [] zzUnpackAttribute() { - int [] result = new int[1750]; + int [] result = new int[2125]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -4048,7 +5647,6 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf /** * Creates a new scanner - * There is also a java.io.InputStream version of this constructor. * * @param in the java.io.Reader to read input from. */ @@ -4056,7 +5654,6 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf this.zzReader = in; } - /** * Unpacks the compressed character translation table. @@ -4068,7 +5665,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf char [] map = new char[0x10000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 3010) { + while (i < 3018) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); @@ -4348,7 +5945,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { case 1: - { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. 
*/ } case 12: break; case 2: diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex index 91bbe2e232d..08ade389faa 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex @@ -35,11 +35,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * Asian languages, including Thai, Lao, Myanmar, and Khmer *
  • <IDEOGRAPHIC>: A single CJKV ideographic character
  • *
  • <HIRAGANA>: A single hiragana character
  • + *
  • <KATAKANA>: A sequence of katakana characters
  • + *
  • <HANGUL>: A sequence of Hangul characters
  • * */ %% -%unicode 6.1 +%unicode 6.3 %integer %final %public @@ -50,33 +52,39 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %buffer 4096 %include SUPPLEMENTARY.jflex-macro -ALetter = ([\p{WB:ALetter}] | {ALetterSupp}) -Format = ([\p{WB:Format}] | {FormatSupp}) -Numeric = ([\p{WB:Numeric}] | {NumericSupp}) -Extend = ([\p{WB:Extend}] | {ExtendSupp}) -Katakana = ([\p{WB:Katakana}] | {KatakanaSupp}) -MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp}) -MidNum = ([\p{WB:MidNum}] | {MidNumSupp}) -MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp}) -ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp}) -ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp}) -Han = ([\p{Script:Han}] | {HanSupp}) -Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp}) +ALetter = (\p{WB:ALetter} | {ALetterSupp}) +Format = (\p{WB:Format} | {FormatSupp}) +Numeric = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp}) +Extend = (\p{WB:Extend} | {ExtendSupp}) +Katakana = (\p{WB:Katakana} | {KatakanaSupp}) +MidLetter = (\p{WB:MidLetter} | {MidLetterSupp}) +MidNum = (\p{WB:MidNum} | {MidNumSupp}) +MidNumLet = (\p{WB:MidNumLet} | {MidNumLetSupp}) +ExtendNumLet = (\p{WB:ExtendNumLet} | {ExtendNumLetSupp}) +ComplexContext = (\p{LB:Complex_Context} | {ComplexContextSupp}) +Han = (\p{Script:Han} | {HanSupp}) +Hiragana = (\p{Script:Hiragana} | {HiraganaSupp}) +SingleQuote = (\p{WB:Single_Quote} | {SingleQuoteSupp}) +DoubleQuote = (\p{WB:Double_Quote} | {DoubleQuoteSupp}) +HebrewLetter = (\p{WB:Hebrew_Letter} | {HebrewLetterSupp}) +RegionalIndicator = (\p{WB:Regional_Indicator} | {RegionalIndicatorSupp}) +HebrewOrALetter = ({HebrewLetter} | {ALetter}) -// Script=Hangul & Aletter -HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})* // UAX#29 WB4. 
X (Extend | Format)* --> X // -ALetterEx = {ALetter} ({Format} | {Extend})* -// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it -NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})* -KatakanaEx = {Katakana} ({Format} | {Extend})* -MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})* -MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})* -ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* - -HanEx = {Han} ({Format} | {Extend})* -HiraganaEx = {Hiragana} ({Format} | {Extend})* +HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})* +HebrewOrALetterEx = {HebrewOrALetter} ({Format} | {Extend})* +NumericEx = {Numeric} ({Format} | {Extend})* +KatakanaEx = {Katakana} ({Format} | {Extend})* +MidLetterEx = ({MidLetter} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})* +MidNumericEx = ({MidNum} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})* +ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* +HanEx = {Han} ({Format} | {Extend})* +HiraganaEx = {Hiragana} ({Format} | {Extend})* +SingleQuoteEx = {SingleQuote} ({Format} | {Extend})* +DoubleQuoteEx = {DoubleQuote} ({Format} | {Extend})* +HebrewLetterEx = {HebrewLetter} ({Format} | {Extend})* +RegionalIndicatorEx = {RegionalIndicator} ({Format} | {Extend})* // URL and E-mail syntax specifications: // @@ -213,40 +221,47 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost}) {EMAIL} { return EMAIL_TYPE; } // UAX#29 WB8. Numeric × Numeric -// WB11. Numeric (MidNum | MidNumLet) × Numeric -// WB12. Numeric × (MidNum | MidNumLet) Numeric -// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet -// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) +// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric +// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric +// WB13a. 
(ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet +// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) // -{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx} - | {MidNumericEx} {NumericEx} - | {NumericEx})* -{ExtendNumLetEx}* +{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}* { return NUMERIC_TYPE; } // subset of the below for typing purposes only! {HangulEx}+ { return HANGUL_TYPE; } - + {KatakanaEx}+ { return KATAKANA_TYPE; } -// UAX#29 WB5. ALetter × ALetter -// WB6. ALetter × (MidLetter | MidNumLet) ALetter -// WB7. ALetter (MidLetter | MidNumLet) × ALetter -// WB9. ALetter × Numeric -// WB10. Numeric × ALetter +// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter) +// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter) +// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter) +// WB7a. Hebrew_Letter × Single_Quote +// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter +// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter +// WB9. (ALetter | Hebrew_Letter) × Numeric +// WB10. Numeric × (ALetter | Hebrew_Letter) // WB13. Katakana × Katakana -// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet -// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) +// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet +// WB13b. 
ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) // -{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* - | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* - | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) -({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* - | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* - | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )* -{ExtendNumLetEx}* +{ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )* + | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} ) + | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )* + | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )* + )+ + ) +({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )* + | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} ) + | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )* + | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )* + )+ + ) +)* +{ExtendNumLetEx}* { return WORD_TYPE; } @@ -258,7 +273,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost}) // annex. That means that satisfactory treatment of languages like Chinese // or Thai requires special handling. // -// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break} +// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break} // property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER. // // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context} @@ -280,6 +295,8 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost}) // UAX#29 WB3. CR × LF // WB3a. (Newline | CR | LF) ÷ // WB3b. ÷ (Newline | CR | LF) +// WB13c. 
Regional_Indicator × Regional_Indicator // WB14. Any ÷ Any // -[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } +{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^] + { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java index 6aa504b976a..8c1de5ae048 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java @@ -133,8 +133,8 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { - Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader); - TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer; + Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader); + TokenStream stream = ignoreCase ? 
new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; @@ -201,7 +201,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException { Class clazz = loader.findClass(cname, Analyzer.class); try { - Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_50); + Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT); if (analyzer instanceof ResourceLoaderAware) { ((ResourceLoaderAware) analyzer).inform(loader); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java index f5723182f53..bdacc563f9a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex. 
*/ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT */ package org.apache.lucene.analysis.wikipedia; @@ -84,21 +84,20 @@ class WikipediaTokenizerImpl { private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - "\12\0\4\1\4\2\1\3\1\1\1\4\1\1\2\5"+ - "\1\6\2\5\1\7\1\5\2\10\1\11\1\12\1\11"+ - "\1\13\1\14\1\10\1\15\1\16\1\15\1\17\1\20"+ - "\1\10\1\21\1\10\4\22\1\23\1\22\1\24\1\25"+ - "\1\26\3\0\1\27\14\0\1\30\1\31\1\32\1\33"+ - "\1\11\1\0\1\34\1\35\1\36\1\0\1\37\1\0"+ - "\1\40\3\0\1\41\1\42\2\43\1\42\2\44\2\0"+ - "\1\43\1\0\14\43\1\42\3\0\1\11\1\45\3\0"+ - "\1\46\1\47\5\0\1\50\4\0\1\50\2\0\2\50"+ - "\2\0\1\11\5\0\1\31\1\42\1\43\1\51\3\0"+ - "\1\11\2\0\1\52\30\0\1\53\2\0\1\54\1\55"+ - "\1\56"; + "\12\0\4\1\4\2\1\3\1\4\1\1\2\5\1\6"+ + "\1\5\1\7\1\5\2\10\1\11\1\5\1\12\1\11"+ + "\1\13\1\14\1\15\1\16\1\15\1\17\1\20\1\10"+ + "\1\21\1\10\4\22\1\23\1\24\1\25\1\26\3\0"+ + "\1\27\14\0\1\30\1\31\1\32\1\33\1\11\1\0"+ + "\1\34\1\35\1\36\1\0\1\37\1\0\1\40\3\0"+ + "\1\41\1\42\2\43\1\42\2\44\2\0\1\43\1\0"+ + "\14\43\1\42\3\0\1\11\1\45\3\0\1\46\1\47"+ + "\5\0\1\50\4\0\1\50\2\0\2\50\2\0\1\11"+ + "\5\0\1\31\1\42\1\43\1\51\3\0\1\11\2\0"+ + "\1\52\30\0\1\53\2\0\1\54\1\55\1\56"; private static int [] zzUnpackAction() { - int [] result = new int[184]; + int [] result = new int[181]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -125,30 +124,30 @@ class WikipediaTokenizerImpl { private static final String ZZ_ROWMAP_PACKED_0 = "\0\0\0\54\0\130\0\204\0\260\0\334\0\u0108\0\u0134"+ "\0\u0160\0\u018c\0\u01b8\0\u01e4\0\u0210\0\u023c\0\u0268\0\u0294"+ - "\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u0370\0\u01b8\0\u039c"+ - "\0\u03c8\0\u03f4\0\u0420\0\u044c\0\u0478\0\u01b8\0\u039c\0\u04a4"+ - "\0\u01b8\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+ - "\0\u0604\0\u0630\0\u065c\0\u0688\0\u06b4\0\u01b8\0\u06e0\0\u039c"+ - 
"\0\u070c\0\u0738\0\u0764\0\u0790\0\u01b8\0\u01b8\0\u07bc\0\u07e8"+ - "\0\u0814\0\u01b8\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+ - "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u0a24\0\u0a50\0\u0a7c"+ - "\0\u01b8\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b00\0\u01b8\0\u0b2c"+ + "\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u01b8\0\u0370\0\u039c"+ + "\0\u03c8\0\u03f4\0\u0420\0\u01b8\0\u0370\0\u044c\0\u0478\0\u01b8"+ + "\0\u04a4\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+ + "\0\u0604\0\u0630\0\u065c\0\u01b8\0\u0688\0\u0370\0\u06b4\0\u06e0"+ + "\0\u070c\0\u01b8\0\u01b8\0\u0738\0\u0764\0\u0790\0\u01b8\0\u07bc"+ + "\0\u07e8\0\u0814\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+ + "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u01b8\0\u01b8\0\u0a24"+ + "\0\u0a50\0\u0a7c\0\u0a7c\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b2c"+ "\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+ - "\0\u0cb8\0\u0ce4\0\u0d10\0\u0898\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+ + "\0\u0814\0\u0cb8\0\u0ce4\0\u0d10\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+ "\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+ - "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u1080"+ - "\0\u10ac\0\u10d8\0\u01b8\0\u1104\0\u1130\0\u115c\0\u1188\0\u01b8"+ + "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u01b8"+ + "\0\u1080\0\u10ac\0\u10d8\0\u1104\0\u01b8\0\u1130\0\u115c\0\u1188"+ "\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+ - "\0\u1314\0\u1340\0\u136c\0\u1398\0\u13c4\0\u086c\0\u09f8\0\u13f0"+ - "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u14cc\0\u14f8\0\u1524\0\u01b8"+ - "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u1658\0\u1684"+ - "\0\u16b0\0\u01b8\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+ + "\0\u1314\0\u1340\0\u07e8\0\u0974\0\u136c\0\u1398\0\u13c4\0\u13f0"+ + "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u01b8\0\u14cc\0\u14f8\0\u1524"+ + "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u01b8\0\u1658"+ + 
"\0\u1684\0\u16b0\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+ "\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+ "\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+ - "\0\u1aa4\0\u1ad0\0\u1afc\0\u1b28\0\u1b54\0\u01b8\0\u01b8\0\u01b8"; + "\0\u1aa4\0\u1ad0\0\u01b8\0\u01b8\0\u01b8"; private static int [] zzUnpackRowMap() { - int [] result = new int[184]; + int [] result = new int[181]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -172,152 +171,149 @@ class WikipediaTokenizerImpl { private static final Stringprivate static int [] zzUnpackTrans() { - int [] result = new int[7040]; + int [] result = new int[6908]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -355,8 +351,8 @@ class WikipediaTokenizerImpl { private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\12\0\1\11\7\1\1\11\3\1\1\11\6\1\1\11"+ - "\2\1\1\11\14\1\1\11\6\1\2\11\3\0\1\11"+ + "\12\0\1\11\7\1\1\11\2\1\1\11\5\1\1\11"+ + "\3\1\1\11\13\1\1\11\5\1\2\11\3\0\1\11"+ "\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+ "\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+ "\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+ @@ -365,7 +361,7 @@ class WikipediaTokenizerImpl { "\2\0\3\11"; private static int [] zzUnpackAttribute() { - int [] result = new int[184]; + int [] result = new int[181]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -508,7 +504,6 @@ final void reset() { /** * Creates a new scanner - * There is also a java.io.InputStream version of this constructor. * * @param in the java.io.Reader to read input from. */ @@ -516,7 +511,6 @@ final void reset() { this.zzReader = in; } - /** * Unpacks the compressed character translation table. 
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex index 3865ea08aba..7fde30afb3d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex @@ -212,7 +212,7 @@ DOUBLE_EQUALS = "="{2} {DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;} {CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} + [^] |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} } { @@ -221,7 +221,7 @@ DOUBLE_EQUALS = "="{2} {ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;} {DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} + [^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} } { @@ -236,7 +236,7 @@ DOUBLE_EQUALS = "="{2} {ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;} {DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;} //ignore - . 
| {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} + [^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} } //italics { @@ -249,7 +249,7 @@ DOUBLE_EQUALS = "="{2} {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } + [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } //bold { @@ -260,7 +260,7 @@ DOUBLE_EQUALS = "="{2} {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } + [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } //bold italics @@ -272,7 +272,7 @@ DOUBLE_EQUALS = "="{2} {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } + [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } { @@ -280,15 +280,15 @@ DOUBLE_EQUALS = "="{2} {ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;} {DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} //ignore - . 
| {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } + [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } { {ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;} {DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} {CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} - //ignore - . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } + //ignore + [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } { @@ -305,7 +305,7 @@ DOUBLE_EQUALS = "="{2} {PIPE} {yybegin(STRING); return currentTokType;/*pipe*/} - .|{WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ } + [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ } } @@ -327,7 +327,7 @@ DOUBLE_EQUALS = "="{2} //end wikipedia /** Ignore the rest */ -. 
| {WHITESPACE}|{TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } +[^] | {TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } //INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java index 7c593a57041..570e3612a1b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java @@ -202,7 +202,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { } public void testUnicodeWordBreaks() throws Exception { - WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0(); + WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0(); wordBreakTest.test(a); } @@ -230,6 +230,8 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { checkOneTerm(a, "壹゙", "壹゙"); // ideographic checkOneTerm(a, "아゙", "아゙"); // hangul } + + /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java index e27adbb5883..0656c2832ae 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java @@ -60,7 +60,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { public void testStopList() throws IOException { CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); - StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet); 
+ StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); try (TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer")) { assertNotNull(stream); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java index 383fd7066d4..b363b0097f1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java @@ -94,7 +94,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase { // LUCENE-3849: make sure after .end() we see the "ending" posInc public void testEndStopword() throws Exception { CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of"); - StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet); + StopFilter stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet); assertTokenStreamContents(stpf, new String[] { "test" }, new int[] {0}, new int[] {4}, diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index 736b07f2c29..becc397f157 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { } public void testUnicodeWordBreaks() throws Exception { - WordBreakTestUnicode_6_1_0 wordBreakTest = new 
WordBreakTestUnicode_6_1_0(); + WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0(); wordBreakTest.test(a); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_1_0.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_3_0.java similarity index 64% rename from lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_1_0.java rename to lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_3_0.java index a9a79a32818..9838530cf83 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_1_0.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_3_0.java @@ -23,7 +23,7 @@ import org.junit.Ignore; /** * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl - * from: http://www.unicode.org/Public/6.1.0/ucd/auxiliary/WordBreakTest.txt + * from: http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt * * WordBreakTest.txt indicates the points in the provided character sequences * at which conforming implementations must and must not break words. 
This @@ -32,16 +32,17 @@ import org.junit.Ignore; * sequences bounded by word breaks and containing at least one character * from one of the following character sets: * - * \p{Script = Han} (From http://www.unicode.org/Public/6.1.0/ucd/Scripts.txt) + * \p{Script = Han} (From http://www.unicode.org/Public/6.3.0/ucd/Scripts.txt) * \p{Script = Hiragana} - * \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.1.0/ucd/LineBreak.txt) - * \p{WordBreak = ALetter} (From http://www.unicode.org/Public/6.1.0/ucd/auxiliary/WordBreakProperty.txt) + * \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.3.0/ucd/LineBreak.txt) + * \p{WordBreak = ALetter} (From http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt) + * \p{WordBreak = Hebrew_Letter} * \p{WordBreak = Katakana} * \p{WordBreak = Numeric} (Excludes full-width Arabic digits) - * [\uFF10-\uFF19] (Full-width Arabic digits) + * [\uFF10-\uFF19] (Full-width Arabic digits) */ @Ignore -public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { +public class WordBreakTestUnicode_6_3_0 extends BaseTokenStreamTestCase { public void test(Analyzer analyzer) throws Exception { // ÷ 0001 ÷ 0001 ÷ # ÷ [0.2] (Other) ÷ [999.0] (Other) ÷ [0.3] @@ -108,12 +109,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0001\u0308\u002C", new String[] { }); - // ÷ 0001 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0001\u0027", + // ÷ 0001 ÷ 002E ÷ # ÷ [0.2] (Other) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u002E", new String[] { }); - // ÷ 0001 × 0308 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0001\u0308\u0027", + // ÷ 0001 × 0308 ÷ 002E ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u0001\u0308\u002E", new String[] { }); // ÷ 0001 ÷ 0030 ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -132,6 +133,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0001\u0308\u005F", new String[] { }); + // ÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 0001 ÷ 05D0 ÷ # ÷ [0.2] (Other) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0001 × 0308 ÷ 05D0 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0001 ÷ 0022 ÷ # ÷ [0.2] (Other) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\"", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 0022 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\"", + new String[] { }); + + // ÷ 0001 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0027", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0027", + new String[] { }); + // ÷ 0001 × 00AD ÷ # ÷ [0.2] (Other) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0001\u00AD", new String[] { }); @@ -164,19 +197,19 @@ 
public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 0001 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0001 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0001\u0061\u0027", new String[] { "\u0061" }); - // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -196,11 +229,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, 
"\u0001\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 0001 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0001 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0001\u0031\u0027", new String[] { "\u0031" }); - // ÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -284,12 +317,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\r\u0308\u002C", new String[] { }); - // ÷ 000D ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\r\u0027", + // ÷ 000D ÷ 002E ÷ # ÷ [0.2] (CR) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u002E", new String[] { }); - // ÷ 000D ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\r\u0308\u0027", + // ÷ 000D ÷ 0308 ÷ 002E ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u002E", new String[] { }); // ÷ 000D ÷ 0030 ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] @@ -308,6 +341,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\r\u0308\u005F", new String[] { }); + // ÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ 
[0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 000D ÷ 05D0 ÷ # ÷ [0.2] (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u05D0", + new String[] { "\u05D0" }); + + // ÷ 000D ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 000D ÷ 0022 ÷ # ÷ [0.2] (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\"", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\"", + new String[] { }); + + // ÷ 000D ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0027", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0027", + new String[] { }); + // ÷ 000D ÷ 00AD ÷ # ÷ [0.2] (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\r\u00AD", new String[] { }); @@ -340,19 +405,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 000D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\r\u0061\u0027", new String[] { "\u0061" }); - // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL 
LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -372,11 +437,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 000D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\r\u0031\u0027", new String[] { "\u0031" }); - // ÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] 
APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -460,12 +525,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\n\u0308\u002C", new String[] { }); - // ÷ 000A ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\n\u0027", + // ÷ 000A ÷ 002E ÷ # ÷ [0.2] (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u002E", new String[] { }); - // ÷ 000A ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\n\u0308\u0027", + // ÷ 000A ÷ 0308 ÷ 002E ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u002E", new String[] { }); // ÷ 000A ÷ 0030 ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] @@ -484,6 +549,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\n\u0308\u005F", new String[] { }); + // ÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 000A ÷ 05D0 ÷ # ÷ [0.2] (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u05D0", + new String[] { "\u05D0" }); + + // ÷ 000A ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 000A ÷ 0022 ÷ # ÷ [0.2] (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\n\"", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\"", + new String[] { }); + + // ÷ 000A ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0027", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0027", + new String[] { }); + // ÷ 000A ÷ 00AD ÷ # ÷ [0.2] (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\n\u00AD", new String[] { }); @@ -516,19 +613,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 000A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\n\u0061\u0027", new String[] { "\u0061" }); - // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] 
assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -548,11 +645,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 000A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\n\u0031\u0027", new String[] { "\u0031" }); - // ÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -636,12 +733,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u000B\u0308\u002C", new String[] { }); - // ÷ 000B ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u000B\u0027", + // ÷ 000B ÷ 002E ÷ # ÷ [0.2] (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u002E", new String[] { }); - // ÷ 000B ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] 
APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u000B\u0308\u0027", + // ÷ 000B ÷ 0308 ÷ 002E ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u002E", new String[] { }); // ÷ 000B ÷ 0030 ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] @@ -660,6 +757,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u000B\u0308\u005F", new String[] { }); + // ÷ 000B ÷ 1F1E6 ÷ # ÷ [0.2] (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 000B ÷ 05D0 ÷ # ÷ [0.2] (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u05D0", + new String[] { "\u05D0" }); + + // ÷ 000B ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 000B ÷ 0022 ÷ # ÷ [0.2] (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\"", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\"", + new String[] { }); + + // ÷ 000B ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0027", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u000B\u0308\u0027", + new String[] { }); + // ÷ 000B ÷ 00AD ÷ # ÷ [0.2] (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u000B\u00AD", new String[] { }); @@ -692,19 +821,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 000B ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000B ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u000B\u0061\u0027", new String[] { "\u0061" }); - // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD 
JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -724,11 +853,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 000B ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000B ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u000B\u0031\u0027", new String[] { "\u0031" }); - // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -812,12 +941,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u3031\u0308\u002C", new String[] { "\u3031\u0308" }); - // ÷ 3031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u3031\u0027", + // ÷ 3031 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u002E", new String[] { "\u3031" }); - // ÷ 3031 × 0308 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u3031\u0308\u0027", + // ÷ 3031 × 0308 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u002E", new String[] { "\u3031\u0308" }); // ÷ 3031 ÷ 
0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -836,6 +965,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u3031\u0308\u005F", new String[] { "\u3031\u0308\u005F" }); + // ÷ 3031 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u05D0", + new String[] { "\u3031", "\u05D0" }); + + // ÷ 3031 × 0308 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0", + new String[] { "\u3031\u0308", "\u05D0" }); + + // ÷ 3031 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\"", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\"", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0027", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0027", + new String[] { "\u3031\u0308" }); + // ÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u3031\u00AD", new String[] { "\u3031\u00AD" }); @@ -868,19 +1029,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A", new String[] { "\u3031\u0308", "\u0061" }); - // ÷ 3031 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 3031 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u3031\u0061\u0027", new String[] { "\u3031", "\u0061" }); - // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027", new String[] { "\u3031\u0308", "\u0061" }); - // ÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060", new String[] { "\u3031", "\u0061" }); - // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT 
MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060", new String[] { "\u3031\u0308", "\u0061" }); @@ -900,11 +1061,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A", new String[] { "\u3031\u0308", "\u0031" }); - // ÷ 3031 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 3031 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u3031\u0031\u0027", new String[] { "\u3031", "\u0031" }); - // ÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027", new String[] { "\u3031\u0308", "\u0031" }); @@ -988,12 +1149,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0041\u0308\u002C", new String[] { "\u0041\u0308" }); - // ÷ 0041 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0041\u0027", + // ÷ 0041 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL 
STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u002E", new String[] { "\u0041" }); - // ÷ 0041 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0041\u0308\u0027", + // ÷ 0041 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u002E", new String[] { "\u0041\u0308" }); // ÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -1012,6 +1173,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0041\u0308\u005F", new String[] { "\u0041\u0308\u005F" }); + // ÷ 0041 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u05D0", + new String[] { "\u0041\u05D0" }); + + // ÷ 0041 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0", + new String[] { "\u0041\u0308\u05D0" }); + + // ÷ 0041 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\"", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 0022 ÷ # ÷ 
[0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\"", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0027", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0027", + new String[] { "\u0041\u0308" }); + // ÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0041\u00AD", new String[] { "\u0041\u00AD" }); @@ -1044,19 +1237,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u003A", new String[] { "\u0041\u0308\u0061" }); - // ÷ 0041 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0041 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0041\u0061\u0027", new String[] { "\u0041\u0061" }); - // ÷ 0041 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0041 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027", new String[] { "\u0041\u0308\u0061" }); - // ÷ 0041 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER 
A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0041 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0041\u0061\u0027\u2060", new String[] { "\u0041\u0061" }); - // ÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027\u2060", new String[] { "\u0041\u0308\u0061" }); @@ -1076,11 +1269,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u003A", new String[] { "\u0041\u0308\u0031" }); - // ÷ 0041 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0041 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0041\u0031\u0027", new String[] { "\u0041\u0031" }); - // ÷ 0041 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0041 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u0027", new String[] { "\u0041\u0308\u0031" }); @@ -1164,12 +1357,12 @@ 
public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u003A\u0308\u002C", new String[] { }); - // ÷ 003A ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u003A\u0027", + // ÷ 003A ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u002E", new String[] { }); - // ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u003A\u0308\u0027", + // ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u002E", new String[] { }); // ÷ 003A ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -1188,6 +1381,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u003A\u0308\u005F", new String[] { }); + // ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\uD83C\uDDE6", + new String[] { }); + + // ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 003A ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u05D0", + new String[] { "\u05D0" }); + + // ÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 003A ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] 
QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\"", + new String[] { }); + + // ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\"", + new String[] { }); + + // ÷ 003A ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0027", + new String[] { }); + + // ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0027", + new String[] { }); + // ÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u003A\u00AD", new String[] { }); @@ -1220,19 +1445,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u003A\u0061\u0027", new String[] { "\u0061" }); - // ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER 
(Format_FE) ÷ [0.3] + // ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u003A\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -1252,11 +1477,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u003A\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u003A\u0031\u0027", new String[] { "\u0031" }); - // ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u003A\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -1340,12 +1565,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u002C\u0308\u002C", new String[] { }); - // ÷ 002C ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] 
APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u002C\u0027", + // ÷ 002C ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u002E", new String[] { }); - // ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u002C\u0308\u0027", + // ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u002E", new String[] { }); // ÷ 002C ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -1364,6 +1589,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u002C\u0308\u005F", new String[] { }); + // ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\uD83C\uDDE6", + new String[] { }); + + // ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u05D0", + new String[] { "\u05D0" }); + + // ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 002C ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\"", + new String[] { }); + + // ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK 
(Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\"", + new String[] { }); + + // ÷ 002C ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0027", + new String[] { }); + + // ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0027", + new String[] { }); + // ÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u002C\u00AD", new String[] { }); @@ -1396,19 +1653,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u002C\u0061\u0027", new String[] { "\u0061" }); - // ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u002C\u0061\u0027\u2060", new 
String[] { "\u0061" }); - // ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -1428,11 +1685,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u002C\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u002C\u0031\u0027", new String[] { "\u0031" }); - // ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u002C\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -1452,180 +1709,212 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u002C\u0308\u0031\u002E\u2060", new String[] { "\u0031" }); - // ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] (Other) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0001", + // ÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0001", new String[] { }); - // ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] 
APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0001", + // ÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0001", new String[] { }); - // ÷ 0027 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [3.2] (CR) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\r", + // ÷ 002E ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\r", new String[] { }); - // ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\r", + // ÷ 002E × 0308 ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\r", new String[] { }); - // ÷ 0027 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [3.2] (LF) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\n", + // ÷ 002E ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\n", new String[] { }); - // ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\n", + // ÷ 002E × 0308 ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\n", new String[] { }); - // ÷ 0027 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [3.2] (Newline) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u000B", + // ÷ 002E ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u000B", new String[] { }); - // ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u000B", + // 
÷ 002E × 0308 ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u000B", new String[] { }); - // ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u3031", + // ÷ 002E ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u3031", new String[] { "\u3031" }); - // ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u3031", + // ÷ 002E × 0308 ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u3031", new String[] { "\u3031" }); - // ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0041", + // ÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0041", new String[] { "\u0041" }); - // ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0041", + // ÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0041", new String[] { "\u0041" }); - // ÷ 0027 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u003A", + // ÷ 002E ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u003A", new 
String[] { }); - // ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u003A", + // ÷ 002E × 0308 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u003A", new String[] { }); - // ÷ 0027 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u002C", + // ÷ 002E ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u002C", new String[] { }); - // ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u002C", + // ÷ 002E × 0308 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u002C", new String[] { }); - // ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0027", + // ÷ 002E ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u002E", new String[] { }); - // ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0027", + // ÷ 002E × 0308 ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u002E", new String[] { }); - // ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0030", + // ÷ 002E ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ZERO 
(Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0030", new String[] { "\u0030" }); - // ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0030", + // ÷ 002E × 0308 ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0030", new String[] { "\u0030" }); - // ÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u005F", + // ÷ 002E ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u005F", new String[] { }); - // ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u005F", + // ÷ 002E × 0308 ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u005F", new String[] { }); - // ÷ 0027 × 00AD ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u00AD", + // ÷ 002E ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\uD83C\uDDE6", new String[] { }); - // ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u00AD", + // ÷ 002E × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\uD83C\uDDE6", new String[] { }); - // ÷ 0027 × 
0300 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0300", + // ÷ 002E ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u05D0", + new String[] { "\u05D0" }); + + // ÷ 002E × 0308 ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 002E ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\"", new String[] { }); - // ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0300", + // ÷ 002E × 0308 ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\"", new String[] { }); - // ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0061\u2060", + // ÷ 002E ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0027", + new String[] { }); + + // ÷ 002E × 0308 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0027", + new String[] { }); + + // ÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u00AD", + new String[] { }); + + // ÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN 
(Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u00AD", + new String[] { }); + + // ÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0300", + new String[] { }); + + // ÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0300", + new String[] { }); + + // ÷ 002E ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0061\u2060", new String[] { "\u0061\u2060" }); - // ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u2060", + // ÷ 002E × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0061\u2060", new String[] { "\u0061\u2060" }); - // ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0061\u003A", + // ÷ 002E ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0061\u003A", new String[] { "\u0061" }); - // ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u003A", + // ÷ 002E × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ 
[999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0061\u0027", + // ÷ 002E ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0061\u0027", new String[] { "\u0061" }); - // ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u0027", + // ÷ 002E × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0061\u0027\u2060", + // ÷ 002E ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u0027\u2060", + // ÷ 002E × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING 
DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0061\u002C", + // ÷ 002E ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0061\u002C", new String[] { "\u0061" }); - // ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u002C", + // ÷ 002E × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0061\u002C", new String[] { "\u0061" }); - // ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0031\u003A", + // ÷ 002E ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0031\u003A", new String[] { "\u0031" }); - // ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u003A", + // ÷ 002E × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0031\u003A", new String[] { 
"\u0031" }); - // ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0031\u0027", + // ÷ 002E ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0031\u0027", new String[] { "\u0031" }); - // ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u0027", + // ÷ 002E × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0031\u0027", new String[] { "\u0031" }); - // ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0031\u002C", + // ÷ 002E ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0031\u002C", new String[] { "\u0031" }); - // ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u002C", + // ÷ 002E × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0031\u002C", new String[] { "\u0031" }); - // ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0031\u002E\u2060", + 
// ÷ 002E ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0031\u002E\u2060", new String[] { "\u0031" }); - // ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u002E\u2060", + // ÷ 002E × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002E\u0308\u0031\u002E\u2060", new String[] { "\u0031" }); // ÷ 0030 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] (Other) ÷ [0.3] @@ -1692,12 +1981,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0030\u0308\u002C", new String[] { "\u0030\u0308" }); - // ÷ 0030 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0030\u0027", + // ÷ 0030 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u002E", new String[] { "\u0030" }); - // ÷ 0030 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0030\u0308\u0027", + // ÷ 0030 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u002E", new String[] { "\u0030\u0308" }); // ÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -1716,6 +2005,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { 
assertAnalyzesTo(analyzer, "\u0030\u0308\u005F", new String[] { "\u0030\u0308\u005F" }); + // ÷ 0030 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\uD83C\uDDE6", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\uD83C\uDDE6", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u05D0", + new String[] { "\u0030\u05D0" }); + + // ÷ 0030 × 0308 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u05D0", + new String[] { "\u0030\u0308\u05D0" }); + + // ÷ 0030 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\"", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\"", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0027", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0027", + new String[] { "\u0030\u0308" }); + // ÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0030\u00AD", new String[] { "\u0030\u00AD" }); @@ -1748,19 +2069,19 
@@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u003A", new String[] { "\u0030\u0308\u0061" }); - // ÷ 0030 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0030 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0030\u0061\u0027", new String[] { "\u0030\u0061" }); - // ÷ 0030 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0030 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u0027", new String[] { "\u0030\u0308\u0061" }); - // ÷ 0030 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0030 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0030\u0061\u0027\u2060", new String[] { "\u0030\u0061" }); - // ÷ 0030 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0030 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u0027\u2060", new String[] { 
"\u0030\u0308\u0061" }); @@ -1780,11 +2101,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0030\u0308\u0031\u003A", new String[] { "\u0030\u0308\u0031" }); - // ÷ 0030 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0030 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0030\u0031\u0027", new String[] { "\u0030\u0031" }); - // ÷ 0030 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0030 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0030\u0308\u0031\u0027", new String[] { "\u0030\u0308\u0031" }); @@ -1868,12 +2189,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u005F\u0308\u002C", new String[] { }); - // ÷ 005F ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u005F\u0027", + // ÷ 005F ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u002E", new String[] { }); - // ÷ 005F × 0308 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u005F\u0308\u0027", + // ÷ 005F × 0308 ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u002E", new String[] { }); // ÷ 005F × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3] @@ -1892,6 +2213,38 @@ 
public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u005F\u0308\u005F", new String[] { }); + // ÷ 005F ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\uD83C\uDDE6", + new String[] { }); + + // ÷ 005F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 005F × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u05D0", + new String[] { "\u005F\u05D0" }); + + // ÷ 005F × 0308 × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u05D0", + new String[] { "\u005F\u0308\u05D0" }); + + // ÷ 005F ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\"", + new String[] { }); + + // ÷ 005F × 0308 ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\"", + new String[] { }); + + // ÷ 005F ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0027", + new String[] { }); + + // ÷ 005F × 0308 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0027", + new String[] { }); + // ÷ 005F × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u005F\u00AD", new String[] { }); @@ -1924,19 +2277,19 @@ 
public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u003A", new String[] { "\u005F\u0308\u0061" }); - // ÷ 005F × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 005F × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u005F\u0061\u0027", new String[] { "\u005F\u0061" }); - // ÷ 005F × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 005F × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u0027", new String[] { "\u005F\u0308\u0061" }); - // ÷ 005F × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 005F × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u005F\u0061\u0027\u2060", new String[] { "\u005F\u0061" }); - // ÷ 005F × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 005F × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u0027\u2060", 
new String[] { "\u005F\u0308\u0061" }); @@ -1956,11 +2309,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u005F\u0308\u0031\u003A", new String[] { "\u005F\u0308\u0031" }); - // ÷ 005F × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 005F × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u005F\u0031\u0027", new String[] { "\u005F\u0031" }); - // ÷ 005F × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 005F × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u005F\u0308\u0031\u0027", new String[] { "\u005F\u0308\u0031" }); @@ -1980,6 +2333,838 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u005F\u0308\u0031\u002E\u2060", new String[] { "\u005F\u0308\u0031" }); + // ÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0001", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0001", + new String[] { }); + + // ÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\r", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ 
[0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\r", + new String[] { }); + + // ÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\n", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\n", + new String[] { }); + + // ÷ 1F1E6 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u000B", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u000B", + new String[] { }); + + // ÷ 1F1E6 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u3031", + new String[] { "\u3031" }); + + // ÷ 1F1E6 × 0308 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 1F1E6 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0041", + new String[] { "\u0041" }); + + // ÷ 1F1E6 × 0308 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 1F1E6 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A 
(Regional_Indicator) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u003A", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u003A", + new String[] { }); + + // ÷ 1F1E6 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u002C", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u002C", + new String[] { }); + + // ÷ 1F1E6 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u002E", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u002E", + new String[] { }); + + // ÷ 1F1E6 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0030", + new String[] { "\u0030" }); + + // ÷ 1F1E6 × 0308 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 1F1E6 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u005F", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 005F ÷ # ÷ [0.2] 
REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u005F", + new String[] { }); + + // ÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\uD83C\uDDE6", + new String[] { }); + + // ÷ 1F1E6 × 0308 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.3] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 1F1E6 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u05D0", + new String[] { "\u05D0" }); + + // ÷ 1F1E6 × 0308 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 1F1E6 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\"", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\"", + new String[] { }); + + // ÷ 1F1E6 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0027", + new String[] { }); + + // ÷ 1F1E6 × 0308 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER 
A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0027", + new String[] { }); + + // ÷ 1F1E6 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u00AD", + new String[] { }); + + // ÷ 1F1E6 × 0308 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u00AD", + new String[] { }); + + // ÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0300", + new String[] { }); + + // ÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0300", + new String[] { }); + + // ÷ 1F1E6 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 1F1E6 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 1F1E6 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 1F1E6 × 0308 
÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 1F1E6 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 1F1E6 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 1F1E6 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 1F1E6 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 1F1E6 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 1F1E6 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 1F1E6 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 1F1E6 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 1F1E6 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 1F1E6 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 1F1E6 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER 
(Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 1F1E6 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 05D0 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0001", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0001", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\r", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\r", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\n", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\n", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u000B", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u000B", + new String[] { 
"\u05D0\u0308" }); + + // ÷ 05D0 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u3031", + new String[] { "\u05D0", "\u3031" }); + + // ÷ 05D0 × 0308 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u3031", + new String[] { "\u05D0\u0308", "\u3031" }); + + // ÷ 05D0 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0041", + new String[] { "\u05D0\u0041" }); + + // ÷ 05D0 × 0308 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0041", + new String[] { "\u05D0\u0308\u0041" }); + + // ÷ 05D0 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u003A", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u003A", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u002C", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u002C", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u002E", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 002E ÷ # ÷ [0.2] 
HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u002E", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0030", + new String[] { "\u05D0\u0030" }); + + // ÷ 05D0 × 0308 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0030", + new String[] { "\u05D0\u0308\u0030" }); + + // ÷ 05D0 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u005F", + new String[] { "\u05D0\u005F" }); + + // ÷ 05D0 × 0308 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u005F", + new String[] { "\u05D0\u0308\u005F" }); + + // ÷ 05D0 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\uD83C\uDDE6", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\uD83C\uDDE6", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u05D0", + new String[] { "\u05D0\u05D0" }); + + // ÷ 05D0 × 0308 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u05D0", + 
new String[] { "\u05D0\u0308\u05D0" }); + + // ÷ 05D0 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\"", + new String[] { "\u05D0" }); + + // ÷ 05D0 × 0308 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\"", + new String[] { "\u05D0\u0308" }); + + // ÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0027", + new String[] { "\u05D0\u0027" }); + + // ÷ 05D0 × 0308 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0027", + new String[] { "\u05D0\u0308\u0027" }); + + // ÷ 05D0 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u00AD", + new String[] { "\u05D0\u00AD" }); + + // ÷ 05D0 × 0308 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u00AD", + new String[] { "\u05D0\u0308\u00AD" }); + + // ÷ 05D0 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0300", + new String[] { "\u05D0\u0300" }); + + // ÷ 05D0 × 0308 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0300", + new String[] { "\u05D0\u0308\u0300" }); + + // ÷ 05D0 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, 
"\u05D0\u0061\u2060", + new String[] { "\u05D0\u0061\u2060" }); + + // ÷ 05D0 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0061\u2060", + new String[] { "\u05D0\u0308\u0061\u2060" }); + + // ÷ 05D0 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0061\u003A", + new String[] { "\u05D0\u0061" }); + + // ÷ 05D0 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0061\u003A", + new String[] { "\u05D0\u0308\u0061" }); + + // ÷ 05D0 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0061\u0027", + new String[] { "\u05D0\u0061" }); + + // ÷ 05D0 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0061\u0027", + new String[] { "\u05D0\u0308\u0061" }); + + // ÷ 05D0 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0061\u0027\u2060", + new String[] { "\u05D0\u0061" }); + + // ÷ 05D0 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u05D0\u0308\u0061\u0027\u2060", + new String[] { "\u05D0\u0308\u0061" }); + + // ÷ 05D0 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0061\u002C", + new String[] { "\u05D0\u0061" }); + + // ÷ 05D0 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0061\u002C", + new String[] { "\u05D0\u0308\u0061" }); + + // ÷ 05D0 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0031\u003A", + new String[] { "\u05D0\u0031" }); + + // ÷ 05D0 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0031\u003A", + new String[] { "\u05D0\u0308\u0031" }); + + // ÷ 05D0 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0031\u0027", + new String[] { "\u05D0\u0031" }); + + // ÷ 05D0 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0031\u0027", + new String[] { "\u05D0\u0308\u0031" }); + + // ÷ 05D0 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0031\u002C", + new String[] { "\u05D0\u0031" }); + + // ÷ 05D0 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING 
DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0031\u002C", + new String[] { "\u05D0\u0308\u0031" }); + + // ÷ 05D0 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0031\u002E\u2060", + new String[] { "\u05D0\u0031" }); + + // ÷ 05D0 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u05D0\u0308\u0031\u002E\u2060", + new String[] { "\u05D0\u0308\u0031" }); + + // ÷ 0022 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0001", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0001", + new String[] { }); + + // ÷ 0022 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\r", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\r", + new String[] { }); + + // ÷ 0022 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\n", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\n", + new String[] { }); + + // ÷ 0022 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u000B", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 000B 
÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u000B", + new String[] { }); + + // ÷ 0022 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u3031", + new String[] { "\u3031" }); + + // ÷ 0022 × 0308 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 0022 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0041", + new String[] { "\u0041" }); + + // ÷ 0022 × 0308 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 0022 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u003A", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u003A", + new String[] { }); + + // ÷ 0022 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u002C", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u002C", + new String[] { }); + + // ÷ 0022 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u002E", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK 
(Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u002E", + new String[] { }); + + // ÷ 0022 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0030", + new String[] { "\u0030" }); + + // ÷ 0022 × 0308 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 0022 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u005F", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u005F", + new String[] { }); + + // ÷ 0022 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\uD83C\uDDE6", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 0022 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0022 × 0308 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0022 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\"", + new 
String[] { }); + + // ÷ 0022 × 0308 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\"", + new String[] { }); + + // ÷ 0022 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0027", + new String[] { }); + + // ÷ 0022 × 0308 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0027", + new String[] { }); + + // ÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u00AD", + new String[] { }); + + // ÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u00AD", + new String[] { }); + + // ÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0300", + new String[] { }); + + // ÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0300", + new String[] { }); + + // ÷ 0022 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0022 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0022 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK 
(Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0022 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0022 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0022 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0022 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0022 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0022 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0022 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\"\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0022 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0022 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0022 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0022 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0022 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0022 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0022 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0022 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER 
(Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\"\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0001", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0001", + new String[] { }); + + // ÷ 0027 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\r", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\r", + new String[] { }); + + // ÷ 0027 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\n", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\n", + new String[] { }); + + // ÷ 0027 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u000B", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u000B", + new String[] { }); + + // ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u3031", + new String[] { "\u3031" }); + + // ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN 
CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0041", + new String[] { "\u0041" }); + + // ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 0027 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u003A", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u003A", + new String[] { }); + + // ÷ 0027 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u002C", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u002C", + new String[] { }); + + // ÷ 0027 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u002E", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u002E", + new String[] { }); + + // ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0030", + new String[] { "\u0030" }); + + // ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u0027\u005F", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u005F", + new String[] { }); + + // ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\uD83C\uDDE6", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\"", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\"", + new String[] { }); + + // ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0027", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0027", + new String[] { }); + + // ÷ 0027 × 00AD ÷ # ÷ [0.2] 
APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u00AD", + new String[] { }); + + // ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u00AD", + new String[] { }); + + // ÷ 0027 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0300", + new String[] { }); + + // ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0300", + new String[] { }); + + // ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u0027", + new String[] { 
"\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE 
(Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + // ÷ 00AD ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] (Other) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u0001", new String[] { }); @@ -2044,12 +3229,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u00AD\u0308\u002C", new String[] { }); - // ÷ 00AD ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u00AD\u0027", + // ÷ 00AD ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u00AD\u002E", new String[] { }); - // ÷ 00AD × 0308 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u00AD\u0308\u0027", + // ÷ 00AD × 0308 ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u002E", new String[] { }); // ÷ 00AD ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -2068,6 +3253,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u00AD\u0308\u005F", new String[] { }); + // ÷ 00AD ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\uD83C\uDDE6", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 00AD ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u05D0", + new String[] { "\u05D0" }); + + // ÷ 00AD × 0308 ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 00AD ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\"", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\"", + new String[] { }); + + 
// ÷ 00AD ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0027", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0027", + new String[] { }); + // ÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u00AD", new String[] { }); @@ -2100,19 +3317,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 00AD ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 00AD ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u0061\u0027", new String[] { "\u0061" }); - // ÷ 00AD × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 00AD × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 00AD ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 00AD ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u0061\u0027\u2060", new String[] { 
"\u0061" }); - // ÷ 00AD × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 00AD × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -2132,11 +3349,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u00AD\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 00AD ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 00AD ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u0031\u0027", new String[] { "\u0031" }); - // ÷ 00AD × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 00AD × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u00AD\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -2220,12 +3437,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0300\u0308\u002C", new String[] { }); - // ÷ 0300 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0300\u0027", + // ÷ 0300 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, 
"\u0300\u002E", new String[] { }); - // ÷ 0300 × 0308 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0300\u0308\u0027", + // ÷ 0300 × 0308 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u002E", new String[] { }); // ÷ 0300 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -2244,6 +3461,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0300\u0308\u005F", new String[] { }); + // ÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\uD83C\uDDE6", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\uD83C\uDDE6", + new String[] { }); + + // ÷ 0300 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0300 × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u05D0", + new String[] { "\u05D0" }); + + // ÷ 0300 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\"", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u0300\u0308\"", + new String[] { }); + + // ÷ 0300 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0027", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0027", + new String[] { }); + // ÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0300\u00AD", new String[] { }); @@ -2276,19 +3525,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u003A", new String[] { "\u0061" }); - // ÷ 0300 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0300 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0300\u0061\u0027", new String[] { "\u0061" }); - // ÷ 0300 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0300 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u0027", new String[] { "\u0061" }); - // ÷ 0300 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0300 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL 
LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0300\u0061\u0027\u2060", new String[] { "\u0061" }); - // ÷ 0300 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0300 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u0027\u2060", new String[] { "\u0061" }); @@ -2308,11 +3557,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0300\u0308\u0031\u003A", new String[] { "\u0031" }); - // ÷ 0300 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0300 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0300\u0031\u0027", new String[] { "\u0031" }); - // ÷ 0300 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0300 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0300\u0308\u0031\u0027", new String[] { "\u0031" }); @@ -2396,12 +3645,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u002C", new String[] { "\u0061\u2060\u0308" }); - // ÷ 0061 × 2060 ÷ 0027 ÷ # ÷ [0.2] 
LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u2060\u0027", + // ÷ 0061 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u002E", new String[] { "\u0061\u2060" }); - // ÷ 0061 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0027", + // ÷ 0061 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u002E", new String[] { "\u0061\u2060\u0308" }); // ÷ 0061 × 2060 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -2420,6 +3669,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u005F", new String[] { "\u0061\u2060\u0308\u005F" }); + // ÷ 0061 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\uD83C\uDDE6", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\uD83C\uDDE6", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] 
+ assertAnalyzesTo(analyzer, "\u0061\u2060\u05D0", + new String[] { "\u0061\u2060\u05D0" }); + + // ÷ 0061 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u05D0", + new String[] { "\u0061\u2060\u0308\u05D0" }); + + // ÷ 0061 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\"", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\"", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0027", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0027", + new String[] { "\u0061\u2060\u0308" }); + // ÷ 0061 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u2060\u00AD", new String[] { "\u0061\u2060\u00AD" }); @@ -2452,19 +3733,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u003A", new String[] { "\u0061\u2060\u0308\u0061" }); - // ÷ 0061 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × 
[5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u2060\u0061\u0027", new String[] { "\u0061\u2060\u0061" }); - // ÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u0027", new String[] { "\u0061\u2060\u0308\u0061" }); - // ÷ 0061 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u2060\u0061\u0027\u2060", new String[] { "\u0061\u2060\u0061" }); - // ÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE 
(Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u0027\u2060", new String[] { "\u0061\u2060\u0308\u0061" }); @@ -2484,11 +3765,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0031\u003A", new String[] { "\u0061\u2060\u0308\u0031" }); - // ÷ 0061 × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u2060\u0031\u0027", new String[] { "\u0061\u2060\u0031" }); - // ÷ 0061 × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0031\u0027", new String[] { "\u0061\u2060\u0308\u0031" }); @@ -2572,12 +3853,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u002C", new String[] { "\u0061" }); - // ÷ 0061 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u003A\u0027", + // ÷ 0061 ÷ 003A ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u002E", new String[] { "\u0061" }); - // ÷ 0061 ÷ 003A × 0308 ÷ 
0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0027", + // ÷ 0061 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u002E", new String[] { "\u0061" }); // ÷ 0061 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -2596,6 +3877,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u005F", new String[] { "\u0061" }); + // ÷ 0061 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 × 003A × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u05D0", + new String[] { "\u0061\u003A\u05D0" }); + + // ÷ 0061 × 003A × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u05D0", + new String[] { "\u0061\u003A\u0308\u05D0" }); + + // ÷ 0061 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON 
(MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0027", + new String[] { "\u0061" }); + // ÷ 0061 ÷ 003A × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u003A\u00AD", new String[] { "\u0061" }); @@ -2628,19 +3941,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u003A", new String[] { "\u0061\u003A\u0308\u0061" }); - // ÷ 0061 × 003A × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 003A × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u003A\u0061\u0027", new String[] { "\u0061\u003A\u0061" }); - // ÷ 0061 × 003A × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] 
APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 003A × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u0027", new String[] { "\u0061\u003A\u0308\u0061" }); - // ÷ 0061 × 003A × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 003A × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u003A\u0061\u0027\u2060", new String[] { "\u0061\u003A\u0061" }); - // ÷ 0061 × 003A × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 003A × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u0027\u2060", new String[] { "\u0061\u003A\u0308\u0061" }); @@ -2660,11 +3973,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0031\u003A", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL 
LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u003A\u0031\u0027", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0031\u0027", new String[] { "\u0061", "\u0031" }); @@ -2684,355 +3997,419 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0031\u002E\u2060", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] (Other) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] (Other) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0001", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0001", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.2] (CR) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] (CR) ÷ [0.3] 
assertAnalyzesTo(analyzer, "\u0061\u0027\r", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\r", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.2] (LF) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] (LF) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\n", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\n", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.2] (Newline) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] (Newline) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u000B", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u000B", new String[] { 
"\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u3031", new String[] { "\u0061", "\u3031" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u3031", new String[] { "\u0061", "\u3031" }); - // ÷ 0061 × 0027 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + // ÷ 0061 × 0027 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0041", new String[] { "\u0061\u0027\u0041" }); - // ÷ 0061 × 0027 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + // ÷ 0061 × 0027 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0041", new String[] { "\u0061\u0027\u0308\u0041" }); - // ÷ 0061 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL 
LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u003A", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u003A", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u002C", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u002C", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u0027\u0027", + // ÷ 0061 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u002E", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ 
[999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0027", + // ÷ 0061 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u002E", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0030", new String[] { "\u0061", "\u0030" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0030", new String[] { "\u0061", "\u0030" }); - // ÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u005F", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] assertAnalyzesTo(analyzer, 
"\u0061\u0027\u0308\u005F", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 × 0027 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u05D0", + new String[] { "\u0061\u0027\u05D0" }); + + // ÷ 0061 × 0027 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u05D0", + new String[] { "\u0061\u0027\u0308\u05D0" }); + + // ÷ 0061 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE 
(Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u00AD", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u00AD", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0300", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0300", new String[] { "\u0061" }); - // ÷ 0061 × 0027 × 0061 × 2060 ÷ # ÷ [0.2] LATIN 
SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u2060", new String[] { "\u0061\u0027\u0061\u2060" }); - // ÷ 0061 × 0027 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u2060", new String[] { "\u0061\u0027\u0308\u0061\u2060" }); - // ÷ 0061 × 0027 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 × 0027 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u003A", new String[] { "\u0061\u0027\u0061" }); - // ÷ 0061 × 0027 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u003A", new 
String[] { "\u0061\u0027\u0308\u0061" }); - // ÷ 0061 × 0027 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 0027 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u0027", new String[] { "\u0061\u0027\u0061" }); - // ÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u0027", new String[] { "\u0061\u0027\u0308\u0061" }); - // ÷ 0061 × 0027 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u0027\u2060", new String[] { "\u0061\u0027\u0061" }); - // ÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN 
SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u0027\u2060", new String[] { "\u0061\u0027\u0308\u0061" }); - // ÷ 0061 × 0027 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 × 0027 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u002C", new String[] { "\u0061\u0027\u0061" }); - // ÷ 0061 × 0027 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u002C", new String[] { "\u0061\u0027\u0308\u0061" }); - // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u003A", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON 
(MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u003A", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u0027", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u0027", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u002C", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] 
COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u002C", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u002E\u2060", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u002E\u2060", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] (Other) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] (Other) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0001", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN 
SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0001", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] (CR) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] (CR) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\r", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\r", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] (LF) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] (LF) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\n", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + // ÷ 
0061 ÷ 0027 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\n", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] (Newline) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] (Newline) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u000B", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u000B", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u3031", new String[] { "\u0061", "\u3031" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 
3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u3031", new String[] { "\u0061", "\u3031" }); - // ÷ 0061 × 0027 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0041", new String[] { "\u0061\u0027\u2060\u0041" }); - // ÷ 0061 × 0027 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0041", new String[] { "\u0061\u0027\u2060\u0308\u0041" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u003A", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u003A", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u002C", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u002C", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0027", + // ÷ 0061 ÷ 0027 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u002E", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] 
COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0027", + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u002E", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0030", new String[] { "\u0061", "\u0030" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0030", new String[] { "\u0061", "\u0030" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u005F", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u005F", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 × 0027 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u05D0", + new String[] { "\u0061\u0027\u2060\u05D0" }); + + // ÷ 0061 × 0027 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u05D0", + new String[] { "\u0061\u0027\u2060\u0308\u05D0" }); + 
+ // ÷ 0061 ÷ 0027 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u00AD", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, 
"\u0061\u0027\u2060\u0308\u00AD", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0300", new String[] { "\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0300", new String[] { "\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u2060", new String[] { "\u0061\u0027\u2060\u0061\u2060" }); - // ÷ 0061 × 0027 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] 
LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u2060", new String[] { "\u0061\u0027\u2060\u0308\u0061\u2060" }); - // ÷ 0061 × 0027 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u003A", new String[] { "\u0061\u0027\u2060\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u003A", new String[] { "\u0061\u0027\u2060\u0308\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE 
(Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u0027", new String[] { "\u0061\u0027\u2060\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u0027", new String[] { "\u0061\u0027\u2060\u0308\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u0027\u2060", new String[] { "\u0061\u0027\u2060\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u0027\u2060", new String[] { "\u0061\u0027\u2060\u0308\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u002C", new String[] { "\u0061\u0027\u2060\u0061" }); - // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u002C", new String[] { "\u0061\u0027\u2060\u0308\u0061" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u003A", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] 
LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u003A", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u0027", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u0027", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 
0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u002C", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u002C", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u002E\u2060", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) 
× [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u002E\u2060", new String[] { "\u0061", "\u0031" }); @@ -3100,12 +4477,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u002C", new String[] { "\u0061" }); - // ÷ 0061 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u002C\u0027", + // ÷ 0061 ÷ 002C ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u002E", new String[] { "\u0061" }); - // ÷ 0061 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0027", + // ÷ 0061 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u002E", new String[] { "\u0061" }); // ÷ 0061 ÷ 002C ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -3124,6 +4501,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u005F", new String[] { "\u0061" }); + // ÷ 0061 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA 
(MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\uD83C\uDDE6", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u05D0", + new String[] { "\u0061", "\u05D0" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u05D0", + new String[] { "\u0061", "\u05D0" }); + + // ÷ 0061 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\"", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0027", + new String[] { "\u0061" }); + // ÷ 0061 ÷ 002C × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u002C\u00AD", new String[] { "\u0061" }); @@ -3156,19 +4565,19 @@ public class 
WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u003A", new String[] { "\u0061", "\u0061" }); - // ÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u002C\u0061\u0027", new String[] { "\u0061", "\u0061" }); - // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u0027", new String[] { "\u0061", "\u0061" }); - // ÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u002C\u0061\u0027\u2060", new String[] { "\u0061", "\u0061" }); - // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] 
+ // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u0027\u2060", new String[] { "\u0061", "\u0061" }); @@ -3188,11 +4597,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0031\u003A", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u002C\u0031\u0027", new String[] { "\u0061", "\u0031" }); - // ÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0031\u0027", new String[] { "\u0061", "\u0031" }); @@ -3276,12 +4685,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u002C", new String[] { "\u0031" }); - // ÷ 0031 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u003A\u0027", + // ÷ 0031 ÷ 003A ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) 
÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u002E", new String[] { "\u0031" }); - // ÷ 0031 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0027", + // ÷ 0031 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u002E", new String[] { "\u0031" }); // ÷ 0031 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -3300,6 +4709,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u005F", new String[] { "\u0031" }); + // ÷ 0031 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 003A 
÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0027", + new String[] { "\u0031" }); + // ÷ 0031 ÷ 003A × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u003A\u00AD", new String[] { "\u0031" }); @@ -3332,19 +4773,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u003A", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u003A\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ 
[0.3] + // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u003A\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); @@ -3364,11 +4805,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0031\u003A", new String[] { "\u0031", "\u0031" }); - // ÷ 0031 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE 
(Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u003A\u0031\u0027", new String[] { "\u0031", "\u0031" }); - // ÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0031\u0027", new String[] { "\u0031", "\u0031" }); @@ -3388,179 +4829,211 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0031\u002E\u2060", new String[] { "\u0031", "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] (Other) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] (Other) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0001", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0001", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.2] (CR) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] (CR) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\r", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [3.2] (CR) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (CR) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\r", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.2] (LF) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] (LF) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\n", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (LF) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\n", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.2] (Newline) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] (Newline) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u000B", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] (Newline) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u000B", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] 
assertAnalyzesTo(analyzer, "\u0031\u0027\u3031", new String[] { "\u0031", "\u3031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u3031", new String[] { "\u0031", "\u3031" }); - // ÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0041", new String[] { "\u0031", "\u0041" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0041", new String[] { "\u0031", "\u0041" }); - // ÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u003A", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] 
APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u003A", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u002C", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u002C", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u0027\u0027", + // ÷ 0031 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u002E", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0027", + // ÷ 0031 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u002E", new String[] { "\u0031" }); - // ÷ 0031 × 0027 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] + // ÷ 0031 × 0027 × 0030 ÷ # ÷ 
[0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0030", new String[] { "\u0031\u0027\u0030" }); - // ÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] + // ÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0030", new String[] { "\u0031\u0027\u0308\u0030" }); - // ÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u005F", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u005F", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR 
SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u00AD", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS 
(Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u00AD", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0300", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0300", new String[] { "\u0031" }); - // ÷ 0031 ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u2060", new String[] { "\u0031", "\u0061\u2060" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL 
LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u2060", new String[] { "\u0031", "\u0061\u2060" }); - // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u003A", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u003A", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] 
LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u002C", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] 
COMMA (MidNum) ÷ [0.3] + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u002C", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 × 0027 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0031 × 0027 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0031\u003A", new String[] { "\u0031\u0027\u0031" }); - // ÷ 0031 × 0027 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + // ÷ 0031 × 0027 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0031\u003A", new String[] { "\u0031\u0027\u0308\u0031" }); - // ÷ 0031 × 0027 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 × 0027 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0031\u0027", new String[] { "\u0031\u0027\u0031" }); - // ÷ 0031 × 0027 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 × 0027 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] 
DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0031\u0027", new String[] { "\u0031\u0027\u0308\u0031" }); - // ÷ 0031 × 0027 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0031 × 0027 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0031\u002C", new String[] { "\u0031\u0027\u0031" }); - // ÷ 0031 × 0027 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + // ÷ 0031 × 0027 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0031\u002C", new String[] { "\u0031\u0027\u0308\u0031" }); - // ÷ 0031 × 0027 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 × 0027 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0031\u002E\u2060", new String[] { "\u0031\u0027\u0031" }); - // ÷ 0031 × 0027 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 × 0027 × 
0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0031\u002E\u2060", new String[] { "\u0031\u0027\u0308\u0031" }); @@ -3628,12 +5101,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u002C", new String[] { "\u0031" }); - // ÷ 0031 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u002C\u0027", + // ÷ 0031 ÷ 002C ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u002E", new String[] { "\u0031" }); - // ÷ 0031 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0027", + // ÷ 0031 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u002E", new String[] { "\u0031" }); // ÷ 0031 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -3652,6 +5125,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u005F", new String[] { "\u0031" }); + // ÷ 0031 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) 
÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0027", + new String[] { "\u0031" }); + // ÷ 0031 ÷ 002C × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002C\u00AD", new String[] { "\u0031" }); @@ -3684,19 +5189,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { 
assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u003A", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002C\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002C\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); @@ -3716,11 +5221,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0031\u003A", new String[] { "\u0031\u002C\u0308\u0031" }); - // ÷ 0031 × 002C × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 × 002C × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002C\u0031\u0027", new String[] { "\u0031\u002C\u0031" }); - // ÷ 0031 × 002C × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 × 002C × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0031\u0027", new String[] { "\u0031\u002C\u0308\u0031" }); @@ -3804,12 +5309,12 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u002C", new String[] { "\u0031" }); - // ÷ 0031 ÷ 002E × 2060 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0027", + // ÷ 0031 ÷ 002E × 2060 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, 
"\u0031\u002E\u2060\u002E", new String[] { "\u0031" }); - // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] - assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0027", + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u002E", new String[] { "\u0031" }); // ÷ 0031 × 002E × 2060 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] @@ -3828,6 +5333,38 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u005F", new String[] { "\u0031" }); + // ÷ 0031 ÷ 002E × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\uD83C\uDDE6", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ 
[999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u05D0", + new String[] { "\u0031", "\u05D0" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\"", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0027", + new String[] { "\u0031" }); + // ÷ 0031 ÷ 002E × 2060 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u00AD", new String[] { "\u0031" }); @@ -3860,19 +5397,19 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u003A", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER 
(Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u0027", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); - // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 0031 ÷ 002E × 
2060 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u0027\u2060", new String[] { "\u0031", "\u0061" }); @@ -3892,11 +5429,11 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0031\u003A", new String[] { "\u0031\u002E\u2060\u0308\u0031" }); - // ÷ 0031 × 002E × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 × 002E × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0031\u0027", new String[] { "\u0031\u002E\u2060\u0031" }); - // ÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + // ÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0031\u0027", new String[] { "\u0031\u002E\u2060\u0308\u0031" }); @@ -3916,7 +5453,7 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0031\u002E\u2060", new String[] { "\u0031\u002E\u2060\u0308\u0031" }); - // ÷ 0063 × 0061 
× 006E × 0027 × 0074 ÷ # ÷ [0.2] LATIN SMALL LETTER C (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER N (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER T (ALetter) ÷ [0.3] + // ÷ 0063 × 0061 × 006E × 0027 × 0074 ÷ # ÷ [0.2] LATIN SMALL LETTER C (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER N (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER T (ALetter) ÷ [0.3] assertAnalyzesTo(analyzer, "\u0063\u0061\u006E\u0027\u0074", new String[] { "\u0063\u0061\u006E\u0027\u0074" }); @@ -3936,7 +5473,7 @@ public class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u0033\u0061", new String[] { "\u0033\u0061" }); - // ÷ 2060 ÷ 0063 × 2060 × 0061 × 2060 × 006E × 2060 × 0027 × 2060 × 0074 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER C (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER N (ALetter) × [4.0] WORD JOINER (Format_FE) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER T (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + // ÷ 2060 ÷ 0063 × 2060 × 0061 × 2060 × 006E × 2060 × 0027 × 2060 × 0074 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER C (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER N (ALetter) × [4.0] WORD JOINER (Format_FE) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER T (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] assertAnalyzesTo(analyzer, "\u2060\u0063\u2060\u0061\u2060\u006E\u2060\u0027\u2060\u0074\u2060\u2060", new String[] { "\u0063\u2060\u0061\u2060\u006E\u2060\u0027\u2060\u0074\u2060\u2060" }); @@ -3956,5 +5493,45 @@ public 
class WordBreakTestUnicode_6_1_0 extends BaseTokenStreamTestCase { assertAnalyzesTo(analyzer, "\u2060\u0033\u2060\u0061\u2060\u2060", new String[] { "\u0033\u2060\u0061\u2060\u2060" }); + // ÷ 0061 ÷ 1F1E6 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\uD83C\uDDE6\u0062", + new String[] { "\u0061", "\u0062" }); + + // ÷ 1F1F7 × 1F1FA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDF7\uD83C\uDDFA", + new String[] { }); + + // ÷ 1F1F7 × 1F1FA × 1F1F8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDF7\uD83C\uDDFA\uD83C\uDDF8", + new String[] { }); + + // ÷ 1F1F7 × 1F1FA × 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDF7\uD83C\uDDFA\uD83C\uDDF8\uD83C\uDDEA", + new String[] { }); + + // ÷ 1F1F7 × 1F1FA ÷ 200B ÷ 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [999.0] ZERO WIDTH SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDF7\uD83C\uDDFA\u200B\uD83C\uDDF8\uD83C\uDDEA", + new String[] { }); + + // ÷ 1F1E6 × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A 
(Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\uD83C\uDDE7\uD83C\uDDE8", + new String[] { }); + + // ÷ 1F1E6 × 200D × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] ZERO WIDTH JOINER (Extend_FE) × [13.3] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\u200D\uD83C\uDDE7\uD83C\uDDE8", + new String[] { }); + + // ÷ 1F1E6 × 1F1E7 × 200D × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [4.0] ZERO WIDTH JOINER (Extend_FE) × [13.3] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3] + assertAnalyzesTo(analyzer, "\uD83C\uDDE6\uD83C\uDDE7\u200D\uD83C\uDDE8", + new String[] { }); + + // ÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [4.0] ZERO WIDTH JOINER (Extend_FE) ÷ [999.0] ARABIC LETTER NOON (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0020\u200D\u0646", + new String[] { "\u0646" }); + + // ÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (ALetter) × [4.0] ZERO WIDTH JOINER (Extend_FE) ÷ [999.0] SPACE (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0646\u200D\u0020", + new String[] { "\u0646\u200D" }); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt index 832a2aa749f..ae86ee6e316 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt @@ -78,13 +78,13 @@ LTLNFsgB@[191.56.104.113] iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU VGLn@z3E2.3an2.MM TWmfsxn@[112.192.017.029] -2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV +2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D CjaPC63@['\RDrwk] Ayydpdoa@tdgypppmen.wf "gfKP9"@jo3-r0.mz -aTMgDW4@t5gax.XN--0ZWM56D +aTMgDW4@t5gax.XN--3E0B707E mcDrMO3FQ@nwc21.y5qd45lesryrp.IL -NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp +NZqj@v50egeveepk.z290kk.Bc3.xn--kprw13d XtAhFnq@[218.214.251.103] x0S8uos@[109.82.126.233] ALB4KFavj16pODdd@i206d6s.MM diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl index e4a83ff16ca..46ac3ef3568 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl @@ -78,9 +78,10 @@ import org.junit.Ignore; * \\p{Script = Hiragana} * \\p{LineBreak = Complex_Context} (From $line_break_url) * \\p{WordBreak = ALetter} (From $word_break_url) + * \\p{WordBreak = Hebrew_Letter} * \\p{WordBreak = Katakana} * \\p{WordBreak = Numeric} (Excludes full-width Arabic digits) - * [\\uFF10-\\uFF19] (Full-width Arabic digits) + * [\\uFF10-\\uFF19] (Full-width Arabic digits) */ \@Ignore public class ${class_name} extends BaseTokenStreamTestCase { @@ -97,7 +98,7 @@ parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1}); parse_Unicode_data_file($scripts_url, $codepoints, {'han' => 1, 'hiragana' => 1}); parse_Unicode_data_file($word_break_url, $codepoints, - {'aletter' => 1, 'katakana' => 1, 'numeric' => 1}); + {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1}); my @tests = split /\r?\n/, 
get_URL_content($word_break_test_url); my $output_path = File::Spec->catpath($volume, $directory, $output_filename); @@ -109,25 +110,33 @@ print STDERR "Writing '$output_path'..."; print OUT $header; for my $line (@tests) { - next if ($line =~ /^\s*\#/); - # ÷ 0001 × 0300 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines + # Example line: ÷ 0001 × 0300 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] my ($sequence) = $line =~ /^(.*?)\s*\#/; + $line =~ s/\t/ /g; # Convert tabs to two spaces (no tabs allowed in Lucene source) print OUT " // $line\n"; $sequence =~ s/\s*÷\s*$//; # Trim trailing break character my $test_string = $sequence; $test_string =~ s/\s*÷\s*/\\u/g; $test_string =~ s/\s*×\s*/\\u/g; + $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge; $test_string =~ s/\\u000A/\\n/g; $test_string =~ s/\\u000D/\\r/g; + $test_string =~ s/\\u0022/\\\"/g; $sequence =~ s/^\s*÷\s*//; # Trim leading break character my @tokens = (); for my $candidate (split /\s*÷\s*/, $sequence) { my @chars = (); my $has_wanted_char = 0; while ($candidate =~ /([0-9A-F]+)/gi) { - push @chars, $1; + my $hexchar = $1; + if (4 == length($hexchar)) { + push @chars, $hexchar; + } else { + push @chars, above_BMP_char_to_surrogates($hexchar); + } unless ($has_wanted_char) { - $has_wanted_char = 1 if (defined($codepoints->[hex($1)])); + $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)])); } } if ($has_wanted_char) { @@ -144,6 +153,21 @@ close OUT; print STDERR "done.\n"; +# sub above_BMP_char_to_surrogates +# +# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF) +# to the corresponding UTF-16 surrogate pair +# +# Assumption: input string is a sequence more than four hex digits +# +sub above_BMP_char_to_surrogates { + my $ch = hex(shift); + my $high_surrogate = 0xD800 + (($ch - 0x10000) >> 10); + my 
$low_surrogate = 0xDC00 + ($ch & 0x3FF); + return map { sprintf("%04X", $_) } ($high_surrogate, $low_surrogate); +} + + # sub parse_Unicode_data_file # # Downloads and parses the specified Unicode data file, parses it, and diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt index 71ac34ccd84..84062cd794a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt @@ -121,14 +121,14 @@ Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them "0\!P?".shQVdSerA@2qmqj8ul.hm the leg of LTLNFsgB@[191.56.104.113] all, until it has read it is iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. Once TWmfsxn@[112.192.017.029] Spiros under the place -2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV as were not a house of the +2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D as were not a house of the rosebushes and the whateverend, feel her waist. She changes everything. We had decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us come to, what history as died. Strange, Spiros with delight: That night "gfKP9"@jo3-r0.mz and gold case - is spring: the aeon arising, wherein he returned, + is spring: the aeon arising, wherein he returned, retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first - to reach session. Initiating first + to reach session. Initiating first part of the main hall toward his own spurs. Hes an Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and reality. The hidden set up to come. ROSE WAKINS: No answer. 
The diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt index 241c806e1b5..ef5ad895e22 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt @@ -24,7 +24,7 @@ and Joe recited this iron bars with their account, poor elth, and she had been almost drove me towards evening. At HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH the sergeant and then on the raw - afternoon towards + afternoon towards the terror, merely wished him as biled M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb -- a conciliatory air on in @@ -47,7 +47,7 @@ to live. You didn't know nothing could attend more.' He had been a coming! Get behind the answer those aids, I saw him in the same appearance of the convict's file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf confession, and bring you see? ' -HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an +HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an accusatory manner as well known that Joe Gargery marry her cup. `I wonder and there was publicly made it was, as lookers on; me, I @@ -63,7 +63,7 @@ again FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB towards evening. At last, and kneaded, and a dead man taking any. There was publicly made out there?' 
said I, -ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM +ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM glancing http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY at the N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ river wound, twenty miles of the number called, hears the awful it lights; here and trimmings of Caesar. This @@ -155,7 +155,7 @@ ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sg at me, and that her walking z3ymb.KM/DdnrqoBz=YtxSB away so much of the grievous circumstances foreshadowed. After receiving the way, that I thought, if she should go to?' `Good again!' cried the -FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 society of a savoury pork pie, +FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0 society of a savoury pork pie, and nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc they challenged, hears nothin' all my hands in herself, and bring him by hand. `This,' ftp://085.062.055.011/bopfVV/ said he wore ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs a dog of @@ -191,7 +191,7 @@ and tingling, and that I had won of the shoulder. `Excuse me, and we departed from Richard the furthest end of http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w both imp and stung by the bright fire, another look -zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her +zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her best use asking questions, and feet, hanging to try back was the poker. `It was not warmly. `Seems @@ -204,7 +204,7 @@ kitchen wall, Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 he ate the house, end with the Ghost in order): Forty-three pence?' To five hundred Gargerys.' 
`I say, Pip; stay -7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with +7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB his shot, and reposing no help to my seat. It was in the kitchen wall, because I calculated the sounds by giving me by the name for a rush of Joe's forge @@ -299,7 +299,7 @@ She drew the kitchen, carrying file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH so low wooden hut ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T where it seemed to give Pirrip as -<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO> +<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO> to say, on the guiltily coarse his head, he tried to the Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z remark. `There's one sprinkled all I was possible she beggared me. All these @@ -311,7 +311,7 @@ Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%be he shook her veil so thick nor my milk and would impart all had returned, with soap-suds, I had FILE:///#F9Bgl just like thin snow. `Enough of his right side of thenceforth sitting -jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw +jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw in File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg my soul. I sat down on it, I have been a spoon that the pie, blacksmith?' asked Estella of it made a mouth wide open, and so @@ -324,7 +324,7 @@ FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2 of the stranger looked at it, I pointed to Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz himself. 
No glimpse of file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg herself, I saw that he would have been there, I was too far and uncomfortable by it. -http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= +http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= Under the Above,' I rather to become transfixed -- he gave me out of the kitchen empty-handed, to keep him, I had made a Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG subject, if he had @@ -468,7 +468,7 @@ hard twist upon his -- `Well, boy,' Uncle Pumblechook: a look at the sermon he had heard it had hesitated as little window, violently plunging and she had committed, and had all about the present calling, which the fingers of tea on Saturdays than this country, gentlemen, but I could see those, -https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G +https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G too, if you remember what stock she told me again. `But I know what file:///enqvF%EFLOBsZhl8h2z wittles is?' `Yes, ma'am.' `Estella, take me again and ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A refractory @@ -493,7 +493,7 @@ right-side ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg flaxen curls and tables, and a foot of the blacksmith's.' `Halloa!' 
said Joe, staring at that it had withered like a infunt, and took another look about the -rum <6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once. +rum <6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once. Three Jolly Bargemen to think she seemed to tell you were. When we saw the file coming at my slice. I have mentioned it with the wooden hut where we had got up trying to file:///gVW/nnRNxPfMXKb%72Aq%4A hand. If ever grateful for. If a @@ -662,7 +662,7 @@ open,' he https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/ wiped the liquor. He was the bad; and some one Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE another -Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws +Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws down by a most powerfully down t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x to me, and all that know the window, @@ -993,7 +993,7 @@ upon a door, which was gobbling mincemeat, meatbone, bread, some lace for it that Joe's blue file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ eyes, had an hour longer than at me, and dismal, and gloves, and that's further than I mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs looked on. `Now, boy! -g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P +g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P Why, here's a ridiculous old chap. And looked up by hand. `Why don't like `sulks.' Therefore, I was in such game?' Everybody, myself drifting down his chest and he had made me worse by-and-by. I was a @@ -1035,7 +1035,7 @@ in every word out again. `You are prison-ships, and they fought for us heavy. `I Bolted, myself, 5.Piba4ac.JE/55M1H/AZXdj and thread, and we after him, or to inspire confidence. 
This was brought you spoke all the act, he -couldn't m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire +couldn't m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire between the forge was busy in it. Until hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ she jammed @@ -1329,7 +1329,7 @@ sort Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L -- FILE://155.24.106.255/3VEZIT7 if it was to him, I might not do not afraid of report, and looking rather to make nothing of a confidential voice, d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ -as lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be +as lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be supposed,' said the wind and so we were read the conversation consisted of it had so that we saw some bread, some l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C brandy out: no black velvet diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt index bf0d419b2a6..cf216ca085a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt @@ -10,7 +10,7 @@ http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3nWNXZ/P%17tp3gjATN/0ZRzs file:///2CdsP/U2GCLT Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA= HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH -Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m +Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J 
ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj @@ -23,13 +23,13 @@ Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EKOiTmk%7c/API/0cdgpi;Type=a FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/ file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf -HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND +HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6# http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7 http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB -ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM +ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ http://ah-2d4.ASIA/qmp @@ -75,7 +75,7 @@ http://4u3o/BKdhwRyzG file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/ ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz z3ymb.KM/DdnrqoBz=YtxSB -FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 +FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0 nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc ftp://085.062.055.011/bopfVV/ ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs @@ -93,12 +93,12 @@ https://[3790:ad57:0B63::e5f7:f6ac:164C]/Obax;zcD/Y%48%9a/Z2xcdar bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w -zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 
+zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 -7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb +7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1 @@ -147,20 +147,20 @@ ftp://Lq.es/%B1ZPdTZgB2mNFW/qre92rM file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T -79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO +79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ [62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23 Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5= FILE:///#F9Bgl -jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw +jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw http://sisas.ua/4CU60ZLK4VgY8AR89 
FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2 Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg -http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= +http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH @@ -228,7 +228,7 @@ file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1zklKhqx/HMhCSY2QcyxvL/ http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33= Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9 file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8 -https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G +https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G file:///enqvF%EFLOBsZhl8h2z ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9 @@ -240,7 +240,7 @@ http://nEN5ZN.EG/%0efsf4v30L file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg -6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/ 
+6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/ file:///gVW/nnRNxPfMXKb%72Aq%4A file:///Fzza388TQ file:/// @@ -314,7 +314,7 @@ file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2zQxki/QRji6gHpMGgBaM/d%71A2CTpZv-kF0tD/Ig f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7 https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/ Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE -Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 +Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9 t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR file:///XoCg%EDVf/A3ibJYjU @@ -476,7 +476,7 @@ ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO/PvL0%19MpQBv/ FILE:///Kywof5D5q/0TRS/zayrkrnENB file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs -g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P +g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL file:///mJM%a1/jv5%53QDqE/bFMu0CBp @@ -496,7 +496,7 @@ http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c4STIJ/CmvEGAUx9f/ file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt 5.Piba4ac.JE/55M1H/AZXdj -m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ +m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/ ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/ hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD @@ -633,7 +633,7 @@ 
http://047.014.184.200/Z_QdOwjzfBue4Nt/aEn/xuEQD/cXlnoxHIK%7d8h/1%eegEk7E0/8Ejku Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L FILE://155.24.106.255/3VEZIT7 d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ -lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET +lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k 212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt= diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java index f8de0fdae5d..1bad8ddfa73 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java @@ -75,7 +75,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes + " samba Halta gamba " + "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R\n" + "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb\n" - + "Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m" + + "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m" + " inter Locutio " + "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/\n" + "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7" @@ -91,7 +91,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes "samba", "Halta", "gamba", "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R", "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb", - "Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m", + "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m", "inter", "Locutio", "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/", "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7", 
diff --git a/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java b/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java index 5d7c23d4ae5..0cfea3fafd8 100644 --- a/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java +++ b/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java @@ -60,20 +60,21 @@ public class GenerateJflexTLDMacros { private static final String APACHE_LICENSE = "/*" + NL - + " * Copyright 2001-2005 The Apache Software Foundation." + NL - + " *" + NL - + " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL - + " * you may not use this file except in compliance with the License." + NL - + " * You may obtain a copy of the License at" + NL - + " *" + NL - + " * http://www.apache.org/licenses/LICENSE-2.0" + NL - + " *" + NL - + " * Unless required by applicable law or agreed to in writing, software" + NL - + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL - + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL - + " * See the License for the specific language governing permissions and" + NL - + " * limitations under the License." + NL - + " */" + NL + NL; + + " * Licensed to the Apache Software Foundation (ASF) under one or more" + NL + + " * contributor license agreements. See the NOTICE file distributed with" + NL + + " * this work for additional information regarding copyright ownership." + NL + + " * The ASF licenses this file to You under the Apache License, Version 2.0" + NL + + " * (the \"License\"); you may not use this file except in compliance with" + NL + + " * the License. 
You may obtain a copy of the License at" + NL + + " *" + NL + + " * http://www.apache.org/licenses/LICENSE-2.0" + NL + + " *" + NL + + " * Unless required by applicable law or agreed to in writing, software" + NL + + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL + + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL + + " * See the License for the specific language governing permissions and" + NL + + " * limitations under the License." + NL + + " */" + NL; private static final Pattern TLD_PATTERN_1 = Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*"); diff --git a/lucene/analysis/icu/src/data/uax29/Default.rbbi b/lucene/analysis/icu/src/data/uax29/Default.rbbi index 9dbab966632..6c6d1f9ef23 100644 --- a/lucene/analysis/icu/src/data/uax29/Default.rbbi +++ b/lucene/analysis/icu/src/data/uax29/Default.rbbi @@ -14,27 +14,52 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# Default RBBI rules, based on UAX#29. +# This file is from ICU (with some small modifications, to avoid CJK dictionary break) # +# Copyright (C) 2002-2013, International Business Machines Corporation +# and others. All Rights Reserved. +# +# file: word.txt +# +# ICU Word Break Rules +# See Unicode Standard Annex #29. +# These rules are based on UAX #29 Revision 22 for Unicode Version 6.3 +# +# Note: Updates to word.txt will usually need to be merged into +# word_POSIX.txt also. + +############################################################################## +# +# Character class definitions from TR 29 +# +############################################################################## !!chain; + # # Character Class Definitions. 
# -$CR = [\p{Word_Break = CR}]; -$LF = [\p{Word_Break = LF}]; -$Newline = [\p{Word_Break = Newline}]; -$Extend = [\p{Word_Break = Extend}]; -$Format = [\p{Word_Break = Format}]; -$Katakana = [\p{Word_Break = Katakana}]; -$ALetter = [\p{Word_Break = ALetter}]; -$MidNumLet = [\p{Word_Break = MidNumLet}]; -$MidLetter = [\p{Word_Break = MidLetter}]; -$MidNum = [\p{Word_Break = MidNum}]; -$Numeric = [\p{Word_Break = Numeric}[[:Decomposition_Type=Wide:]&[:General_Category=Decimal_Number:]]]; -$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; +$CR = [\p{Word_Break = CR}]; +$LF = [\p{Word_Break = LF}]; +$Newline = [\p{Word_Break = Newline}]; +$Extend = [\p{Word_Break = Extend}]; +$Regional_Indicator = [\p{Word_Break = Regional_Indicator}]; +$Format = [\p{Word_Break = Format}]; +$Katakana = [\p{Word_Break = Katakana}]; +$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}]; +$ALetter = [\p{Word_Break = ALetter}]; +$Single_Quote = [\p{Word_Break = Single_Quote}]; +$Double_Quote = [\p{Word_Break = Double_Quote}]; +$MidNumLet = [\p{Word_Break = MidNumLet}]; +$MidLetter = [\p{Word_Break = MidLetter}]; +$MidNum = [\p{Word_Break = MidNum}]; +$Numeric = [\p{Word_Break = Numeric}[[:Decomposition_Type=Wide:]&[:General_Category=Decimal_Number:]]]; +$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; + +$Han = [:Han:]; +$Hiragana = [:Hiragana:]; # Dictionary character set, for triggering language-based break engines. Currently @@ -42,24 +67,34 @@ $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; # 5.0 or later as the definition of Complex_Context was corrected to include all # characters requiring dictionary break. -$dictionary = [:LineBreak = Complex_Context:]; $Control = [\p{Grapheme_Cluster_Break = Control}]; -$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not - # include the dictionary characters. 
+$HangulSyllable = [\uac00-\ud7a3]; +$ComplexContext = [:LineBreak = Complex_Context:]; +$KanaKanji = [$Han $Hiragana $Katakana]; +$dictionaryCJK = [$Han $Hiragana $HangulSyllable]; +$dictionary = [$ComplexContext]; + +# leave CJK scripts out of ALetterPlus +$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; + # # Rules 4 Ignore Format and Extend characters, # except when they appear at the beginning of a region of text. # -$KatakanaEx = $Katakana ($Extend | $Format)*; -$ALetterEx = $ALetterPlus ($Extend | $Format)*; -$MidNumLetEx = $MidNumLet ($Extend | $Format)*; -$MidLetterEx = $MidLetter ($Extend | $Format)*; -$MidNumEx = $MidNum ($Extend | $Format)*; -$NumericEx = $Numeric ($Extend | $Format)*; -$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*; +# TODO: check if handling of katakana in dictionary makes rules incorrect/void +$KatakanaEx = $Katakana ($Extend | $Format)*; +$Hebrew_LetterEx = $Hebrew_Letter ($Extend | $Format)*; +$ALetterEx = $ALetterPlus ($Extend | $Format)*; +$Single_QuoteEx = $Single_Quote ($Extend | $Format)*; +$Double_QuoteEx = $Double_Quote ($Extend | $Format)*; +$MidNumLetEx = $MidNumLet ($Extend | $Format)*; +$MidLetterEx = $MidLetter ($Extend | $Format)*; +$MidNumEx = $MidNum ($Extend | $Format)*; +$NumericEx = $Numeric ($Extend | $Format)*; +$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*; +$Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format)*; -$Hiragana = [\p{script=Hiragana}]; $Ideographic = [\p{Ideographic}]; $HiraganaEx = $Hiragana ($Extend | $Format)*; $IdeographicEx = $Ideographic ($Extend | $Format)*; @@ -77,23 +112,31 @@ $CR $LF; # of a region of Text. The rule here comes into play when the start of text # begins with a group of Format chars, or with a "word" consisting of a single # char that is not in any of the listed word break categories followed by -# format char(s). +# format char(s), or is not a CJK dictionary character. [^$CR $LF $Newline]? 
($Extend | $Format)+; $NumericEx {100}; $ALetterEx {200}; +$HangulSyllable {200}; +$Hebrew_LetterEx{200}; $KatakanaEx {300}; # note: these status values override those from rule 5 -$HiraganaEx {300}; # by virtual of being numerically larger. +$HiraganaEx {300}; # by virtue of being numerically larger. $IdeographicEx {400}; # # # rule 5 # Do not break between most letters. # -$ALetterEx $ALetterEx {200}; +($ALetterEx | $Hebrew_LetterEx) ($ALetterEx | $Hebrew_LetterEx) {200}; # rule 6 and 7 -$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200}; +($ALetterEx | $Hebrew_LetterEx) ($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx) {200}; + +# rule 7a +$Hebrew_LetterEx $Single_QuoteEx {200}; + +# rule 7b and 7c +$Hebrew_LetterEx $Double_QuoteEx $Hebrew_LetterEx {200}; # rule 8 @@ -101,27 +144,35 @@ $NumericEx $NumericEx {100}; # rule 9 -$ALetterEx $NumericEx {200}; +($ALetterEx | $Hebrew_LetterEx) $NumericEx {200}; # rule 10 -$NumericEx $ALetterEx {200}; +$NumericEx ($ALetterEx | $Hebrew_LetterEx) {200}; # rule 11 and 12 -$NumericEx ($MidNumEx | $MidNumLetEx) $NumericEx {100}; +$NumericEx ($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx {100}; # rule 13 - $KatakanaEx $KatakanaEx {300}; # rule 13a/b -$ALetterEx $ExtendNumLetEx {200}; # (13a) -$NumericEx $ExtendNumLetEx {100}; # (13a) -$KatakanaEx $ExtendNumLetEx {300}; # (13a) -$ExtendNumLetEx $ExtendNumLetEx {200}; # (13a) +$ALetterEx $ExtendNumLetEx {200}; # (13a) +$Hebrew_LetterEx $ExtendNumLetEx {200}; # (13a) +$NumericEx $ExtendNumLetEx {100}; # (13a) +$KatakanaEx $ExtendNumLetEx {300}; # (13a) +$ExtendNumLetEx $ExtendNumLetEx {200}; # (13a) -$ExtendNumLetEx $ALetterEx {200}; # (13b) -$ExtendNumLetEx $NumericEx {100}; # (13b) -$ExtendNumLetEx $KatakanaEx {300}; # (13b) +$ExtendNumLetEx $ALetterEx {200}; # (13b) +$ExtendNumLetEx $Hebrew_Letter {200}; # (13b) +$ExtendNumLetEx $NumericEx {100}; # (13b) +$ExtendNumLetEx $KatakanaEx {300}; # (13b) + +# rule 13c + 
+$Regional_IndicatorEx $Regional_IndicatorEx; + +# special handling for CJK characters: chain for later dictionary segmentation +$HangulSyllable $HangulSyllable {200}; diff --git a/lucene/analysis/icu/src/data/uax29/Hebrew.rbbi b/lucene/analysis/icu/src/data/uax29/Hebrew.rbbi deleted file mode 100644 index c238cbb4216..00000000000 --- a/lucene/analysis/icu/src/data/uax29/Hebrew.rbbi +++ /dev/null @@ -1,61 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# -# This is an example of rule tailoring for Hebrew. -# In this example the single-quote is added to the Extend category -# The double-quote is added to the MidLetter category. 
-# -!!chain; -$CR = [\p{Word_Break = CR}]; -$LF = [\p{Word_Break = LF}]; -$Newline = [\p{Word_Break = Newline}]; -$Extend = [\p{Word_Break = Extend}\u0027]; -$Format = [\p{Word_Break = Format}]; -$ALetter = [\p{Word_Break = ALetter}]; -$MidNumLet = [\p{Word_Break = MidNumLet}]; -$MidLetter = [\p{Word_Break = MidLetter}\u0022]; -$MidNum = [\p{Word_Break = MidNum}]; -$Numeric = [\p{Word_Break = Numeric}]; -$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -$dictionary = [:LineBreak = Complex_Context:]; -$Control = [\p{Grapheme_Cluster_Break = Control}]; -$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; - -$ALetterEx = $ALetterPlus ($Extend | $Format)*; -$MidNumLetEx = $MidNumLet ($Extend | $Format)*; -$MidLetterEx = $MidLetter ($Extend | $Format)*; -$MidNumEx = $MidNum ($Extend | $Format)*; -$NumericEx = $Numeric ($Extend | $Format)*; -$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*; - -!!forward; - -$CR $LF; -[^$CR $LF $Newline]? ($Extend | $Format)+; -$NumericEx {100}; -$ALetterEx {200}; -$ALetterEx $ALetterEx {200}; -$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200}; -$NumericEx $NumericEx {100}; -$ALetterEx $NumericEx {200}; -$NumericEx $ALetterEx {200}; -$NumericEx ($MidNumEx | $MidNumLetEx) $NumericEx {100}; -$ALetterEx $ExtendNumLetEx {200}; -$NumericEx $ExtendNumLetEx {100}; -$ExtendNumLetEx $ExtendNumLetEx {200}; -$ExtendNumLetEx $ALetterEx {200}; -$ExtendNumLetEx $NumericEx {100}; diff --git a/lucene/analysis/icu/src/data/uax29/Lao.rbbi b/lucene/analysis/icu/src/data/uax29/Lao.rbbi deleted file mode 100644 index 8ce4f152efe..00000000000 --- a/lucene/analysis/icu/src/data/uax29/Lao.rbbi +++ /dev/null @@ -1,192 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Parses Lao text, with syllable as token. -# -# The definition of Lao syllable is based from: -# -# Syllabification of Lao Script for Line Breaking -# Phonpasit Phissamay, Valaxay Dalolay, Chitaphone Chanhsililath, Oulaiphone Silimasak, -# Sarmad Hussain, Nadir Durrani, Science Technology and Environment Agency, CRULP -# http://www.panl10n.net/english/final%20reports/pdf%20files/Laos/LAO06.pdf -# http://www.panl10n.net/Presentations/Cambodia/Phonpassit/LineBreakingAlgo.pdf -# -# NOTE: -# There are some ambiguities in Lao syllabification without additional processing, as mentioned in the paper. -# For this reason, this RBBI grammar really only works with LaoBreakIterator, as it does this additional work. -# -# Syllable structure, where X is the nuclear consonant: -# -# +----+ -# | X5 | -# +----+ -# | X4 | -# +----+----+----+----+----+----+----+-----+ -# | X0 | X1 | X | X6 | X7 | X8 | X9 | X10 | -# +----+----+----+----+----+----+----+-----+ -# | X2 | -# +----+ -# | X3 | -# +----+ -# -# X0 represents a vowel which occurs before the nuclear consonant. -# It can always define the beginning of syllable. -$X0 = [\u0EC0-\u0EC4]; -# X1 is a combination consonant which comes before the nuclear consonant, -# but only if nuclear consonant is one of {ງ ຍ ລ ວ ຼ ມ ນ ຣ} -$X1 = [\u0EAB]; -# X represents the nuclear consonant. 
-$X = [\u0E81-\u0EAE\u0EDC\u0EDD]; -# X2 is a combination consonant which comes after the nuclear consonant, -# which is placed under or next to the nuclear consonant. -$X2 = [\u0EBC\u0EA3\u0EA7\u0EA5]; -# X3 represents a vowel which occurs under the nuclear consonant. -$X3 = [\u0EB8\u0EB9]; -# X4 represents a vowel which occurs above the nuclear consonant. -$X4 = [\u0EB4-\u0EB7\u0ECD\u0EBB\u0EB1]; -# X5 represents a tone mark which occurs above the nuclear consonant or upper vowel. -$X5 = [\u0EC8-\u0ECB]; -# X6 represents a consonant vowel, which occurs after the nuclear consonant. -# It functions when the syllable doesn’t have any vowels. And it always exists with X8. -$X6 = [\u0EA7\u0EAD\u0EBD]; -# X7 represents a final vowel. -# However X7_1 always represents the end of syllable and it never exists with tone mark. -$X7 = [\u0EB0\u0EB2\u0EB3]; -# X8 represents an alternate consonant. -$X8 = [\u0E81\u0E87\u0E8D\u0E94\u0E99\u0EA1\u0E9A\u0EA7]; -# X9 represents alternate consonants to pronounce foreign terms, it always exist with X10_3. -$X9 = [\u0E88\u0EAA\u0E8A\u0E9E\u0E9F\u0EA5]; -# X10 represents a sign mark. -# It always occurs at the end of a syllable, but mostly people keep it separate from syllable. -$X10 = [\u0EAF\u0EC6\u0ECC]; - -# Section 1 -$X0_1 = [\u0EC0]; -$X4_1_2 = [\u0EB4\u0EB5]; -$X4_3_4 = [\u0EB6\u0EB7]; -$X4_6 = [\u0EBB]; -$X4_7 = [\u0EB1]; -$X6_2 = [\u0EAD]; -$X6_3 = [\u0EBD]; -$X7_1 = [\u0EB0]; -$X7_2 = [\u0EB2]; -$X10_1 = [\u0EAF]; -$X10_2 = [\u0EC6]; -$X10_3 = [\u0ECC]; - -$Rule1_1 = $X0_1 ($X1)? $X ($X2)? ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule1_2 = $X0_1 ($X1)? $X ($X2)? $X4_1_2 ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule1_3 = $X0_1 ($X1)? $X ($X2)? $X4_3_4 ($X5)? $X6_2 ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule1_4 = $X0_1 ($X1)? $X ($X2)? ($X7_2)? $X7_1; -$Rule1_5 = $X0_1 ($X1)? $X ($X2)? $X4_6 ($X5)? $X7_2 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule1_6 = $X0_1 ($X1)? $X ($X2)? $X4_7 ($X5)? 
$X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule1_7 = $X0_1 ($X1)? $X ($X2)? ($X4_7)? ($X5)? $X6_3 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -$Rule1 = ($Rule1_1 | $Rule1_2 | $Rule1_3 | $Rule1_4 | $Rule1_5 | $Rule1_6 | $Rule1_7); - -# Section 2 -$X0_2 = [\u0EC1]; - -$Rule2_1 = $X0_2 ($X1)? $X ($X2)? ($X5)? ($X6)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule2_2 = $X0_2 ($X1)? $X ($X2)? $X7_1; -$Rule2_3 = $X0_2 ($X1)? $X ($X2)? $X4_7 ($X5)? $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -$Rule2 = ($Rule2_1 | $Rule2_2 | $Rule2_3); - -# Section 3 -$X0_3 = [\u0EC2]; -$X8_3 = [\u0E8D]; -$X8_8 = [\u0EA7]; - -$Rule3_1 = $X0_3 ($X1)? $X ($X2)? ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule3_2 = $X0_3 ($X1)? $X ($X2)? $X7_1; -$Rule3_3 = $X0_3 ($X1)? $X ($X2)? $X4_7 ($X5)? ($X8_3 | $X8_8); - -$Rule3 = ($Rule3_1 | $Rule3_2 | $Rule3_3); - -# Section 4 -$X0_4 = [\u0EC4]; -$X6_1 = [\u0EA7]; - -$Rule4 = $X0_4 ($X1)? $X ($X2)? ($X5)? ($X6_1)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -# Section 5 -$X0_5 = [\u0EC3]; - -$Rule5 = $X0_5 ($X1)? $X ($X2)? ($X5)? ($X6_1)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -# Section 6 -$Rule6 = ($X1)? $X ($X2)? $X3 ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -# Section 7 -$X4_1_4 = [\u0EB4-\u0EB7]; - -$Rule7 = ($X1)? $X ($X2)? $X4_1_4 ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -# Section 8 -$X4_5 = [\u0ECD]; - -$Rule8 = ($X1)? $X ($X2)? $X4_5 ($X5)? ($X7_2)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -# Section 9 - -$Rule9_1 = ($X1)? $X ($X2)? $X4_6 ($X5)? $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; -$Rule9_2 = ($X1)? $X ($X2)? $X4_6 ($X5)? $X6_1 $X7_1; - -$Rule9 = ($Rule9_1 | $Rule9_2); - -# Section 10 -$Rule10 = ($X1)? $X ($X2)? $X4_7 ($X5)? ($X6_1)? $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -# Section 11 -$Rule11 = ($X1)? $X ($X2)? ($X5)? $X6 $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -# Section 12 -$Rule12 = ($X1)? $X ($X2)? ($X5)? $X7_1; - -# Section 13 -$Rule13 = ($X1)? $X ($X2)? ($X5)? $X7_2 ($X8)? ($X9 $X10_3)? ($X10_2)? 
($X10_1)?; - -# Section 14 -$X7_3 = [\u0EB3]; - -$Rule14 = ($X1)? $X ($X2)? ($X5)? $X7_3 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; - -$LaoSyllableEx = ($Rule1 | $Rule2 | $Rule3 | $Rule4 | $Rule5 | $Rule6 | $Rule7 | $Rule8 | $Rule9 | $Rule10 | $Rule11 | $Rule12 | $Rule13 | $Rule14); - -$WordJoin = [:Line_Break=Word_Joiner:]; - -$LaoJoinedSyllableEx = $LaoSyllableEx ($WordJoin $LaoSyllableEx)*; - -# -# default numerical definitions -# -$Extend = [\p{Word_Break = Extend}]; -$Format = [\p{Word_Break = Format}]; -$MidNumLet = [\p{Word_Break = MidNumLet}]; -$MidNum = [\p{Word_Break = MidNum}]; -$Numeric = [\p{Word_Break = Numeric}]; -$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -$MidNumLetEx = $MidNumLet ($Extend | $Format)*; -$MidNumEx = $MidNum ($Extend | $Format)*; -$NumericEx = $Numeric ($Extend | $Format)*; -$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*; - -!!forward; - -$LaoJoinedSyllableEx {200}; -# default numeric rules -$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100}; diff --git a/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt b/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt index 44a1d5793d9..ed8b3a70040 100644 --- a/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt +++ b/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt @@ -78,7 +78,6 @@ FF0D>002D ## Space Folding # Rule: [[:Zs:] - [:Changes_When_NFKC_Casefolded=Yes:] - [\u0020]] > 0020 1680>0020 -180E>0020 ## Spacing Accents folding (done by kd) diff --git a/lucene/analysis/icu/src/data/utr30/nfc.txt b/lucene/analysis/icu/src/data/utr30/nfc.txt index d251d531b1c..6ecdfd3482c 100644 --- a/lucene/analysis/icu/src/data/utr30/nfc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfc.txt @@ -1,4 +1,4 @@ -# Copyright (C) 1999-2012, International Business Machines +# Copyright (C) 1999-2013, International Business Machines # Corporation and others. All Rights Reserved. # # file name: nfc.txt @@ -7,7 +7,7 @@ # # Complete data for Unicode NFC normalization. 
-* Unicode 6.1.0 +* Unicode 6.3.0 # Canonical_Combining_Class (ccc) values 0300..0314:230 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc.txt b/lucene/analysis/icu/src/data/utr30/nfkc.txt index fccbbacca16..2cafb0d8853 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc.txt @@ -1,4 +1,4 @@ -# Copyright (C) 1999-2012, International Business Machines +# Copyright (C) 1999-2013, International Business Machines # Corporation and others. All Rights Reserved. # # file name: nfkc.txt @@ -11,7 +11,7 @@ # to NFKC one-way mappings. # Use this file as the second gennorm2 input file after nfc.txt. -* Unicode 6.1.0 +* Unicode 6.3.0 00A0>0020 00A8>0020 0308 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt index 1b043d17a6a..0ac6134f20c 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt @@ -1,5 +1,5 @@ # Unicode Character Database -# Copyright (c) 1991-2012 Unicode, Inc. +# Copyright (c) 1991-2013 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -12,7 +12,7 @@ # and reformatted into syntax for the gennorm2 Normalizer2 data generator tool. # Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt. 
-* Unicode 6.1.0 +* Unicode 6.3.0 0041>0061 0042>0062 @@ -537,6 +537,7 @@ 0555>0585 0556>0586 0587>0565 0582 +061C> 0675>0627 0674 0676>0648 0674 0677>06C7 0674 @@ -627,7 +628,7 @@ 10FC>10DC 115F..1160> 17B4..17B5> -180B..180D> +180B..180E> 1D2C>0061 1D2D>00E6 1D2E>0062 diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java index 654d1adc7c2..9a8a8070b84 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/BreakIteratorWrapper.java @@ -21,7 +21,6 @@ import java.text.CharacterIterator; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.DictionaryBasedBreakIterator; import com.ibm.icu.text.RuleBasedBreakIterator; import com.ibm.icu.text.UTF16; @@ -60,15 +59,12 @@ abstract class BreakIteratorWrapper { } /** - * If its a DictionaryBasedBreakIterator, it doesn't return rulestatus, so - * treat it like a generic BreakIterator If its any other - * RuleBasedBreakIterator, the rule status can be used for token type. If its + * If its a RuleBasedBreakIterator, the rule status can be used for token type. If its * any other BreakIterator, the rulestatus method is not available, so treat * it like a generic BreakIterator. 
*/ static BreakIteratorWrapper wrap(BreakIterator breakIterator) { - if (breakIterator instanceof RuleBasedBreakIterator - && !(breakIterator instanceof DictionaryBasedBreakIterator)) + if (breakIterator instanceof RuleBasedBreakIterator) return new RBBIWrapper((RuleBasedBreakIterator) breakIterator); else return new BIWrapper(breakIterator); diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java index 5f15880c184..51ff1647f53 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CompositeBreakIterator.java @@ -41,12 +41,13 @@ final class CompositeBreakIterator { private final BreakIteratorWrapper wordBreakers[] = new BreakIteratorWrapper[UScript.CODE_LIMIT]; private BreakIteratorWrapper rbbi; - private final ScriptIterator scriptIterator = new ScriptIterator(); + private final ScriptIterator scriptIterator; private char text[]; CompositeBreakIterator(ICUTokenizerConfig config) { this.config = config; + this.scriptIterator = new ScriptIterator(config.combineCJ()); } /** diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java index f7ac9949692..bb41f46a295 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java @@ -35,12 +35,9 @@ import com.ibm.icu.util.ULocale; * ({@link BreakIterator#getWordInstance(ULocale) BreakIterator.getWordInstance(ULocale.ROOT)}), * but with the following tailorings: *
      - *
    • Thai text is broken into words with a - * {@link com.ibm.icu.text.DictionaryBasedBreakIterator} - *
    • Lao, Myanmar, and Khmer text is broken into syllables + *
    • Thai, Lao, and CJK text is broken into words with a dictionary. + *
    • Myanmar, and Khmer text is broken into syllables * based on custom BreakIterator rules. - *
    • Hebrew text has custom tailorings to handle special cases - * involving punctuation. *
    * @lucene.experimental */ @@ -62,34 +59,44 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig { * the default breakiterators in use. these can be expensive to * instantiate, cheap to clone. */ - private static final BreakIterator rootBreakIterator = + // we keep the cjk breaking separate, thats because it cannot be customized (because dictionary + // is only triggered when kind = WORD, but kind = LINE by default and we have no non-evil way to change it) + private static final BreakIterator cjkBreakIterator = BreakIterator.getWordInstance(ULocale.ROOT); + // the same as ROOT, except no dictionary segmentation for cjk + private static final BreakIterator defaultBreakIterator = readBreakIterator("Default.brk"); - private static final BreakIterator thaiBreakIterator = - BreakIterator.getWordInstance(new ULocale("th_TH")); - private static final BreakIterator hebrewBreakIterator = - readBreakIterator("Hebrew.brk"); private static final BreakIterator khmerBreakIterator = readBreakIterator("Khmer.brk"); - private static final BreakIterator laoBreakIterator = - new LaoBreakIterator(readBreakIterator("Lao.brk")); private static final BreakIterator myanmarBreakIterator = readBreakIterator("Myanmar.brk"); + // TODO: deprecate this boolean? you only care if you are doing super-expert stuff... + private final boolean cjkAsWords; + /** * Creates a new config. This object is lightweight, but the first * time the class is referenced, breakiterators will be initialized. + * @param cjkAsWords true if cjk text should undergo dictionary-based segmentation, + * otherwise text will be segmented according to UAX#29 defaults. + * If this is true, all Han+Hiragana+Katakana words will be tagged as + * IDEOGRAPHIC. 
*/ - public DefaultICUTokenizerConfig() {} + public DefaultICUTokenizerConfig(boolean cjkAsWords) { + this.cjkAsWords = cjkAsWords; + } + + @Override + public boolean combineCJ() { + return cjkAsWords; + } @Override public BreakIterator getBreakIterator(int script) { switch(script) { - case UScript.THAI: return (BreakIterator)thaiBreakIterator.clone(); - case UScript.HEBREW: return (BreakIterator)hebrewBreakIterator.clone(); case UScript.KHMER: return (BreakIterator)khmerBreakIterator.clone(); - case UScript.LAO: return (BreakIterator)laoBreakIterator.clone(); case UScript.MYANMAR: return (BreakIterator)myanmarBreakIterator.clone(); - default: return (BreakIterator)rootBreakIterator.clone(); + case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone(); + default: return (BreakIterator)defaultBreakIterator.clone(); } } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java index 24a6fdea108..68b16a6bcee 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java @@ -68,7 +68,7 @@ public final class ICUTokenizer extends Tokenizer { * @see DefaultICUTokenizerConfig */ public ICUTokenizer(Reader input) { - this(input, new DefaultICUTokenizerConfig()); + this(input, new DefaultICUTokenizerConfig(true)); } /** diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java index 550db3c8de8..c972d0c1205 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerConfig.java @@ -36,4 +36,6 @@ public 
abstract class ICUTokenizerConfig { /** Return a token type value for a given script and BreakIterator * rule status. */ public abstract String getType(int script, int ruleStatus); + /** true if Han, Hiragana, and Katakana scripts should all be returned as Japanese */ + public abstract boolean combineCJ(); } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java index fecbb2253e0..81507b31309 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java @@ -70,7 +70,7 @@ import com.ibm.icu.text.RuleBasedBreakIterator; *
      * <fieldType name="text_icu_custom" class="solr.TextField" positionIncrementGap="100">
      *   <analyzer>
    - *     <tokenizer class="solr.ICUTokenizerFactory"
    + *     <tokenizer class="solr.ICUTokenizerFactory" cjkAsWords="true"
      *                rulefiles="Latn:my.Latin.rules.rbbi,Cyrl:my.Cyrillic.rules.rbbi"/>
      *   </analyzer>
      * </fieldType>
    @@ -79,6 +79,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa static final String RULEFILES = "rulefiles"; private final Map tailored; private ICUTokenizerConfig config; + private final boolean cjkAsWords; /** Creates a new ICUTokenizerFactory */ public ICUTokenizerFactory(Map args) { @@ -94,6 +95,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa tailored.put(UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode), resourcePath); } } + cjkAsWords = getBoolean(args, "cjkAsWords", true); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -103,7 +105,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa public void inform(ResourceLoader loader) throws IOException { assert tailored != null : "init must be called first!"; if (tailored.isEmpty()) { - config = new DefaultICUTokenizerConfig(); + config = new DefaultICUTokenizerConfig(cjkAsWords); } else { final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT]; for (Map.Entry entry : tailored.entrySet()) { @@ -111,7 +113,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa String resourcePath = entry.getValue(); breakers[code] = parseRules(resourcePath, loader); } - config = new DefaultICUTokenizerConfig() { + config = new DefaultICUTokenizerConfig(cjkAsWords) { @Override public BreakIterator getBreakIterator(int script) { diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/LaoBreakIterator.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/LaoBreakIterator.java deleted file mode 100644 index 16e56a4f8b3..00000000000 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/LaoBreakIterator.java +++ /dev/null @@ -1,230 +0,0 @@ -package org.apache.lucene.analysis.icu.segmentation; - -/* - * Licensed to the Apache Software Foundation (ASF) 
under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.text.CharacterIterator; - -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.RuleBasedBreakIterator; -import com.ibm.icu.text.UnicodeSet; - -/** - * Syllable iterator for Lao text. - *

    - * This breaks Lao text into syllables according to: - * Syllabification of Lao Script for Line Breaking - * Phonpasit Phissamay, Valaxay Dalolay, Chitaphone Chanhsililath, Oulaiphone Silimasak, - * Sarmad Hussain, Nadir Durrani, Science Technology and Environment Agency, CRULP. - *

      - *
    • http://www.panl10n.net/english/final%20reports/pdf%20files/Laos/LAO06.pdf - *
    • http://www.panl10n.net/Presentations/Cambodia/Phonpassit/LineBreakingAlgo.pdf - *
    - *

    - * Most work is accomplished with RBBI rules, however some additional special logic is needed - * that cannot be coded in a grammar, and this is implemented here. - *

    - * For example, what appears to be a final consonant might instead be part of the next syllable. - * Rules match in a greedy fashion, leaving an illegal sequence that matches no rules. - *

    - * Take for instance the text ກວ່າດອກ - * The first rule greedily matches ກວ່າດ, but then ອກ is encountered, which is illegal. - * What LaoBreakIterator does, according to the paper: - *

      - *
    1. backtrack and remove the ດ from the last syllable, placing it on the current syllable. - *
    2. verify the modified previous syllable (ກວ່າ ) is still legal. - *
    3. verify the modified current syllable (ດອກ) is now legal. - *
    4. If 2 or 3 fails, then restore the ດ to the last syllable and skip the current character. - *
    - *

    - * Finally, LaoBreakIterator also takes care of the second concern mentioned in the paper. - * This is the issue of combining marks being in the wrong order (typos). - * @lucene.experimental - */ -public class LaoBreakIterator extends BreakIterator { - RuleBasedBreakIterator rules; - CharArrayIterator text; - - CharArrayIterator working = new CharArrayIterator(); - int workingOffset = 0; - - CharArrayIterator verifyText = new CharArrayIterator(); - RuleBasedBreakIterator verify; - - private static final UnicodeSet laoSet; - static { - laoSet = new UnicodeSet("[:Lao:]"); - laoSet.compact(); - laoSet.freeze(); - } - - /** - * Creates a new iterator, performing the backtracking verification - * across the provided rules. - */ - public LaoBreakIterator(RuleBasedBreakIterator rules) { - this.rules = (RuleBasedBreakIterator) rules.clone(); - this.verify = (RuleBasedBreakIterator) rules.clone(); - } - - @Override - public int current() { - int current = rules.current(); - return current == BreakIterator.DONE ? BreakIterator.DONE : workingOffset + current; - } - - @Override - public int first() { - working.setText(this.text.getText(), this.text.getStart(), this.text.getLength()); - rules.setText(working); - workingOffset = 0; - int first = rules.first(); - return first == BreakIterator.DONE ? 
BreakIterator.DONE : workingOffset + first; - } - - @Override - public int following(int offset) { - throw new UnsupportedOperationException(); - } - - @Override - public CharacterIterator getText() { - return text; - } - - @Override - public int last() { - throw new UnsupportedOperationException(); - } - - @Override - public int next() { - int current = current(); - int next = rules.next(); - if (next == BreakIterator.DONE) - return next; - else - next += workingOffset; - - char c = working.current(); - int following = rules.next(); // lookahead - if (following != BreakIterator.DONE) { - following += workingOffset; - if (rules.getRuleStatus() == 0 && laoSet.contains(c) && verifyPushBack(current, next)) { - workingOffset = next - 1; - working.setText(text.getText(), text.getStart() + workingOffset, text.getLength() - workingOffset); - return next - 1; - } - rules.previous(); // undo the lookahead - } - - return next; - } - - @Override - public int next(int n) { - if (n < 0) - throw new UnsupportedOperationException("Backwards traversal is unsupported"); - - int result = current(); - while (n > 0) { - result = next(); - --n; - } - return result; - } - - @Override - public int previous() { - throw new UnsupportedOperationException("Backwards traversal is unsupported"); - } - - @Override - public void setText(CharacterIterator text) { - if (!(text instanceof CharArrayIterator)) - throw new UnsupportedOperationException("unsupported CharacterIterator"); - this.text = (CharArrayIterator) text; - ccReorder(this.text.getText(), this.text.getStart(), this.text.getLength()); - working.setText(this.text.getText(), this.text.getStart(), this.text.getLength()); - rules.setText(working); - workingOffset = 0; - } - - @Override - public void setText(String newText) { - CharArrayIterator ci = new CharArrayIterator(); - ci.setText(newText.toCharArray(), 0, newText.length()); - setText(ci); - } - - private boolean verifyPushBack(int current, int next) { - int shortenedSyllable = 
next - current - 1; - - verifyText.setText(text.getText(), text.getStart() + current, shortenedSyllable); - verify.setText(verifyText); - if (verify.next() != shortenedSyllable || verify.getRuleStatus() == 0) - return false; - - - verifyText.setText(text.getText(), text.getStart() + next - 1, text.getLength() - next + 1); - verify.setText(verifyText); - - return (verify.next() != BreakIterator.DONE && verify.getRuleStatus() != 0); - } - - // TODO: only bubblesort around runs of combining marks, instead of the entire text. - private void ccReorder(char[] text, int start, int length) { - boolean reordered; - do { - int prevCC = 0; - reordered = false; - for (int i = start; i < start + length; i++) { - final char c = text[i]; - final int cc = UCharacter.getCombiningClass(c); - if (cc > 0 && cc < prevCC) { - // swap - text[i] = text[i - 1]; - text[i - 1] = c; - reordered = true; - } else { - prevCC = cc; - } - } - - } while (reordered == true); - } - - /** - * Clone method. Creates another LaoBreakIterator with the same behavior - * and current state as this one. - * @return The clone. 
- */ - @Override - public LaoBreakIterator clone() { - LaoBreakIterator other = (LaoBreakIterator) super.clone(); - other.rules = (RuleBasedBreakIterator) rules.clone(); - other.verify = (RuleBasedBreakIterator) verify.clone(); - if (text != null) - other.text = text.clone(); - if (working != null) - other.working = working.clone(); - if (verifyText != null) - other.verifyText = verifyText.clone(); - return other; - } -} diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ScriptIterator.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ScriptIterator.java index 779dc9ba404..f573b192bce 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ScriptIterator.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ScriptIterator.java @@ -59,6 +59,15 @@ final class ScriptIterator { private int scriptStart; private int scriptLimit; private int scriptCode; + + private final boolean combineCJ; + + /** + * @param combineCJ if true: Han,Hiragana,Katakana will all return as {@link UScript#JAPANESE} + */ + ScriptIterator(boolean combineCJ) { + this.combineCJ = combineCJ; + } /** * Get the start of this script run @@ -162,10 +171,24 @@ final class ScriptIterator { } /** fast version of UScript.getScript(). 
Basic Latin is an array lookup */ - private static int getScript(int codepoint) { - if (0 <= codepoint && codepoint < basicLatin.length) + private int getScript(int codepoint) { + if (0 <= codepoint && codepoint < basicLatin.length) { return basicLatin[codepoint]; - else - return UScript.getScript(codepoint); + } else { + int script = UScript.getScript(codepoint); + if (combineCJ) { + if (script == UScript.HAN || script == UScript.HIRAGANA || script == UScript.KATAKANA) { + return UScript.JAPANESE; + } else if (codepoint >= 0xFF10 && codepoint <= 0xFF19) { + // when using CJK dictionary breaking, don't let full width numbers go to it, otherwise + // they are treated as punctuation. we currently have no cleaner way to fix this! + return UScript.LATIN; + } else { + return script; + } + } else { + return script; + } + } } } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java index 9e5ac475979..e9d911964da 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java @@ -84,6 +84,10 @@ public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribut @Override public void reflectWith(AttributeReflector reflector) { - reflector.reflect(ScriptAttribute.class, "script", getName()); + // when wordbreaking CJK, we use the 15924 code Japanese (Han+Hiragana+Katakana) to + // mark runs of Chinese/Japanese. our use is correct (as for chinese Han is a subset), + // but this is just to help prevent confusion. + String name = code == UScript.JAPANESE ? 
"Chinese/Japanese" : getName(); + reflector.reflect(ScriptAttribute.class, "script", name); } } diff --git a/lucene/analysis/icu/src/java/overview.html b/lucene/analysis/icu/src/java/overview.html index a379f55963e..5411a4fcaee 100644 --- a/lucene/analysis/icu/src/java/overview.html +++ b/lucene/analysis/icu/src/java/overview.html @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. --> + @@ -114,9 +115,9 @@ algorithm.

    Farsi Range Queries

       Collator collator = Collator.getInstance(new ULocale("ar"));
    -  ICUCollationKeyAnalyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
    +  ICUCollationKeyAnalyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_50, collator);
       RAMDirectory ramDir = new RAMDirectory();
    -  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
    +  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_50, analyzer));
       Document doc = new Document();
       doc.add(new Field("content", "\u0633\u0627\u0628", 
                         Field.Store.YES, Field.Index.ANALYZED));
    @@ -124,7 +125,7 @@ algorithm.
       writer.close();
       IndexSearcher is = new IndexSearcher(ramDir, true);
     
    -  QueryParser aqp = new QueryParser(Version.LUCENE_40, "content", analyzer);
    +  QueryParser aqp = new QueryParser(Version.LUCENE_50, "content", analyzer);
       aqp.setAnalyzeRangeTerms(true);
         
       // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    @@ -140,9 +141,9 @@ algorithm.
     

    Danish Sorting

       Analyzer analyzer 
    -    = new ICUCollationKeyAnalyzer(Version.LUCENE_40, Collator.getInstance(new ULocale("da", "dk")));
    +    = new ICUCollationKeyAnalyzer(Version.LUCENE_50, Collator.getInstance(new ULocale("da", "dk")));
       RAMDirectory indexStore = new RAMDirectory();
    -  IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_40, analyzer));
    +  IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_50, analyzer));
       String[] tracer = new String[] { "A", "B", "C", "D", "E" };
       String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
       String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };
    @@ -168,15 +169,15 @@ algorithm.
     
       Collator collator = Collator.getInstance(new ULocale("tr", "TR"));
       collator.setStrength(Collator.PRIMARY);
    -  Analyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
    +  Analyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_50, collator);
       RAMDirectory ramDir = new RAMDirectory();
    -  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
    +  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_50, analyzer));
       Document doc = new Document();
       doc.add(new Field("contents", "DIGY", Field.Store.NO, Field.Index.ANALYZED));
       writer.addDocument(doc);
       writer.close();
       IndexSearcher is = new IndexSearcher(ramDir, true);
    -  QueryParser parser = new QueryParser(Version.LUCENE_40, "contents", analyzer);
    +  QueryParser parser = new QueryParser(Version.LUCENE_50, "contents", analyzer);
       Query query = parser.parse("d\u0131gy");   // U+0131: dotless i
       ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
       assertEquals("The index Term should be included.", 1, result.length);
    @@ -353,7 +354,7 @@ and
     

    Backwards Compatibility

    This module exists to provide up-to-date Unicode functionality that supports -the most recent version of Unicode (currently 6.1). However, some users who wish +the most recent version of Unicode (currently 6.3). However, some users who wish for stronger backwards compatibility can restrict {@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to operate on only a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}. diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk index 3972d1cd7d4..e4b35d24e80 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Hebrew.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Hebrew.brk deleted file mode 100644 index 5a6666466a7..00000000000 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Hebrew.brk and /dev/null differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Khmer.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Khmer.brk index f5b50e14e76..dd368d05ec2 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Khmer.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Khmer.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Lao.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Lao.brk deleted file mode 100644 index 571b0163441..00000000000 Binary files 
a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Lao.brk and /dev/null differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Myanmar.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Myanmar.brk index 1bab7a616ef..dcaeb571789 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Myanmar.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Myanmar.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm index 6e85a18dbf1..efbbb9e490c 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm differ diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java index a7c02688b54..1d9a901fb1e 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java @@ -42,7 +42,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { sb.append(whitespace); sb.append("testing 1234"); String input = sb.toString(); - ICUTokenizer tokenizer = new ICUTokenizer(new StringReader(input)); + ICUTokenizer tokenizer = new ICUTokenizer(new StringReader(input), new DefaultICUTokenizerConfig(false)); assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); } @@ -52,7 +52,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { sb.append('a'); } String input = sb.toString(); - ICUTokenizer tokenizer = new ICUTokenizer(new StringReader(input)); + 
ICUTokenizer tokenizer = new ICUTokenizer(new StringReader(input), new DefaultICUTokenizerConfig(false)); char token[] = new char[4096]; Arrays.fill(token, 'a'); String expectedToken = new String(token); @@ -69,7 +69,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { - Tokenizer tokenizer = new ICUTokenizer(reader); + Tokenizer tokenizer = new ICUTokenizer(reader, new DefaultICUTokenizerConfig(false)); TokenFilter filter = new ICUNormalizer2Filter(tokenizer); return new TokenStreamComponents(tokenizer, filter); } @@ -118,6 +118,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { public void testLao() throws Exception { assertAnalyzesTo(a, "ກວ່າດອກ", new String[] { "ກວ່າ", "ດອກ" }); + assertAnalyzesTo(a, "ພາສາລາວ", new String[] { "ພາສາ", "ລາວ"}, new String[] { "", "" }); } public void testThai() throws Exception { @@ -138,6 +139,13 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { new String[] { "我", "是", "中", "国", "人", "1234", "tests"}); } + public void testHebrew() throws Exception { + assertAnalyzesTo(a, "דנקנר תקף את הדו\"ח", + new String[] { "דנקנר", "תקף", "את", "הדו\"ח" }); + assertAnalyzesTo(a, "חברת בת של מודי'ס", + new String[] { "חברת", "בת", "של", "מודי'ס" }); + } + public void testEmpty() throws Exception { assertAnalyzesTo(a, "", new String[] {}); assertAnalyzesTo(a, ".", new String[] {}); diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java new file mode 100644 index 00000000000..2e60717d064 --- /dev/null +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java @@ -0,0 +1,91 @@ +package org.apache.lucene.analysis.icu.segmentation; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.util.Random; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; + +/** + * test ICUTokenizer with dictionary-based CJ segmentation + */ +public class TestICUTokenizerCJK extends BaseTokenStreamTestCase { + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new ICUTokenizer(reader)); + } + }; + + /** + * test stolen from smartcn + */ + public void testSimpleChinese() throws Exception { + assertAnalyzesTo(a, "我购买了道具和服装。", + new String[] { "我", "购买", "了", "道具", "和", "服装" } + ); + } + + public void testChineseNumerics() throws Exception { + assertAnalyzesTo(a, "9483", new String[] { "9483" }); + assertAnalyzesTo(a, "院內分機9483。", + new String[] { "院", "內", "分機", "9483" }); + assertAnalyzesTo(a, "院內分機9483。", + new String[] { "院", "內", "分機", "9483" }); + } + + /** + * test stolen from kuromoji + */ + public void testSimpleJapanese() throws Exception { + assertAnalyzesTo(a, "それはまだ実験段階にあります", + new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" } + ); + } + + public void testJapaneseTypes() throws Exception { + assertAnalyzesTo(a, "仮名遣い 
カタカナ", + new String[] { "仮名遣い", "カタカナ" }, + new String[] { "", "" }); + } + + public void testKorean() throws Exception { + // Korean words + assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"}); + } + + /** make sure that we still tag korean as HANGUL (for further decomposition/ngram/whatever) */ + public void testKoreanTypes() throws Exception { + assertAnalyzesTo(a, "훈민정음", + new String[] { "훈민정음" }, + new String[] { "" }); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random(), a, 10000*RANDOM_MULTIPLIER); + } + + /** blast some random large strings through the analyzer */ + public void testRandomHugeStrings() throws Exception { + Random random = random(); + checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 8192); + } +} diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestLaoBreakIterator.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestLaoBreakIterator.java deleted file mode 100644 index 27179aa1bcd..00000000000 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestLaoBreakIterator.java +++ /dev/null @@ -1,90 +0,0 @@ -package org.apache.lucene.analysis.icu.segmentation; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.InputStream; - -import org.apache.lucene.util.LuceneTestCase; - -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.RuleBasedBreakIterator; -import com.ibm.icu.text.UTF16; - -/** - * Tests LaoBreakIterator and its RBBI rules - */ -public class TestLaoBreakIterator extends LuceneTestCase { - private BreakIterator wordIterator; - - @Override - public void setUp() throws Exception { - super.setUp(); - InputStream is = getClass().getResourceAsStream("Lao.brk"); - wordIterator = new LaoBreakIterator(RuleBasedBreakIterator.getInstanceFromCompiledRules(is)); - is.close(); - } - - private void assertBreaksTo(BreakIterator iterator, String sourceText, String tokens[]) { - char text[] = sourceText.toCharArray(); - CharArrayIterator ci = new CharArrayIterator(); - ci.setText(text, 0, text.length); - iterator.setText(ci); - - for (int i = 0; i < tokens.length; i++) { - int start, end; - do { - start = iterator.current(); - end = iterator.next(); - } while (end != BreakIterator.DONE && !isWord(text, start, end)); - assertTrue(start != BreakIterator.DONE); - assertTrue(end != BreakIterator.DONE); - assertEquals(tokens[i], new String(text, start, end - start)); - } - - assertTrue(iterator.next() == BreakIterator.DONE); - } - - protected boolean isWord(char text[], int start, int end) { - int codepoint; - for (int i = start; i < end; i += UTF16.getCharCount(codepoint)) { - codepoint = UTF16.charAt(text, 0, end, start); - - if (UCharacter.isLetterOrDigit(codepoint)) - return true; - } - - return false; - } - - public void testBasicUsage() throws Exception { - assertBreaksTo(wordIterator, "ກວ່າດອກ", new String[] { "ກວ່າ", "ດອກ" }); - assertBreaksTo(wordIterator, "ຜູ້​ເຂົ້າ", new String[] { "ຜູ້", "ເຂົ້າ" }); - assertBreaksTo(wordIterator, "", new String[] {}); - assertBreaksTo(wordIterator, "ສະບາຍດີ", new 
String[] { "ສະ", "ບາຍ", "ດີ" }); - } - - public void testNumerics() throws Exception { - assertBreaksTo(wordIterator, "໐໑໒໓", new String[] { "໐໑໒໓" }); - assertBreaksTo(wordIterator, "໐໑໒໓.໕໖", new String[] { "໐໑໒໓.໕໖" }); - } - - public void testTextAndNumerics() throws Exception { - assertBreaksTo(wordIterator, "ກວ່າດອກ໐໑໒໓", new String[] { "ກວ່າ", "ດອກ", "໐໑໒໓" }); - } -} diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java index ca25597ce78..2840d238968 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java @@ -41,7 +41,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase { private Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { - Tokenizer source = new ICUTokenizer(reader); + Tokenizer source = new ICUTokenizer(reader, new DefaultICUTokenizerConfig(false)); TokenStream result = new CJKBigramFilter(source); return new TokenStreamComponents(source, new StopFilter(TEST_VERSION_CURRENT, result, CharArraySet.EMPTY_SET)); } @@ -56,7 +56,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase { private Analyzer analyzer2 = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { - Tokenizer source = new ICUTokenizer(reader); + Tokenizer source = new ICUTokenizer(reader, new DefaultICUTokenizerConfig(false)); // we put this before the CJKBigramFilter, because the normalization might combine // some halfwidth katakana forms, which will affect the bigramming. 
TokenStream result = new ICUNormalizer2Filter(source); diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java index 23cc391879d..2b0ba48cfbb 100644 --- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java +++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java @@ -36,40 +36,45 @@ public class GenerateJFlexSupplementaryMacros { static { DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); } - - private static final String APACHE_LICENSE - = "/*" + NL - + " * Copyright 2010 The Apache Software Foundation." + NL + + private static final String APACHE_LICENSE + = "/*" + NL + + " * Licensed to the Apache Software Foundation (ASF) under one or more" + NL + + " * contributor license agreements. See the NOTICE file distributed with" + NL + + " * this work for additional information regarding copyright ownership." + NL + + " * The ASF licenses this file to You under the Apache License, Version 2.0" + NL + + " * (the \"License\"); you may not use this file except in compliance with" + NL + + " * the License. You may obtain a copy of the License at" + NL + " *" + NL - + " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL - + " * you may not use this file except in compliance with the License." + NL - + " * You may obtain a copy of the License at" + NL - + " *" + NL - + " * http://www.apache.org/licenses/LICENSE-2.0" + NL + + " * http://www.apache.org/licenses/LICENSE-2.0" + NL + " *" + NL + " * Unless required by applicable law or agreed to in writing, software" + NL + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." 
+ NL + " * See the License for the specific language governing permissions and" + NL + " * limitations under the License." + NL - + " */" + NL + NL; + + " */" + NL; public static void main(String args[]) { outputHeader(); - outputMacro("ALetterSupp", "[:WordBreak=ALetter:]"); - outputMacro("FormatSupp", "[:WordBreak=Format:]"); - outputMacro("ExtendSupp", "[:WordBreak=Extend:]"); - outputMacro("NumericSupp", "[:WordBreak=Numeric:]"); - outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]"); - outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]"); - outputMacro("MidNumSupp", "[:WordBreak=MidNum:]"); - outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]"); - outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); - outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); - outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]"); - outputMacro("HanSupp", "[:Script=Han:]"); - outputMacro("HiraganaSupp", "[:Script=Hiragana:]"); + outputMacro("ALetterSupp", "[:WordBreak=ALetter:]"); + outputMacro("FormatSupp", "[:WordBreak=Format:]"); + outputMacro("NumericSupp", "[:WordBreak=Numeric:]"); + outputMacro("ExtendSupp", "[:WordBreak=Extend:]"); + outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]"); + outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]"); + outputMacro("MidNumSupp", "[:WordBreak=MidNum:]"); + outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]"); + outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); + outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); + outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]"); + outputMacro("HanSupp", "[:Script=Han:]"); + outputMacro("HiraganaSupp", "[:Script=Hiragana:]"); + outputMacro("SingleQuoteSupp", "[:WordBreak=Single_Quote:]"); + outputMacro("DoubleQuoteSupp", "[:WordBreak=Double_Quote:]"); + outputMacro("HebrewLetterSupp", "[:WordBreak=Hebrew_Letter:]"); + outputMacro("RegionalIndicatorSupp", "[:WordBreak=Regional_Indicator:]"); } static void 
outputHeader() { diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java index a91a9ddbb7b..9fb5cee5c89 100644 --- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java +++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java @@ -62,7 +62,7 @@ import java.util.regex.Pattern; public class GenerateUTR30DataFiles { private static final String ICU_SVN_TAG_URL = "http://source.icu-project.org/repos/icu/icu/tags"; - private static final String ICU_RELEASE_TAG = "release-49-1-2"; + private static final String ICU_RELEASE_TAG = "release-52-1"; private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2"; private static final String NFC_TXT = "nfc.txt"; private static final String NFKC_TXT = "nfkc.txt"; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java index 24087aad35f..9d0ee64ee2b 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java @@ -97,7 +97,8 @@ public class CreateIndexTask extends PerfTask { } public static IndexWriterConfig createWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit) { - Version version = Version.valueOf(config.get("writer.version", Version.LUCENE_40.toString())); + // :Post-Release-Update-Version.LUCENE_XY: + Version version = Version.valueOf(config.get("writer.version", Version.LUCENE_50.toString())); IndexWriterConfig iwConf = new IndexWriterConfig(version, runData.getAnalyzer()); iwConf.setOpenMode(mode); IndexDeletionPolicy indexDeletionPolicy = getIndexDeletionPolicy(config); diff --git 
a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java index 3f1d0ccf040..4c1c8fd6285 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java @@ -37,7 +37,8 @@ public class CreateIndexTaskTest extends BenchmarkTestCase { private PerfRunData createPerfRunData(String infoStreamValue) throws Exception { Properties props = new Properties(); - props.setProperty("writer.version", Version.LUCENE_40.toString()); + // :Post-Release-Update-Version.LUCENE_XY: + props.setProperty("writer.version", Version.LUCENE_50.toString()); props.setProperty("print.props", "false"); // don't print anything props.setProperty("directory", "RAMDirectory"); if (infoStreamValue != null) { diff --git a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java index e21e670d2d5..22d530ac87d 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java @@ -49,6 +49,9 @@ public class KNearestNeighborClassifier implements Classifier { private final int k; private Query query; + private int minDocsFreq; + private int minTermFreq; + /** * Create a {@link Classifier} using kNN algorithm * @@ -58,6 +61,19 @@ public class KNearestNeighborClassifier implements Classifier { this.k = k; } + /** + * Create a {@link Classifier} using kNN algorithm + * + * @param k the number of neighbors to analyze as an int + * @param minDocsFreq the minimum number of docs frequency for MLT to be set with {@link MoreLikeThis#setMinDocFreq(int)} + * @param minTermFreq the 
minimum number of term frequency for MLT to be set with {@link MoreLikeThis#setMinTermFreq(int)} + */ + public KNearestNeighborClassifier(int k, int minDocsFreq, int minTermFreq) { + this.k = k; + this.minDocsFreq = minDocsFreq; + this.minTermFreq = minTermFreq; + } + /** * {@inheritDoc} */ @@ -93,11 +109,11 @@ public class KNearestNeighborClassifier implements Classifier { } double max = 0; BytesRef assignedClass = new BytesRef(); - for (BytesRef cl : classCounts.keySet()) { - Integer count = classCounts.get(cl); + for (Map.Entry entry : classCounts.entrySet()) { + Integer count = entry.getValue(); if (count > max) { max = count; - assignedClass = cl.clone(); + assignedClass = entry.getKey().clone(); } } double score = max / (double) k; @@ -117,13 +133,7 @@ public class KNearestNeighborClassifier implements Classifier { */ @Override public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer, Query query) throws IOException { - this.textFieldNames = new String[]{textFieldName}; - this.classFieldName = classFieldName; - mlt = new MoreLikeThis(atomicReader); - mlt.setAnalyzer(analyzer); - mlt.setFieldNames(new String[]{textFieldName}); - indexSearcher = new IndexSearcher(atomicReader); - this.query = query; + train(atomicReader, new String[]{textFieldName}, classFieldName, analyzer, query); } /** @@ -137,6 +147,12 @@ public class KNearestNeighborClassifier implements Classifier { mlt.setAnalyzer(analyzer); mlt.setFieldNames(textFieldNames); indexSearcher = new IndexSearcher(atomicReader); + if (minDocsFreq > 0) { + mlt.setMinDocFreq(minDocsFreq); + } + if (minTermFreq > 0) { + mlt.setMinTermFreq(minTermFreq); + } this.query = query; } } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java index d1393523c09..923f695852a 100644 --- 
a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java @@ -64,23 +64,17 @@ public class SimpleNaiveBayesClassifier implements Classifier { * {@inheritDoc} */ @Override - public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer, Query query) - throws IOException { - this.atomicReader = atomicReader; - this.indexSearcher = new IndexSearcher(this.atomicReader); - this.textFieldNames = new String[]{textFieldName}; - this.classFieldName = classFieldName; - this.analyzer = analyzer; - this.docsWithClassSize = countDocsWithClass(); - this.query = query; + public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer) throws IOException { + train(atomicReader, textFieldName, classFieldName, analyzer, null); } /** * {@inheritDoc} */ @Override - public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer) throws IOException { - train(atomicReader, textFieldName, classFieldName, analyzer, null); + public void train(AtomicReader atomicReader, String textFieldName, String classFieldName, Analyzer analyzer, Query query) + throws IOException { + train(atomicReader, new String[]{textFieldName}, classFieldName, analyzer, query); } /** @@ -137,7 +131,7 @@ public class SimpleNaiveBayesClassifier implements Classifier { if (atomicReader == null) { throw new IOException("You must first call Classifier#train"); } - double max = 0d; + double max = - Double.MAX_VALUE; BytesRef foundClass = new BytesRef(); Terms terms = MultiFields.getTerms(atomicReader, classFieldName); @@ -145,20 +139,20 @@ public class SimpleNaiveBayesClassifier implements Classifier { BytesRef next; String[] tokenizedDoc = tokenizeDoc(inputDocument); while ((next = termsEnum.next()) != null) { - // TODO : turn it to be in log scale - 
double clVal = calculatePrior(next) * calculateLikelihood(tokenizedDoc, next); + double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next); if (clVal > max) { max = clVal; foundClass = BytesRef.deepCopyOf(next); } } - return new ClassificationResult(foundClass, max); + double score = 10 / Math.abs(max); + return new ClassificationResult(foundClass, score); } - private double calculateLikelihood(String[] tokenizedDoc, BytesRef c) throws IOException { + private double calculateLogLikelihood(String[] tokenizedDoc, BytesRef c) throws IOException { // for each word - double result = 1d; + double result = 0d; for (String word : tokenizedDoc) { // search with text:word AND class:c int hits = getWordFreqForClass(word, c); @@ -171,10 +165,10 @@ public class SimpleNaiveBayesClassifier implements Classifier { // P(w|c) = num/den double wordProbability = num / den; - result *= wordProbability; + result += Math.log(wordProbability); } - // P(d|c) = P(w1|c)*...*P(wn|c) + // log(P(d|c)) = log(P(w1|c))+...+log(P(wn|c)) return result; } @@ -205,8 +199,8 @@ public class SimpleNaiveBayesClassifier implements Classifier { return totalHitCountCollector.getTotalHits(); } - private double calculatePrior(BytesRef currentClass) throws IOException { - return (double) docCount(currentClass) / docsWithClassSize; + private double calculateLogPrior(BytesRef currentClass) throws IOException { + return Math.log((double) docCount(currentClass)) - Math.log(docsWithClassSize); } private int docCount(BytesRef countedClass) throws IOException { diff --git a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java index 401da814b8d..5dbea2ee5c4 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java @@ -69,6 
+69,7 @@ public class DatasetSplitter { Analyzer analyzer, String... fieldNames) throws IOException { // create IWs for train / test / cv IDXs + // :Post-Release-Update-Version.LUCENE_XY: IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(Version.LUCENE_50, analyzer)); IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(Version.LUCENE_50, analyzer)); IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(Version.LUCENE_50, analyzer)); diff --git a/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java b/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java index cd488043fd5..f8de59fa90f 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java @@ -39,14 +39,17 @@ import java.util.Random; * Base class for testing {@link Classifier}s */ public abstract class ClassificationTestBase extends LuceneTestCase { - public final static String POLITICS_INPUT = "Here are some interesting questions and answers about Mitt Romney.. If you don't know the answer to the question about Mitt Romney, then simply click on the answer below the question section."; + public final static String POLITICS_INPUT = "Here are some interesting questions and answers about Mitt Romney.. " + + "If you don't know the answer to the question about Mitt Romney, then simply click on the answer below the question section."; public static final BytesRef POLITICS_RESULT = new BytesRef("politics"); - public static final String TECHNOLOGY_INPUT = "Much is made of what the likes of Facebook, Google and Apple know about users. Truth is, Amazon may know more."; + public static final String TECHNOLOGY_INPUT = "Much is made of what the likes of Facebook, Google and Apple know about users." 
+ + " Truth is, Amazon may know more."; public static final BytesRef TECHNOLOGY_RESULT = new BytesRef("technology"); private RandomIndexWriter indexWriter; private Directory dir; + private FieldType ft; String textFieldName; String categoryFieldName; @@ -61,6 +64,10 @@ public abstract class ClassificationTestBase extends LuceneTestCase { textFieldName = "text"; categoryFieldName = "cat"; booleanFieldName = "bool"; + ft = new FieldType(TextField.TYPE_STORED); + ft.setStoreTermVectors(true); + ft.setStoreTermVectorOffsets(true); + ft.setStoreTermVectorPositions(true); } @Override @@ -72,7 +79,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { } protected void checkCorrectClassification(Classifier classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName) throws Exception { - checkCorrectClassification(classifier, inputDoc, expectedResult, analyzer, textFieldName, classFieldName, null); + checkCorrectClassification(classifier, inputDoc, expectedResult, analyzer, textFieldName, classFieldName, null); } protected void checkCorrectClassification(Classifier classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName, Query query) throws Exception { @@ -90,63 +97,35 @@ public abstract class ClassificationTestBase extends LuceneTestCase { atomicReader.close(); } } + protected void checkOnlineClassification(Classifier classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName) throws Exception { + checkOnlineClassification(classifier, inputDoc, expectedResult, analyzer, textFieldName, classFieldName, null); + } - protected void checkPerformance(Classifier classifier, Analyzer analyzer, String classFieldName) throws Exception { + protected void checkOnlineClassification(Classifier classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName, Query query) 
throws Exception { AtomicReader atomicReader = null; - long trainStart = System.currentTimeMillis(); try { - populatePerformanceIndex(analyzer); + populateSampleIndex(analyzer); atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader()); - classifier.train(atomicReader, textFieldName, classFieldName, analyzer); - long trainEnd = System.currentTimeMillis(); - long trainTime = trainEnd - trainStart; - assertTrue("training took more than 2 mins : " + trainTime / 1000 + "s", trainTime < 120000); + classifier.train(atomicReader, textFieldName, classFieldName, analyzer, query); + ClassificationResult classificationResult = classifier.assignClass(inputDoc); + assertNotNull(classificationResult.getAssignedClass()); + assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass()); + assertTrue("got a not positive score " + classificationResult.getScore(), classificationResult.getScore() > 0); + updateSampleIndex(analyzer); + ClassificationResult secondClassificationResult = classifier.assignClass(inputDoc); + assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass()); + assertEquals(Double.valueOf(classificationResult.getScore()), Double.valueOf(secondClassificationResult.getScore())); + } finally { if (atomicReader != null) atomicReader.close(); } } - private void populatePerformanceIndex(Analyzer analyzer) throws IOException { + private void populateSampleIndex(Analyzer analyzer) throws IOException { indexWriter.deleteAll(); indexWriter.commit(); - FieldType ft = new FieldType(TextField.TYPE_STORED); - ft.setStoreTermVectors(true); - ft.setStoreTermVectorOffsets(true); - ft.setStoreTermVectorPositions(true); - int docs = 1000; - Random random = random(); - for (int i = 0; i < docs; i++) { - boolean b = random.nextBoolean(); - Document doc = new Document(); - doc.add(new Field(textFieldName, createRandomString(random), ft)); - doc.add(new 
Field(categoryFieldName, b ? "technology" : "politics", ft)); - doc.add(new Field(booleanFieldName, String.valueOf(b), ft)); - indexWriter.addDocument(doc, analyzer); - } - indexWriter.commit(); - } - - private String createRandomString(Random random) { - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < 20; i++) { - builder.append(_TestUtil.randomSimpleString(random, 5)); - builder.append(" "); - } - return builder.toString(); - } - - private void populateSampleIndex(Analyzer analyzer) throws Exception { - - indexWriter.deleteAll(); - indexWriter.commit(); - - FieldType ft = new FieldType(TextField.TYPE_STORED); - ft.setStoreTermVectors(true); - ft.setStoreTermVectorOffsets(true); - ft.setStoreTermVectorPositions(true); - String text; Document doc = new Document(); @@ -218,4 +197,112 @@ public abstract class ClassificationTestBase extends LuceneTestCase { indexWriter.commit(); } + + protected void checkPerformance(Classifier classifier, Analyzer analyzer, String classFieldName) throws Exception { + AtomicReader atomicReader = null; + long trainStart = System.currentTimeMillis(); + try { + populatePerformanceIndex(analyzer); + atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader()); + classifier.train(atomicReader, textFieldName, classFieldName, analyzer); + long trainEnd = System.currentTimeMillis(); + long trainTime = trainEnd - trainStart; + assertTrue("training took more than 2 mins : " + trainTime / 1000 + "s", trainTime < 120000); + } finally { + if (atomicReader != null) + atomicReader.close(); + } + } + + private void populatePerformanceIndex(Analyzer analyzer) throws IOException { + indexWriter.deleteAll(); + indexWriter.commit(); + + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setStoreTermVectors(true); + ft.setStoreTermVectorOffsets(true); + ft.setStoreTermVectorPositions(true); + int docs = 1000; + Random random = random(); + for (int i = 0; i < docs; i++) { + boolean b = random.nextBoolean(); + Document 
doc = new Document(); + doc.add(new Field(textFieldName, createRandomString(random), ft)); + doc.add(new Field(categoryFieldName, b ? "technology" : "politics", ft)); + doc.add(new Field(booleanFieldName, String.valueOf(b), ft)); + indexWriter.addDocument(doc, analyzer); + } + indexWriter.commit(); + } + + private String createRandomString(Random random) { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 20; i++) { + builder.append(_TestUtil.randomSimpleString(random, 5)); + builder.append(" "); + } + return builder.toString(); + } + + private void updateSampleIndex(Analyzer analyzer) throws Exception { + + String text; + + Document doc = new Document(); + text = "Warren Bennis says John F. Kennedy grasped a key lesson about the presidency that few have followed."; + doc.add(new Field(textFieldName, text, ft)); + doc.add(new Field(categoryFieldName, "politics", ft)); + doc.add(new Field(booleanFieldName, "true", ft)); + + indexWriter.addDocument(doc, analyzer); + + doc = new Document(); + text = "Julian Zelizer says Bill Clinton is still trying to shape his party, years after the White House, while George W. Bush opts for a much more passive role."; + doc.add(new Field(textFieldName, text, ft)); + doc.add(new Field(categoryFieldName, "politics", ft)); + doc.add(new Field(booleanFieldName, "true", ft)); + indexWriter.addDocument(doc, analyzer); + + doc = new Document(); + text = "Crossfire: Sen. Tim Scott passes on Sen. 
Lindsey Graham endorsement"; + doc.add(new Field(textFieldName, text, ft)); + doc.add(new Field(categoryFieldName, "politics", ft)); + doc.add(new Field(booleanFieldName, "true", ft)); + indexWriter.addDocument(doc, analyzer); + + doc = new Document(); + text = "Illinois becomes 16th state to allow same-sex marriage."; + doc.add(new Field(textFieldName, text, ft)); + doc.add(new Field(categoryFieldName, "politics", ft)); + doc.add(new Field(booleanFieldName, "true", ft)); + indexWriter.addDocument(doc, analyzer); + + doc = new Document(); + text = "Apple is developing iPhones with curved-glass screens and enhanced sensors that detect different levels of pressure, according to a new report."; + doc.add(new Field(textFieldName, text, ft)); + doc.add(new Field(categoryFieldName, "technology", ft)); + doc.add(new Field(booleanFieldName, "false", ft)); + indexWriter.addDocument(doc, analyzer); + + doc = new Document(); + text = "The Xbox One is Microsoft's first new gaming console in eight years. 
It's a quality piece of hardware but it's also noteworthy because Microsoft is using it to make a statement."; + doc.add(new Field(textFieldName, text, ft)); + doc.add(new Field(categoryFieldName, "technology", ft)); + doc.add(new Field(booleanFieldName, "false", ft)); + indexWriter.addDocument(doc, analyzer); + + doc = new Document(); + text = "Google says it will replace a Google Maps image after a California father complained it shows the body of his teen-age son, who was shot to death in 2009."; + doc.add(new Field(textFieldName, text, ft)); + doc.add(new Field(categoryFieldName, "technology", ft)); + doc.add(new Field(booleanFieldName, "false", ft)); + indexWriter.addDocument(doc, analyzer); + + doc = new Document(); + text = "second unlabeled doc"; + doc.add(new Field(textFieldName, text, ft)); + indexWriter.addDocument(doc, analyzer); + + indexWriter.commit(); + } } diff --git a/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java b/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java index 7e754adb560..2a0308286f2 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java @@ -29,7 +29,10 @@ public class KNearestNeighborClassifierTest extends ClassificationTestBase - @@ -169,6 +168,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -355,7 +383,7 @@ - @@ -448,7 +476,7 @@ - +   ################################################################## JFlex not found. 
JFlex Home: ${jflex.home} @@ -456,14 +484,14 @@ Please install the jFlex 1.5 version (currently not released) from its SVN repository: - svn co -r 623 http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex + svn co -r 722 https://svn.code.sf.net/p/jflex/code/trunk jflex cd jflex mvn install Then, create a build.properties file either in your home directory, or within the Lucene directory and set the jflex.home property to the path where the JFlex trunk checkout is located - (in the above example its the directory called "jflex"). + (in the above example it's the directory called "jflex"). ################################################################## @@ -623,6 +651,7 @@ value="The Apache Software Foundation"/> + @@ -979,6 +1008,9 @@ + + + @@ -1331,7 +1363,7 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites ]]> Code coverage with Atlassian Clover enabled. - @@ -2168,7 +2200,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list} - - @@ -2226,7 +2258,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list} - diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java index e721efd657d..588c0f5455e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java @@ -373,6 +373,10 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { return compressionMode; } + int getChunkSize() { + return chunkSize; + } + ChunkIterator chunkIterator(int startDocID) throws IOException { ensureOpen(); fieldsStream.seek(indexReader.getStartPointer(startDocID)); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java 
b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java index f01605065d1..35f829daa26 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java @@ -337,7 +337,9 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { final Bits liveDocs = reader.getLiveDocs(); if (matchingFieldsReader == null - || matchingFieldsReader.getVersion() != VERSION_CURRENT) { // means reader version is not the same as the writer version + || matchingFieldsReader.getVersion() != VERSION_CURRENT // means reader version is not the same as the writer version + || matchingFieldsReader.getCompressionMode() != compressionMode + || matchingFieldsReader.getChunkSize() != chunkSize) { // the way data is decompressed depends on the chunk size // naive merge... for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) { StoredDocument doc = reader.document(i); @@ -362,8 +364,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1]; } - if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode - && numBufferedDocs == 0 // starting a new chunk + if (numBufferedDocs == 0 // starting a new chunk && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java index 70174d5f23f..b416812fa55 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java @@ -168,8 +168,9 @@ public abstract class IndexReader implements Closeable { * @see #tryIncRef */ public final void incRef() { - ensureOpen(); - refCount.incrementAndGet(); + if (!tryIncRef()) { + ensureOpen(); + } } /** diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 99d96570163..057c98dff51 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -32,6 +32,7 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.index.SegmentReader.CoreClosedListener; +import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -139,7 +140,13 @@ final class SegmentCoreReaders { } void incRef() { - ref.incrementAndGet(); + int count; + while ((count = ref.get()) > 0) { + if (ref.compareAndSet(count, count+1)) { + return; + } + } + throw new AlreadyClosedException("SegmentCoreReaders is already closed"); } NumericDocValues getNormValues(FieldInfo fi) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/index/package.html b/lucene/core/src/java/org/apache/lucene/index/package.html index 6870e08e479..9235262f98f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/package.html +++ b/lucene/core/src/java/org/apache/lucene/index/package.html @@ -94,7 +94,7 @@ and methods to access the term's documents and positions.

     // seek to a specific term
    -boolean found = termsEnum.seekExact(new BytesRef("foobar"), true);
    +boolean found = termsEnum.seekExact(new BytesRef("foobar"));
     if (found) {
       // get the document frequency
       System.out.println(termsEnum.docFreq());
    diff --git a/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java b/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
    index 0d54d57f1c7..c4c092486ec 100644
    --- a/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
    +++ b/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
    @@ -50,7 +50,7 @@ import org.apache.lucene.util.IOUtils;
      * 
      *   Directory fsDir = FSDirectory.open(new File("/path/to/index"));
      *   NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
    - *   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
    + *   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_50, analyzer);
      *   IndexWriter writer = new IndexWriter(cachedFSDir, conf);
      * 
    * diff --git a/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java b/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java index 44ac8bf3587..92f58935ec9 100644 --- a/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java +++ b/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java @@ -24,9 +24,8 @@ import java.io.IOException; *

    Implements {@link LockFactory} using {@link * File#createNewFile()}.

    * - *

    NOTE: the javadocs - * for File.createNewFile contain a vague + *

    NOTE: the {@linkplain File#createNewFile() javadocs + * for File.createNewFile()} contain a vague * yet spooky warning about not using the API for file * locking. This warning was added due to this diff --git a/lucene/core/src/java/org/apache/lucene/util/SloppyMath.java b/lucene/core/src/java/org/apache/lucene/util/SloppyMath.java index d27c8b3677b..52a2087f283 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SloppyMath.java +++ b/lucene/core/src/java/org/apache/lucene/util/SloppyMath.java @@ -33,7 +33,8 @@ package org.apache.lucene.util; public class SloppyMath { /** - * Returns the distance between two points in decimal degrees. + * Returns the distance in kilometers between two points + * specified in decimal degrees (latitude/longitude). * @param lat1 Latitude of the first point. * @param lon1 Longitude of the first point. * @param lat2 Latitude of the second point. diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java index f468a6da31a..6304f52cd35 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java @@ -1103,7 +1103,9 @@ public class TestIndexWriterReader extends LuceneTestCase { * writer, we don't see merge starvation. 
*/ public void testTooManySegments() throws Exception { Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + // Don't use newIndexWriterConfig, because we need a + // "sane" mergePolicy: + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); IndexWriter w = new IndexWriter(dir, iwc); // Create 500 segments: for(int i=0;i<500;i++) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java index e78649a745e..d43c9b38cb5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java @@ -217,7 +217,7 @@ public class TestPhraseQuery extends LuceneTestCase { Directory directory = newDirectory(); Analyzer stopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); RandomIndexWriter writer = new RandomIndexWriter(random(), directory, - newIndexWriterConfig( Version.LUCENE_40, stopAnalyzer)); + newIndexWriterConfig(TEST_VERSION_CURRENT, stopAnalyzer)); Document doc = new Document(); doc.add(newTextField("field", "the stop words are here", Field.Store.YES)); writer.addDocument(doc); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java index a4abadc21b8..5f1e48d66e8 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -86,8 +86,9 @@ public class IndexFiles { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); - IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); + // :Post-Release-Update-Version.LUCENE_XY: + 
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_50); + IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_50, analyzer); if (create) { // Create a new index in the directory, removing any diff --git a/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java index 7bb22f6ccbd..621ecf484c7 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java @@ -90,7 +90,8 @@ public class SearchFiles { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); + // :Post-Release-Update-Version.LUCENE_XY: + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_50); BufferedReader in = null; if (queries != null) { @@ -98,7 +99,8 @@ public class SearchFiles { } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } - QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); + // :Post-Release-Update-Version.LUCENE_XY: + QueryParser parser = new QueryParser(Version.LUCENE_50, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/FacetExamples.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/FacetExamples.java index 23113960fe8..2e4844c6a31 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/FacetExamples.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/FacetExamples.java @@ -25,7 +25,8 @@ import org.apache.lucene.util.Version; * @lucene.experimental */ public interface FacetExamples { - + + // :Post-Release-Update-Version.LUCENE_XY: /** The Lucene {@link Version} used by the example code. 
*/ public static final Version EXAMPLES_VER = Version.LUCENE_50; diff --git a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java index acef8a55730..bda9d3f1dad 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java @@ -133,7 +133,7 @@ public class FormBasedXmlQueryDemo extends HttpServlet { private void openExampleIndex() throws IOException { //Create a RAM-based index from our test data file RAMDirectory rd = new RAMDirectory(); - IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer); + IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer); IndexWriter writer = new IndexWriter(rd, iwConfig); InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv"); BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8)); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java index 1c3e184c11b..edf25f62ebe 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java @@ -282,7 +282,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) { // TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)? 
// The taxonomy has a unique structure, where each term is associated with one document - + + // :Post-Release-Update-Version.LUCENE_XY: // Make sure we use a MergePolicy which always merges adjacent segments and thus // keeps the doc IDs ordered as well (this is crucial for the taxonomy index). return new IndexWriterConfig(Version.LUCENE_50, null).setOpenMode(openMode).setMergePolicy( diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java index 012e6696460..8ee06dc9471 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java @@ -258,9 +258,8 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { List subInfos = new ArrayList(); - WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, fragInfo.getTotalBoost()); - Iterator subInfoIterator = fragInfo.getSubInfos().iterator(); + float boost = 0.0f; // The boost of the new info will be the sum of the boosts of its SubInfos while (subInfoIterator.hasNext()) { SubInfo subInfo = subInfoIterator.next(); List toffsList = new ArrayList(); @@ -268,18 +267,21 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { while (toffsIterator.hasNext()) { Toffs toffs = toffsIterator.next(); if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) { + toffsList.add(toffs); toffsIterator.remove(); } } if (!toffsList.isEmpty()) { - subInfos.add(new SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum())); + subInfos.add(new SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum(), subInfo.getBoost())); + boost += subInfo.getBoost(); } if (subInfo.getTermsOffsets().isEmpty()) { subInfoIterator.remove(); } } + WeightedFragInfo weightedFragInfo = 
new WeightedFragInfo(fragStart, fragEnd, subInfos, boost); fieldNameToFragInfos.get(field.name()).add(weightedFragInfo); } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java index 158cc879eb7..81afd4e3023 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java @@ -107,12 +107,14 @@ public abstract class FieldFragList { private final String text; // unnecessary member, just exists for debugging purpose private final List termsOffsets; // usually termsOffsets.size() == 1, // but if position-gap > 1 and slop > 0 then size() could be greater than 1 - private int seqnum; + private final int seqnum; + private final float boost; // used for scoring split WeightedPhraseInfos. - public SubInfo( String text, List termsOffsets, int seqnum ){ + public SubInfo( String text, List termsOffsets, int seqnum, float boost ){ this.text = text; this.termsOffsets = termsOffsets; this.seqnum = seqnum; + this.boost = boost; } public List getTermsOffsets(){ @@ -127,6 +129,10 @@ public abstract class FieldFragList { return text; } + public float getBoost(){ + return boost; + } + @Override public String toString(){ StringBuilder sb = new StringBuilder(); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFieldFragList.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFieldFragList.java index d9f0b473469..93d1140cd60 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFieldFragList.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFieldFragList.java @@ -45,7 +45,7 @@ public class SimpleFieldFragList extends FieldFragList { float totalBoost = 0; List subInfos = new ArrayList(); 
for( WeightedPhraseInfo phraseInfo : phraseInfoList ){ - subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) ); + subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum(), phraseInfo.getBoost() ) ); totalBoost += phraseInfo.getBoost(); } getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) ); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/WeightedFieldFragList.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/WeightedFieldFragList.java index 54122ff3267..e542f6d2b3b 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/WeightedFieldFragList.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/WeightedFieldFragList.java @@ -44,33 +44,37 @@ public class WeightedFieldFragList extends FieldFragList { */ @Override public void add( int startOffset, int endOffset, List phraseInfoList ) { - - float totalBoost = 0; - - List subInfos = new ArrayList(); - - HashSet distinctTerms = new HashSet(); - + List tempSubInfos = new ArrayList(); + List realSubInfos = new ArrayList(); + HashSet distinctTerms = new HashSet(); int length = 0; for( WeightedPhraseInfo phraseInfo : phraseInfoList ){ - - subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) ); - + float phraseTotalBoost = 0; for ( TermInfo ti : phraseInfo.getTermsInfos()) { if ( distinctTerms.add( ti.getText() ) ) - totalBoost += ti.getWeight() * phraseInfo.getBoost(); + phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost(); length++; } + tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), + phraseInfo.getSeqnum(), phraseTotalBoost ) ); } // We want that terms per fragment (length) is included into the weight. 
Otherwise a one-word-query // would cause an equal weight for all fragments regardless of how much words they contain. // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments - // we "bend" the length with a standard-normalization a little bit. - totalBoost *= length * ( 1 / Math.sqrt( length ) ); - - getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) ); + // we "bend" the length with a standard-normalization a little bit. + float norm = length * ( 1 / (float)Math.sqrt( length ) ); + + float totalBoost = 0; + for ( SubInfo tempSubInfo : tempSubInfos ) { + float subInfoBoost = tempSubInfo.getBoost() * norm; + realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(), + tempSubInfo.getSeqnum(), subInfoBoost )); + totalBoost += subInfoBoost; + } + + getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) ); } } \ No newline at end of file diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java index 40ab5e91635..8fe273b5f16 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java @@ -412,6 +412,54 @@ public class FastVectorHighlighterTest extends LuceneTestCase { clause( "field_der_red", "red" ), clause( "field_der_red", "der" ), clause( "field_exact", "a", "cat" ) ); } + public void testMultiValuedSortByScore() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer( random() ) ) ); + Document doc = new Document(); + FieldType type = new FieldType( TextField.TYPE_STORED ); + 
type.setStoreTermVectorOffsets( true ); + type.setStoreTermVectorPositions( true ); + type.setStoreTermVectors( true ); + type.freeze(); + doc.add( new Field( "field", "zero if naught", type ) ); // The first two fields contain the best match + doc.add( new Field( "field", "hero of legend", type ) ); // but total a lower score (3) than the bottom + doc.add( new Field( "field", "naught of hero", type ) ); // two fields (4) + doc.add( new Field( "field", "naught of hero", type ) ); + writer.addDocument(doc); + + FastVectorHighlighter highlighter = new FastVectorHighlighter(); + + ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder(); + fragmentsBuilder.setDiscreteMultiValueHighlighting( true ); + IndexReader reader = DirectoryReader.open(writer, true ); + String[] preTags = new String[] { "" }; + String[] postTags = new String[] { "" }; + Encoder encoder = new DefaultEncoder(); + int docId = 0; + BooleanQuery query = new BooleanQuery(); + query.add( clause( "field", "hero" ), Occur.SHOULD); + query.add( clause( "field", "of" ), Occur.SHOULD); + query.add( clause( "field", "legend" ), Occur.SHOULD); + FieldQuery fieldQuery = highlighter.getFieldQuery( query, reader ); + + for ( FragListBuilder fragListBuilder : new FragListBuilder[] { + new SimpleFragListBuilder(), new WeightedFragListBuilder() } ) { + String[] bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 20, 1, + fragListBuilder, fragmentsBuilder, preTags, postTags, encoder ); + assertEquals("hero of legend", bestFragments[0]); + bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 28, 1, + fragListBuilder, fragmentsBuilder, preTags, postTags, encoder ); + assertEquals("hero of legend", bestFragments[0]); + bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 30000, 1, + fragListBuilder, fragmentsBuilder, preTags, postTags, encoder ); + assertEquals("hero of legend", bestFragments[0]); + } + + 
reader.close(); + writer.close(); + dir.close(); + } + private void matchedFieldsTestCase( String fieldValue, String expected, Query... queryClauses ) throws IOException { matchedFieldsTestCase( true, true, fieldValue, expected, queryClauses ); } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java index 1071544ac7e..bfe2817249b 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java @@ -17,19 +17,48 @@ package org.apache.lucene.search.vectorhighlight; * limitations under the License. */ +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo; + public class WeightedFragListBuilderTest extends AbstractTestCase { - public void test2WeightedFragList() throws Exception { - + testCase( pqF( "the", "both" ), 100, + "subInfos=(theboth((195,203)))/0.8679108(149,249)", + 0.8679108 ); + } + + public void test2SubInfos() throws Exception { + BooleanQuery query = new BooleanQuery(); + query.add( pqF( "the", "both" ), Occur.MUST ); + query.add( tq( "examples" ), Occur.MUST ); + + testCase( query, 1000, + "subInfos=(examples((19,27))examples((66,74))theboth((195,203)))/1.8411169(0,1000)", + 1.8411169 ); + } + + private void testCase( Query query, int fragCharSize, String expectedFragInfo, + double expectedTotalSubInfoBoost ) throws Exception { makeIndexLongMV(); - FieldQuery fq = new FieldQuery( pqF( "the", "both" ), true, true ); + FieldQuery fq = new FieldQuery( query, true, true ); FieldTermStack 
stack = new FieldTermStack( reader, 0, F, fq ); FieldPhraseList fpl = new FieldPhraseList( stack, fq ); WeightedFragListBuilder wflb = new WeightedFragListBuilder(); - FieldFragList ffl = wflb.createFieldFragList( fpl, 100 ); + FieldFragList ffl = wflb.createFieldFragList( fpl, fragCharSize ); assertEquals( 1, ffl.getFragInfos().size() ); - assertEquals( "subInfos=(theboth((195,203)))/0.86791086(149,249)", ffl.getFragInfos().get( 0 ).toString() ); + assertEquals( expectedFragInfo, ffl.getFragInfos().get( 0 ).toString() ); + + float totalSubInfoBoost = 0; + for ( WeightedFragInfo info : ffl.getFragInfos() ) { + for ( SubInfo subInfo : info.getSubInfos() ) { + totalSubInfoBoost += subInfo.getBoost(); + } + } + assertEquals( expectedTotalSubInfoBoost, totalSubInfoBoost, .0000001 ); } } diff --git a/lucene/ivy-settings.xml b/lucene/ivy-settings.xml index 3ab31383696..0edbd0b4f2d 100644 --- a/lucene/ivy-settings.xml +++ b/lucene/ivy-settings.xml @@ -28,18 +28,20 @@ - + + + + + + - - - + + - + diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index a71f538dc51..b08b01f82bf 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -2,23 +2,63 @@ # Blank lines, comment lines, and keys that aren't in /org/name format are ignored # when the lexical sort check is performed by the ant check-lib-versions target. 
+ +/aopalliance/aopalliance = 1.0 +/asm/asm = 3.1 /cglib/cglib-nodep = 2.2 +/com.adobe.xmp/xmpcore = 5.1.2 com.carrotsearch.randomizedtesting.version = 2.0.13 /com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version} /com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version} /com.carrotsearch/hppc = 0.5.2 + +com.cloudera.cdk.cdk-morphlines.version = 0.9.0 +/com.cloudera.cdk/cdk-morphlines-avro = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-core = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-hadoop-sequencefile = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-json = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-saxon = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-tika-core = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-tika-decompress = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-twitter = ${com.cloudera.cdk.cdk-morphlines.version} + +com.codahale.metrics.version = 3.0.1 +/com.codahale.metrics/metrics-core = ${com.codahale.metrics.version} +/com.codahale.metrics/metrics-healthchecks = ${com.codahale.metrics.version} + /com.cybozu.labs/langdetect = 1.1-20120112 /com.drewnoakes/metadata-extractor = 2.6.2 + +com.fasterxml.jackson.core.version = 2.2.3 +/com.fasterxml.jackson.core/jackson-annotations = ${com.fasterxml.jackson.core.version} +/com.fasterxml.jackson.core/jackson-core = ${com.fasterxml.jackson.core.version} +/com.fasterxml.jackson.core/jackson-databind = ${com.fasterxml.jackson.core.version} + /com.google.guava/guava = 14.0.1 + +com.google.inject.guice.version = 3.0 +/com.google.inject.extensions/guice-servlet = ${com.google.inject.guice.version} +/com.google.inject/guice = ${com.google.inject.guice.version} + /com.google.protobuf/protobuf-java = 2.5.0 
/com.googlecode.concurrentlinkedhashmap/concurrentlinkedhashmap-lru = 1.2 /com.googlecode.juniversalchardet/juniversalchardet = 1.0.3 /com.googlecode.mp4parser/isoparser = 1.0-RC-1 -/com.ibm.icu/icu4j = 49.1 +/com.ibm.icu/icu4j = 52.1 /com.spatial4j/spatial4j = 0.3 -/com.sun.jersey/jersey-core = 1.16 + +com.sun.jersey.version = 1.8 +/com.sun.jersey.contribs/jersey-guice = ${com.sun.jersey.version} +/com.sun.jersey/jersey-bundle = ${com.sun.jersey.version} +/com.sun.jersey/jersey-core = ${com.sun.jersey.version} +/com.sun.jersey/jersey-json = ${com.sun.jersey.version} +/com.sun.jersey/jersey-server = ${com.sun.jersey.version} + +/com.sun.xml.bind/jaxb-impl = 2.2.2 +/com.thoughtworks.paranamer/paranamer = 2.3 +/com.typesafe/config = 1.0.2 /commons-beanutils/commons-beanutils = 1.7.0 /commons-cli/commons-cli = 1.2 /commons-codec/commons-codec = 1.7 @@ -33,8 +73,10 @@ com.carrotsearch.randomizedtesting.version = 2.0.13 /dom4j/dom4j = 1.6.1 /edu.ucar/netcdf = 4.2-min /hsqldb/hsqldb = 1.8.0.10 +/io.netty/netty = 3.6.2.Final /jakarta-regexp/jakarta-regexp = 1.4 /javax.activation/activation = 1.1 +/javax.inject/javax.inject= 1 /javax.mail/mail = 1.4.1 /javax.servlet/javax.servlet-api = 3.0.1 /javax.servlet/servlet-api = 2.4 @@ -45,9 +87,12 @@ com.carrotsearch.randomizedtesting.version = 2.0.13 /mecab/mecab-ipadic = 2.7.0-20070801 /mecab/mecab-naist-jdic = 0.6.3b-20111013 /net.arnx/jsonic = 1.2.7 +/net.sf.saxon/Saxon-HE = 9.5.1-2 +/net.sourceforge.argparse4j/argparse4j = 0.4.0 /net.sourceforge.nekohtml/nekohtml = 1.9.17 /org.antlr/antlr-runtime = 3.5 /org.apache.ant/ant = 1.8.2 +/org.apache.avro/avro = 1.7.4 /org.apache.commons/commons-compress = 1.4.1 /org.apache.derby/derby = 10.9.1.0 @@ -57,18 +102,34 @@ org.apache.hadoop.version = 2.2.0 /org.apache.hadoop/hadoop-common = ${org.apache.hadoop.version} /org.apache.hadoop/hadoop-hdfs = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-app = ${org.apache.hadoop.version} 
+/org.apache.hadoop/hadoop-mapreduce-client-common = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-core = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-hs = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-jobclient = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-shuffle = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-api = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-client = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-common = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-common = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-nodemanager = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-resourcemanager = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-tests = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-web-proxy = ${org.apache.hadoop.version} + # The httpcore version is often different from the httpclient and httpmime versions, # so the httpcore version value should not share the same symbolic name with them. 
/org.apache.httpcomponents/httpclient = 4.2.6 /org.apache.httpcomponents/httpcore = 4.2.5 /org.apache.httpcomponents/httpmime = 4.2.6 -org.apache.james.apache.mime4j = 0.7.2 -/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j} -/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j} +org.apache.james.apache.mime4j.version = 0.7.2 +/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version} +/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j.version} /org.apache.mahout/mahout-collections = 1.0 /org.apache.mahout/mahout-math = 0.6 +/org.apache.mrunit/mrunit = 1.0.0 org.apache.pdfbox.version = 1.8.1 /org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version} @@ -84,6 +145,7 @@ org.apache.poi.version = 3.9 org.apache.tika.version = 1.4 /org.apache.tika/tika-core = ${org.apache.tika.version} /org.apache.tika/tika-parsers = ${org.apache.tika.version} +/org.apache.tika/tika-xmp = ${org.apache.tika.version} org.apache.uima.version = 2.3.1 /org.apache.uima/AlchemyAPIAnnotator = ${org.apache.uima.version} @@ -96,6 +158,7 @@ org.apache.uima.version = 2.3.1 /org.apache.velocity/velocity-tools = 2.0 /org.apache.xmlbeans/xmlbeans = 2.3.0 /org.apache.zookeeper/zookeeper = 3.4.5 +/org.aspectj/aspectjrt = 1.6.11 org.bouncycastle.version = 1.45 /org.bouncycastle/bcmail-jdk15 = ${org.bouncycastle.version} @@ -111,8 +174,9 @@ org.carrot2.morfologik.version = 1.7.1 /org.ccil.cowan.tagsoup/tagsoup = 1.2.1 -org.codehaus.jackson.version = 1.7.4 +org.codehaus.jackson.version = 1.9.13 /org.codehaus.jackson/jackson-core-asl = ${org.codehaus.jackson.version} +/org.codehaus.jackson/jackson-jaxrs = ${org.codehaus.jackson.version} /org.codehaus.jackson/jackson-mapper-asl = ${org.codehaus.jackson.version} /org.codehaus.woodstox/wstx-asl = 3.2.7 @@ -137,6 +201,8 @@ org.gagravarr.vorbis.java.version = 0.1 /org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version} /org.gagravarr/vorbis-java-tika = 
${org.gagravarr.vorbis.java.version} +/org.mockito/mockito-core = 1.9.5 + org.mortbay.jetty.version = 6.1.26 /org.mortbay.jetty/jetty = ${org.mortbay.jetty.version} /org.mortbay.jetty/jetty-util = ${org.mortbay.jetty.version} @@ -161,5 +227,6 @@ org.slf4j.version = 1.6.6 /org.slf4j/slf4j-log4j12 = ${org.slf4j.version} /org.tukaani/xz = 1.0 +/org.xerial.snappy/snappy-java = 1.0.4.1 /rome/rome = 0.9 /xerces/xercesImpl = 2.9.1 diff --git a/lucene/licenses/icu4j-49.1.jar.sha1 b/lucene/licenses/icu4j-49.1.jar.sha1 deleted file mode 100644 index 12d3fb3cce1..00000000000 --- a/lucene/licenses/icu4j-49.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fbf7a438e6bf3660e0da2fd77dd1df1635fe503c diff --git a/lucene/licenses/icu4j-52.1.jar.sha1 b/lucene/licenses/icu4j-52.1.jar.sha1 new file mode 100644 index 00000000000..d3551e8380b --- /dev/null +++ b/lucene/licenses/icu4j-52.1.jar.sha1 @@ -0,0 +1 @@ +7dbc327670673acd14b487d120f05747d712c1c0 diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/RangeMapFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/RangeMapFloatFunction.java index 79df9b91411..2402af82fbe 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/RangeMapFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/RangeMapFloatFunction.java @@ -27,8 +27,8 @@ import java.io.IOException; import java.util.Map; /** - * LinearFloatFunction implements a linear function over - * another {@link org.apache.lucene.queries.function.ValueSource}. + * RangeMapFloatFunction implements a map function over + * another {@link ValueSource} whose values fall within min and max inclusive to target. *
    * Normally Used as an argument to a {@link org.apache.lucene.queries.function.FunctionQuery} * @@ -38,10 +38,14 @@ public class RangeMapFloatFunction extends ValueSource { protected final ValueSource source; protected final float min; protected final float max; - protected final float target; - protected final Float defaultVal; + protected final ValueSource target; + protected final ValueSource defaultVal; public RangeMapFloatFunction(ValueSource source, float min, float max, float target, Float def) { + this(source, min, max, new ConstValueSource(target), def == null ? null : new ConstValueSource(def)); + } + + public RangeMapFloatFunction(ValueSource source, float min, float max, ValueSource target, ValueSource def) { this.source = source; this.min = min; this.max = max; @@ -51,21 +55,23 @@ public class RangeMapFloatFunction extends ValueSource { @Override public String description() { - return "map(" + source.description() + "," + min + "," + max + "," + target + ")"; + return "map(" + source.description() + "," + min + "," + max + "," + target.description() + ")"; } @Override public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { final FunctionValues vals = source.getValues(context, readerContext); + final FunctionValues targets = target.getValues(context, readerContext); + final FunctionValues defaults = (this.defaultVal == null) ? null : defaultVal.getValues(context, readerContext); return new FloatDocValues(this) { @Override public float floatVal(int doc) { float val = vals.floatVal(doc); - return (val>=min && val<=max) ? target : (defaultVal == null ? val : defaultVal); + return (val>=min && val<=max) ? targets.floatVal(doc) : (defaultVal == null ? 
val : defaults.floatVal(doc)); } @Override public String toString(int doc) { - return "map(" + vals.toString(doc) + ",min=" + min + ",max=" + max + ",target=" + target + ")"; + return "map(" + vals.toString(doc) + ",min=" + min + ",max=" + max + ",target=" + targets.toString(doc) + ")"; } }; } @@ -82,8 +88,7 @@ public class RangeMapFloatFunction extends ValueSource { h += Float.floatToIntBits(min); h ^= (h << 14) | (h >>> 19); h += Float.floatToIntBits(max); - h ^= (h << 13) | (h >>> 20); - h += Float.floatToIntBits(target); + h += target.hashCode(); if (defaultVal != null) h += defaultVal.hashCode(); return h; @@ -95,7 +100,7 @@ public class RangeMapFloatFunction extends ValueSource { RangeMapFloatFunction other = (RangeMapFloatFunction)o; return this.min == other.min && this.max == other.max - && this.target == other.target + && this.target.equals(other.target) && this.source.equals(other.source) && (this.defaultVal == other.defaultVal || (this.defaultVal != null && this.defaultVal.equals(other.defaultVal))); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java index 388f3a2c982..4771e3291cb 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java @@ -94,14 +94,14 @@ public class ScaleFloatFunction extends ValueSource { ScaleInfo scaleInfo = new ScaleInfo(); scaleInfo.minVal = minVal; scaleInfo.maxVal = maxVal; - context.put(this.source, scaleInfo); + context.put(ScaleFloatFunction.this, scaleInfo); return scaleInfo; } @Override public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { - ScaleInfo scaleInfo = (ScaleInfo)context.get(source); + ScaleInfo scaleInfo = (ScaleInfo)context.get(ScaleFloatFunction.this); if 
(scaleInfo == null) { scaleInfo = createScaleInfo(context, readerContext); } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java index e781b22b419..50f85a84bf2 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java @@ -276,6 +276,10 @@ public class TestValueSources extends LuceneTestCase { assertHits(new FunctionQuery(new RangeMapFloatFunction(new FloatFieldSource("float"), 5, 6, 1, 0f)), new float[] { 1f, 0f }); + assertHits(new FunctionQuery(new RangeMapFloatFunction(new FloatFieldSource("float"), + 5, 6, new SumFloatFunction(new ValueSource[] {new ConstValueSource(1f), new ConstValueSource(2f)}), + new ConstValueSource(11f))), + new float[] { 3f, 11f }); } public void testReciprocal() throws Exception { @@ -338,8 +342,8 @@ public class TestValueSources extends LuceneTestCase { expectedDocs[i] = i; expected[i] = new ScoreDoc(i, scores[i]); } - TopDocs docs = searcher.search(q, documents.size(), - new Sort(new SortField("id", SortField.Type.STRING))); + TopDocs docs = searcher.search(q, null, documents.size(), + new Sort(new SortField("id", SortField.Type.STRING)), true, false); CheckHits.checkHits(random(), q, "", searcher, expectedDocs); CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs); CheckHits.checkExplanations(q, "", searcher); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java index 9f5b6fe64eb..eaa71db5fe0 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java @@ -65,7 +65,7 @@ public class TestParser extends LuceneTestCase { BufferedReader d = new 
BufferedReader(new InputStreamReader( TestParser.class.getResourceAsStream("reuters21578.txt"), "US-ASCII")); dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(Version.LUCENE_40, analyzer)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); String line = d.readLine(); while (line != null) { int endOfDate = line.indexOf('\t'); diff --git a/lucene/site/changes/changes2html.pl b/lucene/site/changes/changes2html.pl index fd94f4a13bb..61ac1c11e63 100755 --- a/lucene/site/changes/changes2html.pl +++ b/lucene/site/changes/changes2html.pl @@ -113,10 +113,13 @@ for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) { } # Section heading: no leading whitespace, initial word capitalized, - # five words or less, and no trailing punctuation - if ( /^([A-Z]\S*(?:\s+\S+){0,4})(? + diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java index ce9f0fcf7b3..d948e20edb8 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java @@ -17,13 +17,13 @@ package org.apache.lucene.search.suggest; * limitations under the License. */ import java.io.IOException; -import java.util.Arrays; import java.util.HashSet; -import java.util.List; import java.util.Set; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.search.spell.Dictionary; @@ -32,14 +32,24 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; /** + *

    * Dictionary with terms, weights and optionally payload information - * taken from stored fields in a Lucene index. - * - * NOTE: + * taken from stored/indexed fields in a Lucene index. + *

    + * NOTE: *
      *
    • - * The term, weight and (optionally) payload fields supplied - * are required for ALL documents and has to be stored + * The term and (optionally) payload fields have to be + * stored + *
    • + *
    • + * The weight field can be stored or can be a {@link NumericDocValues}. + * If the weight field is not defined, the value of the weight is 0 + *
    • + *
    • + * if any of the term or (optionally) payload fields supplied + * do not have a value for a document, then the document is + * skipped by the dictionary *
    • *
    */ @@ -59,10 +69,7 @@ public class DocumentDictionary implements Dictionary { * the corresponding terms. */ public DocumentDictionary(IndexReader reader, String field, String weightField) { - this.reader = reader; - this.field = field; - this.weightField = weightField; - this.payloadField = null; + this(reader, field, weightField, null); } /** @@ -85,14 +92,16 @@ public class DocumentDictionary implements Dictionary { /** Implements {@link InputIterator} from stored fields. */ protected class DocumentInputIterator implements InputIterator { + private final int docCount; private final Set relevantFields; private final boolean hasPayloads; private final Bits liveDocs; private int currentDocId = -1; - private long currentWeight; - private BytesRef currentPayload; - private StoredDocument doc; + private long currentWeight = 0; + private BytesRef currentPayload = null; + private final NumericDocValues weightValues; + /** * Creates an iterator over term, weight and payload fields from the lucene @@ -100,11 +109,11 @@ public class DocumentDictionary implements Dictionary { * over only term and weight. */ public DocumentInputIterator(boolean hasPayloads) throws IOException { - docCount = reader.maxDoc() - 1; this.hasPayloads = hasPayloads; - currentPayload = null; - liveDocs = MultiFields.getLiveDocs(reader); - this.relevantFields = getRelevantFields(new String [] {field, weightField, payloadField}); + docCount = reader.maxDoc() - 1; + weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null; + liveDocs = (reader.leaves().size() > 0) ? 
MultiFields.getLiveDocs(reader) : null; + relevantFields = getRelevantFields(new String [] {field, weightField, payloadField}); } @Override @@ -120,28 +129,29 @@ public class DocumentDictionary implements Dictionary { continue; } - doc = reader.document(currentDocId, relevantFields); + StoredDocument doc = reader.document(currentDocId, relevantFields); + + BytesRef tempPayload = null; + BytesRef tempTerm = null; if (hasPayloads) { StorableField payload = doc.getField(payloadField); - if (payload == null) { - throw new IllegalArgumentException(payloadField + " does not exist"); - } else if (payload.binaryValue() == null) { - throw new IllegalArgumentException(payloadField + " does not have binary value"); + if (payload == null || (payload.binaryValue() == null && payload.stringValue() == null)) { + continue; } - currentPayload = payload.binaryValue(); + tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue()); } - currentWeight = getWeight(currentDocId); - StorableField fieldVal = doc.getField(field); - if (fieldVal == null) { - throw new IllegalArgumentException(field + " does not exist"); - } else if(fieldVal.stringValue() == null) { - throw new IllegalArgumentException(field + " does not have string value"); + if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue() == null)) { + continue; } + tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue()) : fieldVal.binaryValue(); - return new BytesRef(fieldVal.stringValue()); + currentPayload = tempPayload; + currentWeight = getWeight(doc, currentDocId); + + return tempTerm; } return null; } @@ -156,15 +166,21 @@ public class DocumentDictionary implements Dictionary { return hasPayloads; } - /** Return the suggestion weight for this document */ - protected long getWeight(int docId) { + /** + * Returns the value of the weightField for the current document. 
+ * Retrieves the value for the weightField if its stored (using doc) + * or if its indexed as {@link NumericDocValues} (using docId) for the document. + * If no value is found, then the weight is 0. + */ + protected long getWeight(StoredDocument doc, int docId) { StorableField weight = doc.getField(weightField); - if (weight == null) { - throw new IllegalArgumentException(weightField + " does not exist"); - } else if (weight.numericValue() == null) { - throw new IllegalArgumentException(weightField + " does not have numeric value"); + if (weight != null) { // found weight as stored + return (weight.numericValue() != null) ? weight.numericValue().longValue() : 0; + } else if (weightValues != null) { // found weight as NumericDocValue + return weightValues.get(docId); + } else { // fall back + return 0; } - return weight.numericValue().longValue(); } private Set getRelevantFields(String... fields) { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java index ea494e1cfd2..2834851fb22 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java @@ -30,6 +30,7 @@ import org.apache.lucene.expressions.js.JavascriptCompiler; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.StoredDocument; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; @@ -37,23 +38,34 @@ import org.apache.lucene.util.BytesRefIterator; /** + *

    * Dictionary with terms and optionally payload information * taken from stored fields in a Lucene index. Similar to * {@link DocumentDictionary}, except it computes the weight * of the terms in a document based on a user-defined expression * having one or more {@link NumericDocValuesField} in the document. - * + *

    * NOTE: *
      *
    • - * The term and (optionally) payload fields supplied - * are required for ALL documents and has to be stored + * The term and (optionally) payload fields have to be + * stored + *
    • + *
    • + * if the term or (optionally) payload fields supplied + * do not have a value for a document, then the document is + * rejected by the dictionary + *
    • + *
    • + * All the fields used in weightExpression should + * have values for all documents, if any of the fields do not + * have a value for a document, it will default to 0 *
    • *
    */ public class DocumentExpressionDictionary extends DocumentDictionary { - private ValueSource weightsValueSource; + private final ValueSource weightsValueSource; /** * Creates a new dictionary with the contents of the fields named field @@ -86,8 +98,31 @@ public class DocumentExpressionDictionary extends DocumentDictionary { for (SortField sortField: sortFields) { bindings.add(sortField); } - weightsValueSource = expression.getValueSource(bindings); + weightsValueSource = expression.getValueSource(bindings); + } + + /** + * Creates a new dictionary with the contents of the fields named field + * for the terms, payloadField for the corresponding payloads + * and uses the weightsValueSource supplied to determine the + * score. + */ + public DocumentExpressionDictionary(IndexReader reader, String field, + ValueSource weightsValueSource, String payload) { + super(reader, field, null, payload); + this.weightsValueSource = weightsValueSource; + } + + /** + * Creates a new dictionary with the contents of the fields named field + * for the terms and uses the weightsValueSource supplied to determine the + * score. 
+ */ + public DocumentExpressionDictionary(IndexReader reader, String field, + ValueSource weightsValueSource) { + super(reader, field, null, null); + this.weightsValueSource = weightsValueSource; } @Override @@ -98,30 +133,36 @@ public class DocumentExpressionDictionary extends DocumentDictionary { final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator { private FunctionValues currentWeightValues; - private int currentLeafIndex = 0; + /** leaves of the reader */ private final List leaves; - + /** starting docIds of all the leaves */ private final int[] starts; + /** current leave index */ + private int currentLeafIndex = 0; public DocumentExpressionInputIterator(boolean hasPayloads) throws IOException { super(hasPayloads); leaves = reader.leaves(); - if (leaves.size() == 0) { - throw new IllegalArgumentException("Reader has to have at least one leaf"); - } starts = new int[leaves.size() + 1]; for (int i = 0; i < leaves.size(); i++) { starts[i] = leaves.get(i).docBase; } starts[leaves.size()] = reader.maxDoc(); - - currentLeafIndex = 0; - currentWeightValues = weightsValueSource.getValues(new HashMap(), leaves.get(currentLeafIndex)); + currentWeightValues = (leaves.size() > 0) + ? 
weightsValueSource.getValues(new HashMap(), leaves.get(currentLeafIndex)) + : null; } + /** + * Returns the weight for the current docId as computed + * by the weightsValueSource + * */ @Override - protected long getWeight(int docId) { + protected long getWeight(StoredDocument doc, int docId) { + if (currentWeightValues == null) { + return 0; + } int subIndex = ReaderUtil.subIndex(docId, starts); if (subIndex != currentLeafIndex) { currentLeafIndex = subIndex; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index 797acaea959..e901ef7f16e 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -301,7 +301,7 @@ public class FreeTextSuggester extends Lookup { Directory dir = FSDirectory.open(tempIndexPath); - IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46, indexAnalyzer); + IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, indexAnalyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); iwc.setRAMBufferSizeMB(ramBufferSizeMB); IndexWriter writer = new IndexWriter(dir, iwc); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java index 85418ff02d1..9e5d8e85901 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java @@ -1,22 +1,24 @@ package org.apache.lucene.search.suggest; import java.io.IOException; +import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; -import 
org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.StorableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.suggest.DocumentDictionary; @@ -48,19 +50,73 @@ public class DocumentDictionaryTest extends LuceneTestCase { static final String WEIGHT_FIELD_NAME = "w1"; static final String PAYLOAD_FIELD_NAME = "p1"; - private Map generateIndexDocuments(int ndocs) { + /** Returns Pair(list of invalid document terms, Map of document term -> document) */ + private Map.Entry, Map> generateIndexDocuments(int ndocs, boolean requiresPayload) { Map docs = new HashMap<>(); + List invalidDocTerms = new ArrayList<>(); for(int i = 0; i < ndocs ; i++) { - Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES); - Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)); - Field weight = new StoredField(WEIGHT_FIELD_NAME, 100d + i); Document doc = new Document(); - doc.add(field); - doc.add(payload); - doc.add(weight); - docs.put(field.stringValue(), doc); + boolean invalidDoc = false; + Field field = null; + // usually have valid term field in document + if (usually()) { + field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES); + doc.add(field); + } else { + invalidDoc = true; + } + + // even if payload is not required usually have it + if (requiresPayload || usually()) { + // usually have valid payload field in document + if (usually()) { + Field payload = new 
StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)); + doc.add(payload); + } else if (requiresPayload) { + invalidDoc = true; + } + } + + // usually have valid weight field in document + if (usually()) { + Field weight = (rarely()) ? + new StoredField(WEIGHT_FIELD_NAME, 100d + i) : + new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i); + doc.add(weight); + } + + String term = null; + if (invalidDoc) { + term = (field!=null) ? field.stringValue() : "invalid_" + i; + invalidDocTerms.add(term); + } else { + term = field.stringValue(); + } + + docs.put(term, doc); } - return docs; + return new SimpleEntry, Map>(invalidDocTerms, docs); + } + + @Test + public void testEmptyReader() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + iwc.setMergePolicy(newLogMergePolicy()); + // Make sure the index is created? + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); + writer.commit(); + writer.close(); + IndexReader ir = DirectoryReader.open(dir); + Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + + assertNull(inputIterator.next()); + assertEquals(inputIterator.weight(), 0); + assertNull(inputIterator.payload()); + + ir.close(); + dir.close(); } @Test @@ -69,7 +125,9 @@ public class DocumentDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(10); + Map.Entry, Map> res = generateIndexDocuments(atLeast(1000), true); + Map docs = res.getValue(); + List invalidDocTerms = res.getKey(); for(Document doc: docs.values()) { writer.addDocument(doc); } @@ -77,15 +135,21 @@ public class 
DocumentDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME); - InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; - while((f = tfp.next())!=null) { + while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue()); - assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); + Field weightField = doc.getField(WEIGHT_FIELD_NAME); + assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0); + assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); + } + + for (String invalidTerm : invalidDocTerms) { + assertNotNull(docs.remove(invalidTerm)); } assertTrue(docs.isEmpty()); + ir.close(); dir.close(); } @@ -96,7 +160,9 @@ public class DocumentDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(10); + Map.Entry, Map> res = generateIndexDocuments(atLeast(1000), false); + Map docs = res.getValue(); + List invalidDocTerms = res.getKey(); for(Document doc: docs.values()) { writer.addDocument(doc); } @@ -104,15 +170,22 @@ public class DocumentDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME); - InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = 
(InputIterator) dictionary.getWordsIterator(); BytesRef f; - while((f = tfp.next())!=null) { + while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue()); - assertEquals(tfp.payload(), null); + Field weightField = doc.getField(WEIGHT_FIELD_NAME); + assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0); + assertEquals(inputIterator.payload(), null); } + + for (String invalidTerm : invalidDocTerms) { + assertNotNull(docs.remove(invalidTerm)); + } + assertTrue(docs.isEmpty()); + ir.close(); dir.close(); } @@ -123,11 +196,14 @@ public class DocumentDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(10); + Map.Entry, Map> res = generateIndexDocuments(atLeast(1000), false); + Map docs = res.getValue(); + List invalidDocTerms = res.getKey(); Random rand = random(); List termsToDel = new ArrayList<>(); for(Document doc : docs.values()) { - if(rand.nextBoolean()) { + StorableField f = doc.getField(FIELD_NAME); + if(rand.nextBoolean() && f != null && !invalidDocTerms.contains(f.stringValue())) { termsToDel.add(doc.get(FIELD_NAME)); } writer.addDocument(doc); @@ -152,15 +228,21 @@ public class DocumentDictionaryTest extends LuceneTestCase { IndexReader ir = DirectoryReader.open(dir); assertEquals(ir.numDocs(), docs.size()); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME); - InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; - while((f = tfp.next())!=null) { + while((f = 
inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue()); - assertEquals(tfp.payload(), null); + Field weightField = doc.getField(WEIGHT_FIELD_NAME); + assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0); + assertEquals(inputIterator.payload(), null); + } + + for (String invalidTerm : invalidDocTerms) { + assertNotNull(docs.remove(invalidTerm)); } assertTrue(docs.isEmpty()); + ir.close(); dir.close(); } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java index 4cc719662a4..30cb837a923 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource; import org.apache.lucene.search.SortField; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.store.Directory; @@ -71,13 +72,38 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { return docs; } + @Test + public void testEmptyReader() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + iwc.setMergePolicy(newLogMergePolicy()); + // Make sure the index is created? 
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); + writer.commit(); + writer.close(); + IndexReader ir = DirectoryReader.open(dir); + Set sortFields = new HashSet(); + sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); + sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); + sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); + Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + + assertNull(inputIterator.next()); + assertEquals(inputIterator.weight(), 0); + assertNull(inputIterator.payload()); + + ir.close(); + dir.close(); + } + @Test public void testBasic() throws IOException { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(atLeast(10)); + Map docs = generateIndexDocuments(atLeast(100)); for(Document doc: docs.values()) { writer.addDocument(doc); } @@ -90,16 +116,16 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME); - InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; - while((f = tfp.next())!=null) { + while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); long w2 = 
doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue(); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(tfp.weight(), (w1 + w2) - w3); - assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); + assertEquals(inputIterator.weight(), (w1 + w2) - w3); + assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); } assertTrue(docs.isEmpty()); ir.close(); @@ -112,7 +138,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(atLeast(10)); + Map docs = generateIndexDocuments(atLeast(100)); for(Document doc: docs.values()) { writer.addDocument(doc); } @@ -125,16 +151,16 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 * w2) - (w3 - w1)/2", sortFields); - InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; - while((f = tfp.next())!=null) { + while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue(); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(tfp.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2)); - assertEquals(tfp.payload(), null); + 
assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2)); + assertEquals(inputIterator.payload(), null); } assertTrue(docs.isEmpty()); ir.close(); @@ -147,11 +173,11 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(atLeast(10)); + Map docs = generateIndexDocuments(atLeast(100)); Random rand = random(); List termsToDel = new ArrayList<>(); for(Document doc : docs.values()) { - if(rand.nextBoolean()) { + if(rand.nextBoolean() && termsToDel.size() < docs.size()-1) { termsToDel.add(doc.get(FIELD_NAME)); } writer.addDocument(doc); @@ -174,20 +200,50 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { } IndexReader ir = DirectoryReader.open(dir); + assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0); assertEquals(ir.numDocs(), docs.size()); Set sortFields = new HashSet(); sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields, PAYLOAD_FIELD_NAME); - InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; - while((f = tfp.next())!=null) { + while((f = inputIterator.next())!=null) { Document doc = docs.remove(f.utf8ToString()); long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(tfp.weight(), w2-w1); - assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); + 
assertEquals(inputIterator.weight(), w2-w1); + assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); + } + assertTrue(docs.isEmpty()); + ir.close(); + dir.close(); + } + + @Test + public void testWithValueSource() throws IOException { + + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + iwc.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); + Map docs = generateIndexDocuments(atLeast(100)); + for(Document doc: docs.values()) { + writer.addDocument(doc); + } + writer.commit(); + writer.close(); + + IndexReader ir = DirectoryReader.open(dir); + Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME); + InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); + BytesRef f; + while((f = inputIterator.next())!=null) { + Document doc = docs.remove(f.utf8ToString()); + assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); + assertEquals(inputIterator.weight(), 10); + assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); } assertTrue(docs.isEmpty()); ir.close(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index b085257e3b2..abd751d350f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -635,7 +635,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { int charUpto = 0; final StringBuilder sb = new StringBuilder(); while (charUpto < s.length()) { - final int c = s.codePointAt(charUpto); + final int c = s.charAt(charUpto); if (c == 0xa) { // Strangely, you cannot put 
\ u000A into Java // sources (not in a comment nor a string @@ -655,7 +655,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { // don't escape... sb.append(String.format(Locale.ROOT, "\\u%04x", c)); } - charUpto += Character.charCount(c); + charUpto++; } return sb.toString(); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java index b637559c381..a2763875662 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java @@ -135,7 +135,6 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat { @Override public void close() throws IOException { in.close(); - assert docStatus != Status.STARTED; } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomSimilarityProvider.java b/lucene/test-framework/src/java/org/apache/lucene/search/RandomSimilarityProvider.java index 3fc0792a7be..d7aca2dc00f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomSimilarityProvider.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomSimilarityProvider.java @@ -103,7 +103,7 @@ public class RandomSimilarityProvider extends PerFieldSimilarityWrapper { assert field != null; Similarity sim = previousMappings.get(field); if (sim == null) { - sim = knownSims.get(Math.abs(perFieldSeed ^ field.hashCode()) % knownSims.size()); + sim = knownSims.get(Math.max(0, Math.abs(perFieldSeed ^ field.hashCode())) % knownSims.size()); previousMappings.put(field, sim); } return sim; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index eb12f54eb13..862a9f459d2 100644 --- 
a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -228,6 +228,7 @@ public abstract class LuceneTestCase extends Assert { // for all suites ever since. // ----------------------------------------------------------------- + // :Post-Release-Update-Version.LUCENE_XY: /** * Use this constant when creating Analyzers and any other version-dependent stuff. *

    NOTE: Change this when development starts for new Lucene version: diff --git a/lucene/tools/custom-tasks.xml b/lucene/tools/custom-tasks.xml index e17480b1d3c..e38b0b137a4 100644 --- a/lucene/tools/custom-tasks.xml +++ b/lucene/tools/custom-tasks.xml @@ -45,7 +45,7 @@ - + diff --git a/lucene/tools/junit4/tests.policy b/lucene/tools/junit4/tests.policy index 0933cab39bf..b1c4311e3b0 100644 --- a/lucene/tools/junit4/tests.policy +++ b/lucene/tools/junit4/tests.policy @@ -63,6 +63,7 @@ grant { permission javax.security.auth.PrivateCredentialPermission "org.apache.hadoop.security.Credentials * \"*\"", "read"; permission java.security.SecurityPermission "putProviderProperty.SaslPlainServer"; permission java.security.SecurityPermission "insertProvider.SaslPlainServer"; + permission javax.xml.bind.JAXBPermission "setDatatypeConverter"; // TIKA uses BouncyCastle and that registers new provider for PDF parsing + MSOffice parsing. Maybe report as bug! permission java.security.SecurityPermission "putProviderProperty.BC"; diff --git a/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java b/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java index 17c88d2074b..e1ac4ce84b6 100644 --- a/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java +++ b/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java @@ -88,7 +88,7 @@ public class GetMavenDependenciesTask extends Task { private static final String UNWANTED_INTERNAL_DEPENDENCIES = "/(?:test-)?lib/|test-framework/classes/java|/test-files|/resources"; private static final Pattern SHARED_EXTERNAL_DEPENDENCIES_PATTERN - = Pattern.compile("((?:solr|lucene)/(?!test-framework).*)/lib/"); + = Pattern.compile("((?:solr|lucene)/(?!test-framework).*)/((?:test-)?)lib/"); private static final String DEPENDENCY_MANAGEMENT_PROPERTY = "lucene.solr.dependency.management"; private static final String IVY_USER_DIR_PROPERTY = 
"ivy.default.ivy.user.dir"; @@ -281,10 +281,16 @@ public class GetMavenDependenciesTask extends Task { Set moduleDependencies = interModuleExternalTestScopeDependencies.get(artifactId); if (null != moduleDependencies) { for (String otherArtifactId : moduleDependencies) { + int testScopePos = otherArtifactId.indexOf(":test"); + boolean isTestScope = false; + if (-1 != testScopePos) { + otherArtifactId = otherArtifactId.substring(0, testScopePos); + isTestScope = true; + } SortedSet otherExtDeps = allExternalDependencies.get(otherArtifactId); if (null != otherExtDeps) { for (ExternalDependency otherDep : otherExtDeps) { - if ( ! otherDep.isTestDependency) { + if (otherDep.isTestDependency == isTestScope) { if ( ! deps.contains(otherDep) && ( null == allExternalDependencies.get(artifactId) || ! allExternalDependencies.get(artifactId).contains(otherDep))) { @@ -523,8 +529,10 @@ public class GetMavenDependenciesTask extends Task { matcher = SHARED_EXTERNAL_DEPENDENCIES_PATTERN.matcher(dependency); if (matcher.find()) { String otherArtifactName = matcher.group(1); + boolean isTestScope = null != matcher.group(2) && matcher.group(2).length() > 0; otherArtifactName = otherArtifactName.replace('/', '-'); otherArtifactName = otherArtifactName.replace("lucene-analysis", "lucene-analyzers"); + otherArtifactName = otherArtifactName.replace("solr-contrib-solr-", "solr-"); otherArtifactName = otherArtifactName.replace("solr-contrib-", "solr-"); if ( ! 
otherArtifactName.equals(artifactName)) { Map> sharedDeps @@ -534,6 +542,9 @@ public class GetMavenDependenciesTask extends Task { sharedSet = new HashSet(); sharedDeps.put(artifactName, sharedSet); } + if (isTestScope) { + otherArtifactName += ":test"; + } sharedSet.add(otherArtifactName); } } @@ -622,16 +633,13 @@ public class GetMavenDependenciesTask extends Task { /** * Convert Ant project names to artifact names: prepend "lucene-" - * to Lucene project names; and "solr-cell" -> "solr-extraction" + * to Lucene project names */ private String antProjectToArtifactName(String origModule) { String module = origModule; if ( ! origModule.startsWith("solr-")) { // lucene modules names don't have "lucene-" prepended module = "lucene-" + module; } - if (module.equals("solr-cell")) { - module = "solr-extraction"; - } return module; } @@ -648,43 +656,50 @@ public class GetMavenDependenciesTask extends Task { Document document = documentBuilder.parse(ivyXmlFile); String dependencyPath = "/ivy-module/dependencies/dependency[not(starts-with(@conf,'start->'))]"; NodeList dependencies = (NodeList)xpath.evaluate(dependencyPath, document, XPathConstants.NODESET); - for (int i = 0 ; i < dependencies.getLength() ; ++i) { - Element dependency = (Element)dependencies.item(i); + for (int depNum = 0 ; depNum < dependencies.getLength() ; ++depNum) { + Element dependency = (Element)dependencies.item(depNum); String groupId = dependency.getAttribute("org"); String artifactId = dependency.getAttribute("name"); String dependencyCoordinate = groupId + ':' + artifactId; - String classifier = null; Set classifiers = dependencyClassifiers.get(dependencyCoordinate); if (null == classifiers) { classifiers = new HashSet<>(); dependencyClassifiers.put(dependencyCoordinate, classifiers); } - if (dependency.hasChildNodes()) { - NodeList artifacts = (NodeList)xpath.evaluate("artifact", dependency, XPathConstants.NODESET); - Element firstArtifact = (Element)artifacts.item(0); - if 
(artifacts.getLength() > 0) { - if ( ! "jar".equals(firstArtifact.getAttribute("type")) - && ! "jar".equals(firstArtifact.getAttribute("ext"))) { - nonJarDependencies.add(dependencyCoordinate); - continue; // ignore non-jar dependencies - } - String mavenClassifier = firstArtifact.getAttribute("maven:classifier"); - if ( ! mavenClassifier.isEmpty()) { - classifier = mavenClassifier; - classifiers.add(classifier); - } - } - } - classifiers.add(classifier); String conf = dependency.getAttribute("conf"); - boolean isTestDependency = conf.contains("test"); + boolean confContainsTest = conf.contains("test"); boolean isOptional = optionalExternalDependencies.contains(dependencyCoordinate); SortedSet deps = allExternalDependencies.get(module); if (null == deps) { deps = new TreeSet(); allExternalDependencies.put(module, deps); } - deps.add(new ExternalDependency(groupId, artifactId, classifier, isTestDependency, isOptional)); + NodeList artifacts = null; + if (dependency.hasChildNodes()) { + artifacts = (NodeList)xpath.evaluate("artifact", dependency, XPathConstants.NODESET); + } + if (null != artifacts && artifacts.getLength() > 0) { + for (int artifactNum = 0 ; artifactNum < artifacts.getLength() ; ++artifactNum) { + Element artifact = (Element)artifacts.item(artifactNum); + String type = artifact.getAttribute("type"); + String ext = artifact.getAttribute("ext"); + // When conf contains BOTH "test" and "compile", and type != "test", this is NOT a test dependency + boolean isTestDependency = confContainsTest && (type.equals("test") || ! 
conf.contains("compile")); + if ((type.isEmpty() && ext.isEmpty()) || type.equals("jar") || ext.equals("jar")) { + String classifier = artifact.getAttribute("maven:classifier"); + if (classifier.isEmpty()) { + classifier = null; + } + classifiers.add(classifier); + deps.add(new ExternalDependency(groupId, artifactId, classifier, isTestDependency, isOptional)); + } else { // not a jar + nonJarDependencies.add(dependencyCoordinate); + } + } + } else { + classifiers.add(null); + deps.add(new ExternalDependency(groupId, artifactId, null, confContainsTest, isOptional)); + } } } @@ -774,7 +789,7 @@ public class GetMavenDependenciesTask extends Task { } builder.append('-'); builder.append(matcher.group(4)); - return builder.toString(); + return builder.toString().replace("solr-solr-", "solr-"); } /** diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index f46c6d20eae..133fed69085 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -44,6 +44,15 @@ Upgrading from Solr 4.x Detailed Change List ---------------------- +New Features +---------------------- + +* SOLR-1301: Add a Solr contrib that allows for building Solr indexes via + Hadoop's MapReduce. (Matt Revelle, Alexander Kanarsky, Steve Rowe, + Mark Miller, Greg Bowyer, Jason Rutherglen, Kris Jirapinyo, Jason Venner , + Andrzej Bialecki, Patrick Hunt, Wolfgang Hoschek, Roman Shaposhnik, + Eric Wong) + Other Changes ---------------------- @@ -64,7 +73,9 @@ Apache ZooKeeper 3.4.5 Upgrading from Solr 4.6.0 ---------------------- - + +* CloudSolrServer and LBHttpSolrServer no longer declare MalformedURLException + as thrown from their constructors. Detailed Change List ---------------------- @@ -78,18 +89,175 @@ New Features * SOLR-5441: Expose number of transaction log files and their size via JMX. (Rafał Kuć via shalin) +* SOLR-5320: Added support for tri-level compositeId routing. 
+ (Anshum Gupta via shalin) + +* SOLR-5287: You can edit files in the conf directory from the admin UI + (Erick Erickson, Stefan Matheis) + +* SOLR-5447, SOLR-5490: Add a QParserPlugin for Lucene's SimpleQueryParser. + (Jack Conradson via shalin) + +* SOLR-5446: Admin UI - Allow changing Schema and Config (steffkes) + +* SOLR-5456: Admin UI - Allow creating new Files (steffkes) + +* SOLR-5208: Support for the setting of core.properties key/values at create-time on + Collections API (Erick Erickson) + +* SOLR-5428: New 'stats.calcdistinct' parameter in StatsComponent returns + set of distinct values and their count. This can also be specified per field + e.g. 'f.field.stats.calcdistinct'. (Elran Dvir via shalin) + +* SOLR-5378, SOLR-5528: A new SuggestComponent that fully utilizes the Lucene suggester + module and adds pluggable dictionaries, payloads and better distributed support. + This is intended to eventually replace the Suggester support through the + SpellCheckComponent. (Areek Zillur, Varun Thacker via shalin) + +* SOLR-5492: Return the replica that actually served the query in shards.info + response. (shalin) + +* SOLR-5506: Support docValues in CollationField and ICUCollationField. + (Robert Muir) + + * SOLR-5518: Added EditFileRequestHandler to deal with security issues around modifying + solr configuration files. + +* SOLR-5023: Add support for deleteInstanceDir to be passed from SolrJ for Core + Unload action. (Lyubov Romanchuk, shalin) + +* SOLR-1871: The 'map' function query accepts a ValueSource as target and + default value. (Chris Harris, shalin) + +* SOLR-5556: Allow class of CollectionsHandler and InfoHandler to be specified + in solr.xml. (Gregory Chanan, Alan Woodward) + Bug Fixes ---------------------- * SOLR-5438: DebugComponent throws NPE when used with grouping. (Tomás Fernández Löbbe via shalin) +* SOLR-5442: Python client cannot parse proxied response when served by Tomcat. 
+ (Patrick Hunt, Gregory Chanan, Vamsee Yarlagadda, Romain Rigaux, Mark Miller) + +* SOLR-5445: Proxied responses should propagate all headers rather than the + first one for each key. (Patrick Hunt, Mark Miller) + +* SOLR-4612: Admin UI - Analysis Screen contains empty table-columns (steffkes) + +* SOLR-5451: SyncStrategy closes it's http connection manager before the + executor that uses it in it's close method. (Mark Miller) + +* SOLR-5460: SolrDispatchFilter#sendError can get a SolrCore that it does not + close. (Mark Miller) + +* SOLR-5461: Request proxying should only set con.setDoOutput(true) if the + request is a post. (Mark Miller) + +* SOLR-5479: SolrCmdDistributor retry logic stops if a leader for the request + cannot be found in 1 second. (Mark Miller) + +* SOLR-5481: SolrCmdDistributor should not let the http client do it's own + retries. (Mark Miller) + +* SOLR-4709: The core reload after replication if config files have changed + can fail due to a race condition. (Mark Miller, Hossman)) + +* LUCENE-5347: Fixed Solr's Zookeeper Client to copy files to Zookeeper using + binary transfer. Previously data was read with default encoding and stored + in zookeeper as UTF-8. This bug was found after upgrading to forbidden-apis + 1.4. (Uwe Schindler) + +* SOLR-4376: DataImportHandler uses wrong date format for last_index_time if + a delta-import is run first before any full-imports. + (Sebastien Lorber, Arcadius Ahouansou via shalin) + +* SOLR-5496: We should share an http connection manager across non search + HttpClients and ensure all http connection managers get shutdown. + (Mark Miller) + +* SOLR-5503: Retry 'forward to leader' requests less aggressively - rather + than on IOException and status 500, ConnectException. (Mark Miller) + +* SOLR-5494: CoreContainer#remove throws NPE rather than returning null when + a SolrCore does not exist in core discovery mode. (Mark Miller) + +* SOLR-5354: Distributed sort is broken with CUSTOM FieldType. 
+ (Steve Rowe, hossman, Robert Muir, Jessica Cheng) + +* SOLR-5515: NPE when getting stats on date field with empty result on + SolrCloud. (Alexander Sagen, shalin) + +* SOLR-5204: StatsComponent and SpellCheckComponent do not support the + shards.tolerant=true parameter. (Anca Kopetz, shalin) + +* SOLR-5527: DIH logs spurious warning for special commands. (shalin) + +* SOLR-5524: Exception when using Query Function inside Scale Function. + (Trey Grainger, yonik) + +* SOLR-5540: HdfsLockFactory should explicitly create the lock parent directory + if necessary. (Mark Miller) + +* SOLR-5532: SolrJ Content-Type validation is too strict for some + webcontainers / proxies. (Jakob Furrer, hossman, Shawn Heisey, Uwe Schindler, + Mark Miller) + +* SOLR-5547: Creating a collection alias using SolrJ's CollectionAdminRequest + sets the alias name and the collections to alias to the same value. + (Aaron Schram, Mark Miller) + +* SOLR-5543: Core swaps resulted in duplicate core entries in solr.xml when + using solr.xml persistence. (Bill Bell, Alan Woodward) + +Optimizations +---------------------- + +* SOLR-5458: Admin UI - Remove separated Pages for Config & Schema (steffkes) + +* SOLR-5436: Eliminate the 1500ms wait in overseer loop as well as + polling the ZK distributed queue. (Noble Paul, Mark Miller) + +* SOLR-5189: Solr 4.x Web UI Log Viewer does not display 'date' column from + logs (steffkes) + +* SOLR-5512: Optimize DocValuesFacets. (Robert Muir) + Other Changes --------------------- * SOLR-5399: Add distributed request tracking information to DebugComponent (Tomás Fernández Löbbe via Ryan Ernst) +* SOLR-5421: Remove double set of distrib.from param in processAdd method of + DistributedUpdateProcessor. (Anshum Gupta via shalin) + +* SOLR-5404: The example config references deprecated classes. + (Uwe Schindler, Rafał Kuć via Mark Miller) + +* SOLR-5487: Replication factor error message doesn't match constraint. 
+ (Patrick Hunt via shalin) + +* SOLR-5499: Log a warning if /get is not registered when using SolrCloud. + (Daniel Collins via shalin) + +* SOLR-5517: Return HTTP error on POST requests with no Content-Type. + (Ryan Ernst, Uwe Schindler) + +* SOLR-5502: Added a test for tri-level compositeId routing with documents + having a "/" in a document id. (Anshum Gupta via Mark Miller) + +* SOLR-5539: Admin UI - Remove ability to create/modify files (steffkes) + +* SOLR-5533: Improve out of the box support for running Solr on hdfs with + SolrCloud. (Mark Miller) + +* SOLR-5548: Give DistributedSearchTestCase / JettySolrRunner the ability to + specify extra filters. (Greg Chanan via Mark Miller) + +* SOLR-5555: LBHttpSolrServer and CloudSolrServer constructors don't need to + declare MalformedURLExceptions (Sushil Bajracharya, Alan Woodward) ================== 4.6.0 ================== @@ -163,6 +331,9 @@ New Features * SOLR-5084: new field type EnumField. (Elran Dvir via Erick Erickson) +* SOLR-5464: Add option to ConcurrentSolrServer to stream pure delete + requests. (Mark Miller) + Bug Fixes ---------------------- @@ -199,6 +370,15 @@ Bug Fixes unloaded results in a " Too many close [count:-1]" error. (Olivier Soyez via Erick Erickson) +* SOLR-5453: Raise recovery socket read timeouts. (Mark Miller) + +* SOLR-5397: Replication can fail silently in some cases. (Mark Miller) + +* SOLR-5465: SolrCmdDistributor retry logic has a concurrency race bug. + (Mark Miller) + +* SOLR-5452: Do not attempt to proxy internal update requests. 
(Mark Miller) + Optimizations ---------------------- diff --git a/solr/common-build.xml b/solr/common-build.xml index 614c2784212..5d0d8d04aea 100644 --- a/solr/common-build.xml +++ b/solr/common-build.xml @@ -92,6 +92,7 @@ + @@ -155,7 +156,7 @@ @@ -228,7 +229,7 @@ - + @@ -295,6 +296,7 @@ + diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java index e1489474281..c1b2ce7c09d 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -19,6 +19,9 @@ package org.apache.solr.schema; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; import org.apache.commons.io.IOUtils; @@ -26,7 +29,12 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.collation.ICUCollationKeyAnalyzer; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.StorableField; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocTermOrdsRangeFilter; +import org.apache.lucene.search.FieldCacheRangeFilter; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermRangeQuery; @@ -35,6 +43,7 @@ import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.Base64; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ 
-178,8 +187,7 @@ public class ICUCollationField extends FieldType { rbc.setVariableTop(variableTop); } - // we use 4.0 because it ensures we just encode the pure byte[] keys. - analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator); + analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_CURRENT, collator); } /** @@ -229,12 +237,12 @@ public class ICUCollationField extends FieldType { } /** - * analyze the range with the analyzer, instead of the collator. + * analyze the text with the analyzer, instead of the collator. * because icu collators are not thread safe, this keeps things * simple (we already have a threadlocal clone in the reused TS) */ - private BytesRef analyzeRangePart(String field, String part) { - try (TokenStream source = analyzer.tokenStream(field, part)) { + private BytesRef getCollationKey(String field, String text) { + try (TokenStream source = analyzer.tokenStream(field, text)) { source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); @@ -242,22 +250,73 @@ public class ICUCollationField extends FieldType { // we control the analyzer here: most errors are impossible if (!source.incrementToken()) - throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); + throw new IllegalArgumentException("analyzer returned no terms for text: " + text); termAtt.fillBytesRef(); assert !source.incrementToken(); source.end(); return BytesRef.deepCopyOf(bytes); } catch (IOException e) { - throw new RuntimeException("Unable analyze range part: " + part, e); + throw new RuntimeException("Unable to analyze text: " + text, e); } } @Override public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { String f = field.getName(); - BytesRef low = part1 == null ? null : analyzeRangePart(f, part1); - BytesRef high = part2 == null ? 
null : analyzeRangePart(f, part2); - return new TermRangeQuery(field.getName(), low, high, minInclusive, maxInclusive); + BytesRef low = part1 == null ? null : getCollationKey(f, part1); + BytesRef high = part2 == null ? null : getCollationKey(f, part2); + if (!field.indexed() && field.hasDocValues()) { + if (field.multiValued()) { + return new ConstantScoreQuery(DocTermOrdsRangeFilter.newBytesRefRange( + field.getName(), low, high, minInclusive, maxInclusive)); + } else { + return new ConstantScoreQuery(FieldCacheRangeFilter.newBytesRefRange( + field.getName(), low, high, minInclusive, maxInclusive)); + } + } else { + return new TermRangeQuery(field.getName(), low, high, minInclusive, maxInclusive); + } + } + + @Override + public void checkSchemaField(SchemaField field) { + // no-op + } + + @Override + public List createFields(SchemaField field, Object value, float boost) { + if (field.hasDocValues()) { + List fields = new ArrayList(); + fields.add(createField(field, value, boost)); + final BytesRef bytes = getCollationKey(field.getName(), value.toString()); + if (field.multiValued()) { + fields.add(new SortedSetDocValuesField(field.getName(), bytes)); + } else { + fields.add(new SortedDocValuesField(field.getName(), bytes)); + } + return fields; + } else { + return Collections.singletonList(createField(field, value, boost)); + } + } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + final BytesRef val = (BytesRef)value; + return Base64.byteArrayToBase64(val.bytes, val.offset, val.length); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; + } + final String val = (String)value; + final byte[] bytes = Base64.base64ToByteArray(val); + return new BytesRef(bytes); } } diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml 
b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml new file mode 100644 index 00000000000..62c2651d14b --- /dev/null +++ b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + + + + + + + + diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java new file mode 100644 index 00000000000..b7ddfaf7c4f --- /dev/null +++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.schema; + +import java.io.File; +import java.io.FileOutputStream; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; + +import com.ibm.icu.text.Collator; +import com.ibm.icu.text.RuleBasedCollator; +import com.ibm.icu.util.ULocale; + +/** + * Tests {@link ICUCollationField} with docValues. + */ +@SuppressCodecs({"Lucene40", "Lucene41"}) +public class TestICUCollationFieldDocValues extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + String home = setupSolrHome(); + initCore("solrconfig.xml","schema.xml", home); + // add some docs + assertU(adoc("id", "1", "text", "\u0633\u0627\u0628")); + assertU(adoc("id", "2", "text", "I WİLL USE TURKİSH CASING")); + assertU(adoc("id", "3", "text", "ı will use turkish casıng")); + assertU(adoc("id", "4", "text", "Töne")); + assertU(adoc("id", "5", "text", "I W\u0049\u0307LL USE TURKİSH CASING")); + assertU(adoc("id", "6", "text", "Testing")); + assertU(adoc("id", "7", "text", "Tone")); + assertU(adoc("id", "8", "text", "Testing")); + assertU(adoc("id", "9", "text", "testing")); + assertU(adoc("id", "10", "text", "toene")); + assertU(adoc("id", "11", "text", "Tzne")); + assertU(adoc("id", "12", "text", "\u0698\u0698")); + assertU(commit()); + } + + /** + * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource. + * These are largish files, and jvm-specific (as our documentation says, you should always + * look out for jvm differences with collation). + * So its preferable to create this file on-the-fly. 
+ */ + public static String setupSolrHome() throws Exception { + // make a solr home underneath the test's TEMP_DIR + File tmpFile = File.createTempFile("test", "tmp", TEMP_DIR); + tmpFile.delete(); + tmpFile.mkdir(); + + // make data and conf dirs + new File(tmpFile + "/collection1", "data").mkdirs(); + File confDir = new File(tmpFile + "/collection1", "conf"); + confDir.mkdirs(); + + // copy over configuration files + FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml")); + FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml")); + + // generate custom collation rules (DIN 5007-2), saving to customrules.dat + RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE")); + + String DIN5007_2_tailorings = + "& ae , a\u0308 & AE , A\u0308"+ + "& oe , o\u0308 & OE , O\u0308"+ + "& ue , u\u0308 & UE , u\u0308"; + + RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings); + String tailoredRules = tailoredCollator.getRules(); + FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat")); + IOUtils.write(tailoredRules, os, "UTF-8"); + os.close(); + + return tmpFile.getAbsolutePath(); + } + + /** + * Test termquery with german DIN 5007-1 primary strength. + * In this case, ö is equivalent to o (but not oe) + */ + public void testBasicTermQuery() { + assertQ("Collated TQ: ", + req("fl", "id", "q", "sort_de:tone", "sort", "id asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=4]", + "//result/doc[2]/int[@name='id'][.=7]" + ); + } + + /** + * Test rangequery again with the DIN 5007-1 collator. + * We do a range query of tone .. tp, in binary order this + * would retrieve nothing due to case and accent differences. 
+ */ + public void testBasicRangeQuery() { + assertQ("Collated RangeQ: ", + req("fl", "id", "q", "sort_de:[tone TO tp]", "sort", "id asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=4]", + "//result/doc[2]/int[@name='id'][.=7]" + ); + } + + /** + * Test sort with a danish collator. ö is ordered after z + */ + public void testBasicSort() { + assertQ("Collated Sort: ", + req("fl", "id", "q", "sort_da:[tz TO töz]", "sort", "sort_da asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=11]", + "//result/doc[2]/int[@name='id'][.=4]" + ); + } + + /** + * Test sort with an arabic collator. U+0633 is ordered after U+0698. + * With a binary collator, the range would also return nothing. + */ + public void testArabicSort() { + assertQ("Collated Sort: ", + req("fl", "id", "q", "sort_ar:[\u0698 TO \u0633\u0633]", "sort", "sort_ar asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=12]", + "//result/doc[2]/int[@name='id'][.=1]" + ); + } + + /** + * Test rangequery again with an Arabic collator. + * Binary order would normally order U+0633 in this range. + */ + public void testNegativeRangeQuery() { + assertQ("Collated RangeQ: ", + req("fl", "id", "q", "sort_ar:[\u062F TO \u0698]", "sort", "id asc" ), + "//*[@numFound='0']" + ); + } + /** + * Test canonical decomposition with turkish primary strength. + * With this sort order, İ is the uppercase form of i, and I is the uppercase form of ı. + * We index a decomposed form of İ. + */ + public void testCanonicalDecomposition() { + assertQ("Collated TQ: ", + req("fl", "id", "q", "sort_tr_canon:\"I Will Use Turkish Casıng\"", "sort", "id asc" ), + "//*[@numFound='3']", + "//result/doc[1]/int[@name='id'][.=2]", + "//result/doc[2]/int[@name='id'][.=3]", + "//result/doc[3]/int[@name='id'][.=5]" + ); + } + + /** + * Test termquery with custom collator (DIN 5007-2). 
+ * In this case, ö is equivalent to oe (but not o) + */ + public void testCustomCollation() { + assertQ("Collated TQ: ", + req("fl", "id", "q", "sort_custom:toene", "sort", "id asc" ), + "//*[@numFound='2']", + "//result/doc[1]/int[@name='id'][.=4]", + "//result/doc[2]/int[@name='id'][.=10]" + ); + } +} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java index 0ea391c4974..911322eef82 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java @@ -37,6 +37,8 @@ public abstract class DIHProperties { public abstract Map readIndexerProperties(); + public abstract String convertDateToString(Date d); + public Date getCurrentTimestamp() { return new Date(); } diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java index 01991de3611..dc33a31ea89 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java @@ -51,6 +51,11 @@ public class DocBuilder { private static final Logger LOG = LoggerFactory.getLogger(DocBuilder.class); private static final Date EPOCH = new Date(0); + public static final String DELETE_DOC_BY_ID = "$deleteDocById"; + public static final String DELETE_DOC_BY_QUERY = "$deleteDocByQuery"; + public static final String DOC_BOOST = "$docBoost"; + public static final String SKIP_DOC = "$skipDoc"; + public static final String SKIP_ROW = "$skipRow"; DataImporter dataImporter; @@ -117,6 +122,7 @@ public class DocBuilder { private VariableResolver getVariableResolver() { try { VariableResolver 
resolver = null; + String epoch = propWriter.convertDateToString(EPOCH); if(dataImporter != null && dataImporter.getCore() != null && dataImporter.getCore().getResourceLoader().getCoreProperties() != null){ resolver = new VariableResolver(dataImporter.getCore().getResourceLoader().getCoreProperties()); @@ -129,7 +135,7 @@ public class DocBuilder { indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.get(LAST_INDEX_TIME)); } else { // set epoch - indexerNamespace.put(LAST_INDEX_TIME, EPOCH); + indexerNamespace.put(LAST_INDEX_TIME, epoch); } indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime()); indexerNamespace.put("request", new HashMap(reqParams.getRawParams())); @@ -140,7 +146,7 @@ public class DocBuilder { if (lastIndex != null) { entityNamespace.put(SolrWriter.LAST_INDEX_KEY, lastIndex); } else { - entityNamespace.put(SolrWriter.LAST_INDEX_KEY, EPOCH); + entityNamespace.put(SolrWriter.LAST_INDEX_KEY, epoch); } indexerNamespace.put(entity.getName(), entityNamespace); } @@ -567,7 +573,7 @@ public class DocBuilder { } private void handleSpecialCommands(Map arow, DocWrapper doc) { - Object value = arow.get("$deleteDocById"); + Object value = arow.get(DELETE_DOC_BY_ID); if (value != null) { if (value instanceof Collection) { Collection collection = (Collection) value; @@ -580,7 +586,7 @@ public class DocBuilder { importStatistics.deletedDocCount.incrementAndGet(); } } - value = arow.get("$deleteDocByQuery"); + value = arow.get(DELETE_DOC_BY_QUERY); if (value != null) { if (value instanceof Collection) { Collection collection = (Collection) value; @@ -593,7 +599,7 @@ public class DocBuilder { importStatistics.deletedDocCount.incrementAndGet(); } } - value = arow.get("$docBoost"); + value = arow.get(DOC_BOOST); if (value != null) { float value1 = 1.0f; if (value instanceof Number) { @@ -604,7 +610,7 @@ public class DocBuilder { doc.setDocumentBoost(value1); } - value = arow.get("$skipDoc"); + value = arow.get(SKIP_DOC); if (value != null) { if 
(Boolean.parseBoolean(value.toString())) { throw new DataImportHandlerException(DataImportHandlerException.SKIP, @@ -612,7 +618,7 @@ public class DocBuilder { } } - value = arow.get("$skipRow"); + value = arow.get(SKIP_ROW); if (value != null) { if (Boolean.parseBoolean(value.toString())) { throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW); diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java index 1aa882e7281..7c9d03169e5 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java @@ -153,7 +153,4 @@ public class EntityProcessorBase extends EntityProcessor { public static final String CONTINUE = "continue"; public static final String SKIP = "skip"; - - public static final String SKIP_DOC = "$skipDoc"; - } diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java index 1d652b8d516..7967558a852 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java @@ -130,7 +130,8 @@ public class SimplePropertiesWriter extends DIHProperties { } - protected String convertDateToString(Date d) { + @Override + public String convertDateToString(Date d) { return dateFormat.format(d); } protected Date convertStringToDate(String s) { diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java 
b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java index c18ae65375b..4819e75089c 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java @@ -330,7 +330,7 @@ public class XPathEntityProcessor extends EntityProcessorBase { } else if (SKIP.equals(onError)) { LOG.warn(msg, e); Map map = new HashMap(); - map.put(SKIP_DOC, Boolean.TRUE); + map.put(DocBuilder.SKIP_DOC, Boolean.TRUE); rows.add(map); } else if (CONTINUE.equals(onError)) { LOG.warn(msg, e); diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/config/DIHConfiguration.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/config/DIHConfiguration.java index 4b5b7510481..bf07e40f16b 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/config/DIHConfiguration.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/config/DIHConfiguration.java @@ -8,6 +8,7 @@ import java.util.Locale; import java.util.Map; import org.apache.solr.handler.dataimport.DataImporter; +import org.apache.solr.handler.dataimport.DocBuilder; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.slf4j.Logger; @@ -111,7 +112,7 @@ public class DIHConfiguration { for (Map.Entry entry : fields.entrySet()) { EntityField fld = entry.getValue(); SchemaField field = getSchemaField(fld.getName()); - if (field == null) { + if (field == null && !isSpecialCommand(fld.getName())) { LOG.info("The field :" + fld.getName() + " present in DataConfig does not have a counterpart in Solr Schema"); } } @@ -178,4 +179,13 @@ public class DIHConfiguration { public IndexSchema getSchema() { return schema; } + + public static boolean isSpecialCommand(String fld) { + return 
DocBuilder.DELETE_DOC_BY_ID.equals(fld) || + DocBuilder.DELETE_DOC_BY_QUERY.equals(fld) || + DocBuilder.DOC_BOOST.equals(fld) || + DocBuilder.SKIP_DOC.equals(fld) || + DocBuilder.SKIP_ROW.equals(fld); + + } } \ No newline at end of file diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java index 2ead5f2b90c..dbd38be1899 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java @@ -132,7 +132,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { public void testSkipDoc() throws Exception { List rows = new ArrayList(); rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "two", "$skipDoc", "true")); + rows.add(createMap("id", "2", "desc", "two", DocBuilder.SKIP_DOC, "true")); MockDataSource.setIterator("select * from x", rows.iterator()); runFullImport(dataConfigWithDynamicTransformer); @@ -146,7 +146,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { public void testSkipRow() throws Exception { List rows = new ArrayList(); rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "two", "$skipRow", "true")); + rows.add(createMap("id", "2", "desc", "two", DocBuilder.SKIP_ROW, "true")); MockDataSource.setIterator("select * from x", rows.iterator()); runFullImport(dataConfigWithDynamicTransformer); @@ -166,7 +166,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { MockDataSource.setIterator("3", rows.iterator()); rows = new ArrayList(); - rows.add(createMap("name_s", "xyz", "$skipRow", "true")); + rows.add(createMap("name_s", "xyz", DocBuilder.SKIP_ROW, "true")); MockDataSource.setIterator("4", rows.iterator()); 
runFullImport(dataConfigWithTwoEntities); @@ -197,7 +197,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { List rows = new ArrayList(); rows.add(createMap("id", "1", "desc", "one")); rows.add(createMap("id", "2", "desc", "two")); - rows.add(createMap("id", "3", "desc", "two", "$deleteDocById", "2")); + rows.add(createMap("id", "3", "desc", "two", DocBuilder.DELETE_DOC_BY_ID, "2")); MockDataSource.setIterator("select * from x", rows.iterator()); runFullImport(dataConfigForSkipTransform); @@ -213,7 +213,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { rows = new ArrayList(); rows.add(createMap("id", "1", "desc", "one")); rows.add(createMap("id", "2", "desc", "one")); - rows.add(createMap("id", "3", "desc", "two", "$deleteDocByQuery", "desc:one")); + rows.add(createMap("id", "3", "desc", "two", DocBuilder.DELETE_DOC_BY_QUERY, "desc:one")); MockDataSource.setIterator("select * from x", rows.iterator()); runFullImport(dataConfigForSkipTransform); @@ -227,7 +227,7 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { MockDataSource.clearCache(); rows = new ArrayList(); - rows.add(createMap("$deleteDocById", "3")); + rows.add(createMap(DocBuilder.DELETE_DOC_BY_ID, "3")); MockDataSource.setIterator("select * from x", rows.iterator()); runFullImport(dataConfigForSkipTransform, createMap("clean","false")); assertQ(req("id:3"), "//*[@numFound='0']"); diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java index 93928e61ff5..4fd3a61d114 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java @@ -23,7 +23,7 @@ import org.junit.Test; */ /** - * Test with 
various combinations of parameters, child entites, transformers. + * Test with various combinations of parameters, child entities, transformers. */ public class TestSqlEntityProcessorDelta extends AbstractSqlEntityProcessorTestCase { private boolean delta = false; @@ -48,6 +48,21 @@ public class TestSqlEntityProcessorDelta extends AbstractSqlEntityProcessorTestC singleEntity(c); validateChanges(); } + + @Test + public void testDeltaImportWithoutInitialFullImport() throws Exception { + log.debug("testDeltaImportWithoutInitialFullImport delta-import..."); + countryEntity = false; + delta = true; + /* + * We need to add 2 in total: + * +1 for deltaQuery i.e identifying id of items to update, + * +1 for deletedPkQuery i.e delete query + */ + singleEntity(totalPeople() + 2); + validateChanges(); + } + @Test public void testWithSimpleTransformer() throws Exception { log.debug("testWithSimpleTransformer full-import..."); diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml index 40e5201f60d..263c48832c9 100644 --- a/solr/contrib/extraction/ivy.xml +++ b/solr/contrib/extraction/ivy.xml @@ -22,6 +22,7 @@ + @@ -44,12 +45,19 @@ + + + + + + + diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java index c91dd47306f..acf94a2d801 100644 --- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java +++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
diff --git a/solr/contrib/map-reduce/build.xml b/solr/contrib/map-reduce/build.xml new file mode 100644 index 00000000000..b1076bb4327 --- /dev/null +++ b/solr/contrib/map-reduce/build.xml @@ -0,0 +1,147 @@ + + + + + + + + Solr map-reduce index construction. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/ivy.xml b/solr/contrib/map-reduce/ivy.xml new file mode 100644 index 00000000000..179b39d666e --- /dev/null +++ b/solr/contrib/map-reduce/ivy.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/java/assembly/hadoop-job.xml b/solr/contrib/map-reduce/src/java/assembly/hadoop-job.xml new file mode 100644 index 00000000000..1640b6ff72e --- /dev/null +++ b/solr/contrib/map-reduce/src/java/assembly/hadoop-job.xml @@ -0,0 +1,39 @@ + + + + + + job + + jar + + false + + + false + runtime + lib + + ${groupId}:${artifactId} + + + + true + + ${groupId}:${artifactId} + + + + diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/AlphaNumericComparator.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/AlphaNumericComparator.java new file mode 100644 index 00000000000..cd8f183cee1 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/AlphaNumericComparator.java @@ -0,0 +1,76 @@ +//The MIT License +// +// Copyright (c) 2003 Ron Alford, Mike Grove, Bijan Parsia, Evren Sirin +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the 
Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +package org.apache.solr.hadoop; + +import java.util.Comparator; + +/** + * This is a comparator to perform a mix of alphabetical+numeric comparison. For + * example, if there is a list {"test10", "test2", "test150", "test25", "test1"} + * then what we generally expect from the ordering is the result {"test1", + * "test2", "test10", "test25", "test150"}. However, standard lexicographic + * ordering does not do that and "test10" comes before "test2". This class is + * provided to overcome that problem. This functionality is useful to sort the + * benchmark files (like the ones in DL-benchmark-suite) from smallest to the + * largest. Comparisons are done on the String values returned by toString() so + * care should be taken when this comparator is used to sort arbitrary Java + * objects.
+ * + */ +final class AlphaNumericComparator implements Comparator { + + public AlphaNumericComparator() { + } + + public int compare(Object o1, Object o2) { + String s1 = o1.toString(); + String s2 = o2.toString(); + int n1 = s1.length(), n2 = s2.length(); + int i1 = 0, i2 = 0; + while (i1 < n1 && i2 < n2) { + int p1 = i1; + int p2 = i2; + char c1 = s1.charAt(i1++); + char c2 = s2.charAt(i2++); + if(c1 != c2) { + if (Character.isDigit(c1) && Character.isDigit(c2)) { + int value1 = 0, value2 = 0; + while (i1 < n1 && Character.isDigit(c1 = s1.charAt(i1))) { + i1++; + } + value1 = Integer.parseInt(s1.substring(p1, i1)); + while (i2 < n2 && Character.isDigit(c2 = s2.charAt(i2))) { + i2++; + } + value2 = Integer.parseInt(s2.substring(p2, i2)); + if (value1 != value2) { + return value1 - value2; + } + } + return c1 - c2; + } + } + + return n1 - n2; + } +} \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java new file mode 100644 index 00000000000..06818ec4c8f --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Locale; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Enables adding batches of documents to an EmbeddedSolrServer. + */ +class BatchWriter { + + private final EmbeddedSolrServer solr; + private volatile Exception batchWriteException = null; + + private static final Logger LOG = LoggerFactory.getLogger(BatchWriter.class); + + public Exception getBatchWriteException() { + return batchWriteException; + } + + public void setBatchWriteException(Exception batchWriteException) { + this.batchWriteException = batchWriteException; + } + + /** The number of writing threads. */ + final int writerThreads; + + /** Queue Size */ + final int queueSize; + + private final ThreadPoolExecutor batchPool; + + private TaskID taskId = null; + + /** + * The number of in progress batches, must be zero before the close can + * actually start closing + */ + AtomicInteger executingBatches = new AtomicInteger(0); + + /** + * Create the batch writer object, set the thread to daemon mode, and start + * it. 
+ * + */ + + final class Batch implements Runnable { + + private List documents; + private UpdateResponse result; + + public Batch(Collection batch) { + documents = new ArrayList(batch); + } + + public void run() { + try { + executingBatches.getAndIncrement(); + result = runUpdate(documents); + } finally { + executingBatches.getAndDecrement(); + } + } + + protected List getDocuments() { + return documents; + } + + protected void setDocuments(List documents) { + this.documents = documents; + } + + protected UpdateResponse getResult() { + return result; + } + + protected void setResult(UpdateResponse result) { + this.result = result; + } + + protected void reset(List documents) { + if (this.documents == null) { + this.documents = new ArrayList(documents); + } else { + this.documents.clear(); + this.documents.addAll(documents); + } + result = null; + } + + protected void reset(SolrInputDocument document) { + if (this.documents == null) { + this.documents = new ArrayList(); + } else { + this.documents.clear(); + } + this.documents.add(document); + result = null; + } + } + + protected UpdateResponse runUpdate(List batchToWrite) { + try { + UpdateResponse result = solr.add(batchToWrite); + SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCHES_WRITTEN.toString(), 1); + SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString(), batchToWrite.size()); + if (LOG.isDebugEnabled()) { + SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCH_WRITE_TIME.toString(), result.getElapsedTime()); + } + return result; + } catch (Throwable e) { + if (e instanceof Exception) { + setBatchWriteException((Exception) e); + } else { + setBatchWriteException(new Exception(e)); + } + SolrRecordWriter.incrementCounter(taskId, getClass().getName() + ".errors", e.getClass().getName(), 1); + LOG.error("Unable to process batch", e); + return null; + } + } + + + public 
BatchWriter(EmbeddedSolrServer solr, int batchSize, TaskID tid, + int writerThreads, int queueSize) { + this.solr = solr; + this.writerThreads = writerThreads; + this.queueSize = queueSize; + taskId = tid; + + // we need to obtain the settings before the constructor + if (writerThreads != 0) { + batchPool = new ThreadPoolExecutor(writerThreads, writerThreads, 5, + TimeUnit.SECONDS, new LinkedBlockingQueue(queueSize), + new ThreadPoolExecutor.CallerRunsPolicy()); + } else { // single threaded case + batchPool = null; + } + } + + public void queueBatch(Collection batch) + throws IOException, SolrServerException { + + throwIf(); + Batch b = new Batch(batch); + if (batchPool != null) { + batchPool.execute(b); + } else { // single threaded case + b.run(); + throwIf(); + } + } + + public synchronized void close(TaskAttemptContext context) + throws InterruptedException, SolrServerException, IOException { + + if (batchPool != null) { + context.setStatus("Waiting for batches to complete"); + batchPool.shutdown(); + + while (!batchPool.isTerminated()) { + LOG.info(String.format(Locale.ENGLISH, + "Waiting for %d items and %d threads to finish executing", batchPool + .getQueue().size(), batchPool.getActiveCount())); + batchPool.awaitTermination(5, TimeUnit.SECONDS); + } + } + context.setStatus("Committing Solr Phase 1"); + solr.commit(true, false); + context.setStatus("Optimizing Solr"); + int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1); + LOG.info("Optimizing Solr: forcing merge down to {} segments", maxSegments); + long start = System.currentTimeMillis(); + solr.optimize(true, false, maxSegments); + context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start); + float secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs); + 
context.setStatus("Committing Solr Phase 2"); + solr.commit(true, false); + context.setStatus("Shutting down Solr"); + solr.shutdown(); + } + + /** + * Throw a legal exception if a previous batch write had an exception. The + * previous state is cleared. Uses {@link #batchWriteException} for the state + * from the last exception. + * + * This will lose individual exceptions if the exceptions happen rapidly. + * + * @throws IOException On low level IO error + * @throws SolrServerException On Solr Exception + */ + private void throwIf() throws IOException, SolrServerException { + + final Exception last = batchWriteException; + batchWriteException = null; + + if (last == null) { + return; + } + if (last instanceof SolrServerException) { + throw (SolrServerException) last; + } + if (last instanceof IOException) { + throw (IOException) last; + } + throw new IOException("Batch Write Failure", last); + } +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java new file mode 100644 index 00000000000..33f609f1f2d --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.hadoop; + +import java.io.DataInput; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * An InputStream that wraps a DataInput. + * @see DataOutputOutputStream + */ +@InterfaceAudience.Private +public class DataInputInputStream extends InputStream { + + private DataInput in; + + /** + * Construct an InputStream from the given DataInput. If 'in' + * is already an InputStream, simply returns it. Otherwise, wraps + * it in an InputStream. + * @param in the DataInput to wrap + * @return an InputStream instance that reads from 'in' + */ + public static InputStream constructInputStream(DataInput in) { + if (in instanceof InputStream) { + return (InputStream)in; + } else { + return new DataInputInputStream(in); + } + } + + + public DataInputInputStream(DataInput in) { + this.in = in; + } + + @Override + public int read() throws IOException { + return in.readUnsignedByte(); + } +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java new file mode 100644 index 00000000000..389c52a577d --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * OutputStream implementation that wraps a DataOutput. + */ +@InterfaceAudience.Private +public class DataOutputOutputStream extends OutputStream { + + private final DataOutput out; + + /** + * Construct an OutputStream from the given DataOutput. If 'out' + * is already an OutputStream, simply returns it. Otherwise, wraps + * it in an OutputStream. + * @param out the DataOutput to wrap + * @return an OutputStream instance that outputs to 'out' + */ + public static OutputStream constructOutputStream(DataOutput out) { + if (out instanceof OutputStream) { + return (OutputStream)out; + } else { + return new DataOutputOutputStream(out); + } + } + + private DataOutputOutputStream(DataOutput out) { + this.out = out; + } + + @Override + public void write(int b) throws IOException { + out.writeByte(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + } + + @Override + public void write(byte[] b) throws IOException { + out.write(b); + } +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java new file mode 100644 index 00000000000..bacf1d0e1fc --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java @@ -0,0 +1,57 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.morphlines.solr.DocumentLoader; + +/** + * Prints documents to stdout instead of loading them into Solr for quicker turnaround during early + * trial & debug sessions. 
+ */ +final class DryRunDocumentLoader implements DocumentLoader { + + @Override + public void beginTransaction() { + } + + @Override + public void load(SolrInputDocument doc) { + System.out.println("dryrun: " + doc); + } + + @Override + public void commitTransaction() { + } + + @Override + public UpdateResponse rollbackTransaction() { + return new UpdateResponse(); + } + + @Override + public void shutdown() { + } + + @Override + public SolrPingResponse ping() { + return new SolrPingResponse(); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java new file mode 100644 index 00000000000..a7e4f7dda9d --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.request.CoreAdminRequest; +import org.apache.solr.hadoop.MapReduceIndexerTool.Options; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The optional (parallel) GoLive phase merges the output shards of the previous + * phase into a set of live customer facing Solr servers, typically a SolrCloud. 
+ */ +class GoLive { + + private static final Logger LOG = LoggerFactory.getLogger(GoLive.class); + + // TODO: handle clusters with replicas + public boolean goLive(Options options, FileStatus[] outDirs) { + LOG.info("Live merging of output shards into Solr cluster..."); + boolean success = false; + long start = System.currentTimeMillis(); + int concurrentMerges = options.goLiveThreads; + ThreadPoolExecutor executor = new ThreadPoolExecutor(concurrentMerges, + concurrentMerges, 1, TimeUnit.SECONDS, + new LinkedBlockingQueue()); + + try { + CompletionService completionService = new ExecutorCompletionService(executor); + Set> pending = new HashSet>(); + int cnt = -1; + for (final FileStatus dir : outDirs) { + + LOG.debug("processing: " + dir.getPath()); + + cnt++; + List urls = options.shardUrls.get(cnt); + + for (String url : urls) { + + String baseUrl = url; + if (baseUrl.endsWith("/")) { + baseUrl = baseUrl.substring(0, baseUrl.length() - 1); + } + + int lastPathIndex = baseUrl.lastIndexOf("/"); + if (lastPathIndex == -1) { + LOG.error("Found unexpected shardurl, live merge failed: " + baseUrl); + return false; + } + + final String name = baseUrl.substring(lastPathIndex + 1); + baseUrl = baseUrl.substring(0, lastPathIndex); + final String mergeUrl = baseUrl; + + Callable task = new Callable() { + @Override + public Request call() { + Request req = new Request(); + LOG.info("Live merge " + dir.getPath() + " into " + mergeUrl); + final HttpSolrServer server = new HttpSolrServer(mergeUrl); + try { + CoreAdminRequest.MergeIndexes mergeRequest = new CoreAdminRequest.MergeIndexes(); + mergeRequest.setCoreName(name); + mergeRequest.setIndexDirs(Arrays.asList(dir.getPath().toString() + "/data/index")); + try { + mergeRequest.process(server); + req.success = true; + } catch (SolrServerException e) { + req.e = e; + return req; + } catch (IOException e) { + req.e = e; + return req; + } + } finally { + server.shutdown(); + } + return req; + } + }; + 
pending.add(completionService.submit(task)); + } + } + + while (pending != null && pending.size() > 0) { + try { + Future future = completionService.take(); + if (future == null) break; + pending.remove(future); + + try { + Request req = future.get(); + + if (!req.success) { + // failed + LOG.error("A live merge command failed", req.e); + return false; + } + + } catch (ExecutionException e) { + LOG.error("Error sending live merge command", e); + return false; + } + + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("Live merge process interrupted", e); + return false; + } + } + + cnt = -1; + + + try { + LOG.info("Committing live merge..."); + if (options.zkHost != null) { + CloudSolrServer server = new CloudSolrServer(options.zkHost); + server.setDefaultCollection(options.collection); + server.commit(); + server.shutdown(); + } else { + for (List urls : options.shardUrls) { + for (String url : urls) { + // TODO: we should do these concurrently + HttpSolrServer server = new HttpSolrServer(url); + server.commit(); + server.shutdown(); + } + } + } + LOG.info("Done committing live merge"); + } catch (Exception e) { + LOG.error("Error sending commits to live Solr cluster", e); + return false; + } + + success = true; + return true; + } finally { + shutdownNowAndAwaitTermination(executor); + float secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Live merging of index shards into Solr cluster took " + secs + " secs"); + if (success) { + LOG.info("Live merging completed successfully"); + } else { + LOG.info("Live merging failed"); + } + } + + // if an output dir does not exist, we should fail and do no merge? 
+ } + + private void shutdownNowAndAwaitTermination(ExecutorService pool) { + pool.shutdown(); // Disable new tasks from being submitted + pool.shutdownNow(); // Cancel currently executing tasks + boolean shutdown = false; + while (!shutdown) { + try { + // Wait a while for existing tasks to terminate + shutdown = pool.awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException ie) { + // Preserve interrupt status + Thread.currentThread().interrupt(); + } + if (!shutdown) { + pool.shutdownNow(); // Cancel currently executing tasks + } + } + } + + + private static final class Request { + Exception e; + boolean success = false; + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java new file mode 100644 index 00000000000..c9eaef6c9e9 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + + +/** + * Solr field names for metadata of an HDFS file. 
+ */ +public interface HdfsFileFieldNames { + + public static final String FILE_UPLOAD_URL = "file_upload_url"; + public static final String FILE_DOWNLOAD_URL = "file_download_url"; + public static final String FILE_SCHEME = "file_scheme"; + public static final String FILE_HOST = "file_host"; + public static final String FILE_PORT = "file_port"; + public static final String FILE_PATH = "file_path"; + public static final String FILE_NAME = "file_name"; + public static final String FILE_LENGTH = "file_length"; + public static final String FILE_LAST_MODIFIED = "file_last_modified"; + public static final String FILE_OWNER = "file_owner"; + public static final String FILE_GROUP = "file_group"; + public static final String FILE_PERMISSIONS_USER = "file_permissions_user"; + public static final String FILE_PERMISSIONS_GROUP = "file_permissions_group"; + public static final String FILE_PERMISSIONS_OTHER = "file_permissions_other"; + public static final String FILE_PERMISSIONS_STICKYBIT = "file_permissions_stickybit"; + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java new file mode 100644 index 00000000000..229235b96b6 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.util.Locale; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import org.apache.hadoop.util.Progressable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class runs a background thread that once every 60 seconds checks to see if + * a progress report is needed. If a report is needed it is issued. + * + * A simple counter {@link #threadsNeedingHeartBeat} handles the number of + * threads requesting a heart beat. + * + * The expected usage pattern is + * + *

    + *  try {
    + *       heartBeater.needHeartBeat();
    + *       do something that may take a while
    + *    } finally {
    + *       heartBeater.cancelHeartBeat();
    + *    }
    + * 
    + * + * + */ +public class HeartBeater extends Thread { + + public static Logger LOG = LoggerFactory.getLogger(HeartBeater.class); + + /** + * count of threads asking for heart beat, at 0 no heart beat done. This could + * be an atomic long but then missmatches in need/cancel could result in + * negative counts. + */ + private volatile int threadsNeedingHeartBeat = 0; + + private Progressable progress; + + /** + * The amount of time to wait between checks for the need to issue a heart + * beat. In milliseconds. + */ + private final long waitTimeMs = TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); + + private final CountDownLatch isClosing = new CountDownLatch(1); + + /** + * Create the heart beat object thread set it to daemon priority and start the + * thread. When the count in {@link #threadsNeedingHeartBeat} is positive, the + * heart beat will be issued on the progress object every 60 seconds. + */ + public HeartBeater(Progressable progress) { + setDaemon(true); + this.progress = progress; + LOG.info("Heart beat reporting class is " + progress.getClass().getName()); + start(); + } + + public Progressable getProgress() { + return progress; + } + + public void setProgress(Progressable progress) { + this.progress = progress; + } + + @Override + public void run() { + LOG.info("HeartBeat thread running"); + while (true) { + try { + synchronized (this) { + if (threadsNeedingHeartBeat > 0) { + progress.progress(); + if (LOG.isInfoEnabled()) { + LOG.info(String.format(Locale.ENGLISH, "Issuing heart beat for %d threads", + threadsNeedingHeartBeat)); + } + } else { + if (LOG.isInfoEnabled()) { + LOG.info(String.format(Locale.ENGLISH, "heartbeat skipped count %d", + threadsNeedingHeartBeat)); + } + } + } + if (isClosing.await(waitTimeMs, TimeUnit.MILLISECONDS)) { + return; + } + } catch (Throwable e) { + LOG.error("HeartBeat throwable", e); + } + } + } + + /** + * inform the background thread that heartbeats are to be issued. 
Issue a + * heart beat also + */ + public synchronized void needHeartBeat() { + threadsNeedingHeartBeat++; + // Issue a progress report right away, + // just in case the the cancel comes before the background thread issues a + // report. + // If enough cases like this happen the 600 second timeout can occur + progress.progress(); + if (threadsNeedingHeartBeat == 1) { + // this.notify(); // wake up the heartbeater + } + } + + /** + * inform the background thread that this heartbeat request is not needed. + * This must be called at some point after each {@link #needHeartBeat()} + * request. + */ + public synchronized void cancelHeartBeat() { + if (threadsNeedingHeartBeat > 0) { + threadsNeedingHeartBeat--; + } else { + Exception e = new Exception("Dummy"); + e.fillInStackTrace(); + LOG.warn("extra call to cancelHeartBeat", e); + } + } + + public void setStatus(String status) { + if (progress instanceof TaskInputOutputContext) { + ((TaskInputOutputContext) progress).setStatus(status); + } + } + + /** Releases any resources */ + public void close() { + isClosing.countDown(); + } +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java new file mode 100644 index 00000000000..5d65fa306df --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +

/**
 * MR Mapper that randomizes a list of URLs.
 *
 * Mapper input is (offset, URL) pairs. Each such pair indicates a file to
 * index.
 *
 * Mapper output is (randomPosition, URL) pairs. The reducer receives these
 * pairs sorted by randomPosition.
 */
public class LineRandomizerMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

  private Random random;

  private static final Logger LOGGER = LoggerFactory.getLogger(LineRandomizerMapper.class);

  /** Creates the per-task random number generator before any record is mapped. */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    random = createRandom(context);
  }

  /** Emits the incoming value unchanged under a freshly drawn random long key. */
  @Override
  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    LOGGER.debug("map key: {}, value: {}", key, value);
    context.write(new LongWritable(random.nextLong()), value);
  }

  /**
   * Builds a generator seeded from the task id, or from task id 0 when the
   * context has no task attempt id.
   */
  private Random createRandom(Context context) {
    long taskId = 0;
    if (context.getTaskAttemptID() != null) { // MRUnit returns null
      LOGGER.debug("context.getTaskAttemptID().getId(): {}", context.getTaskAttemptID().getId());
      LOGGER.debug("context.getTaskAttemptID().getTaskID().getId(): {}", context.getTaskAttemptID().getTaskID().getId());
      taskId = context.getTaskAttemptID().getTaskID().getId(); // taskId = 0, 1, ..., N
    }
    // create a good random seed, yet ensure deterministic PRNG sequence for easy reproducibility
    return new Random(421439783L * (taskId + 1));
  }

}
\ No newline at end of file diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java new file mode 100644 index 00000000000..af7759e9f90 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * MR Reducer that randomizing a list of URLs. + * + * Reducer input is (randomPosition, URL) pairs. Each such pair indicates a file + * to index. + * + * Reducer output is a list of URLs, each URL in a random position.
+ */ +public class LineRandomizerReducer extends Reducer { + + private static final Logger LOGGER = LoggerFactory.getLogger(LineRandomizerReducer.class); + + @Override + protected void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { + for (Text value : values) { + LOGGER.debug("reduce key: {}, value: {}", key, value); + context.write(value, NullWritable.get()); + } + } +} \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java new file mode 100644 index 00000000000..6fbdaf3a316 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java @@ -0,0 +1,1420 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLClassLoader; +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Random; + +import net.sourceforge.argparse4j.ArgumentParsers; +import net.sourceforge.argparse4j.impl.Arguments; +import net.sourceforge.argparse4j.impl.action.HelpArgumentAction; +import net.sourceforge.argparse4j.impl.choice.RangeArgumentChoice; +import net.sourceforge.argparse4j.impl.type.FileArgumentType; +import net.sourceforge.argparse4j.inf.Argument; +import net.sourceforge.argparse4j.inf.ArgumentGroup; +import net.sourceforge.argparse4j.inf.ArgumentParser; +import net.sourceforge.argparse4j.inf.ArgumentParserException; +import net.sourceforge.argparse4j.inf.FeatureControl; +import net.sourceforge.argparse4j.inf.Namespace; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import 
org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.log4j.PropertyConfigurator; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver; +import org.apache.solr.hadoop.morphline.MorphlineMapRunner; +import org.apache.solr.hadoop.morphline.MorphlineMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.base.Fields; +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; +import com.google.common.io.ByteStreams; + + +/** + * Public API for a MapReduce batch job driver that creates a set of Solr index shards from a set of + * input files and writes the indexes into HDFS, in a flexible, scalable and fault-tolerant manner. + * Also supports merging the output shards into a set of live customer facing Solr servers, + * typically a SolrCloud. 
+ */ +public class MapReduceIndexerTool extends Configured implements Tool { + + Job job; // visible for testing only + + public static final String RESULTS_DIR = "results"; + + static final String MAIN_MEMORY_RANDOMIZATION_THRESHOLD = + MapReduceIndexerTool.class.getName() + ".mainMemoryRandomizationThreshold"; + + private static final String FULL_INPUT_LIST = "full-input-list.txt"; + + private static final Logger LOG = LoggerFactory.getLogger(MapReduceIndexerTool.class); + + + /** + * See http://argparse4j.sourceforge.net and for details see http://argparse4j.sourceforge.net/usage.html + */ + static final class MyArgumentParser { + + private static final String SHOW_NON_SOLR_CLOUD = "--show-non-solr-cloud"; + + private boolean showNonSolrCloud = false; + + /** + * Parses the given command line arguments. + * + * @return exitCode null indicates the caller shall proceed with processing, + * non-null indicates the caller shall exit the program with the + * given exit status code. + */ + public Integer parseArgs(String[] args, Configuration conf, Options opts) { + assert args != null; + assert conf != null; + assert opts != null; + + if (args.length == 0) { + args = new String[] { "--help" }; + } + + showNonSolrCloud = Arrays.asList(args).contains(SHOW_NON_SOLR_CLOUD); // intercept it first + + ArgumentParser parser = ArgumentParsers + .newArgumentParser("hadoop [GenericOptions]... jar solr-map-reduce-*.jar ", false) + .defaultHelp(true) + .description( + "MapReduce batch job driver that takes a morphline and creates a set of Solr index shards from a set of input files " + + "and writes the indexes into HDFS, in a flexible, scalable and fault-tolerant manner. " + + "It also supports merging the output shards into a set of live customer facing Solr servers, " + + "typically a SolrCloud. 
The program proceeds in several consecutive MapReduce based phases, as follows:" + + "\n\n" + + "1) Randomization phase: This (parallel) phase randomizes the list of input files in order to spread " + + "indexing load more evenly among the mappers of the subsequent phase." + + "\n\n" + + "2) Mapper phase: This (parallel) phase takes the input files, extracts the relevant content, transforms it " + + "and hands SolrInputDocuments to a set of reducers. " + + "The ETL functionality is flexible and " + + "customizable using chains of arbitrary morphline commands that pipe records from one transformation command to another. " + + "Commands to parse and transform a set of standard data formats such as Avro, CSV, Text, HTML, XML, " + + "PDF, Word, Excel, etc. are provided out of the box, and additional custom commands and parsers for additional " + + "file or data formats can be added as morphline plugins. " + + "This is done by implementing a simple Java interface that consumes a record (e.g. a file in the form of an InputStream " + + "plus some headers plus contextual metadata) and generates as output zero or more records. " + + "Any kind of data format can be indexed and any Solr documents for any kind of Solr schema can be generated, " + + "and any custom ETL logic can be registered and executed.\n" + + "Record fields, including MIME types, can also explicitly be passed by force from the CLI to the morphline, for example: " + + "hadoop ... -D " + MorphlineMapRunner.MORPHLINE_FIELD_PREFIX + Fields.ATTACHMENT_MIME_TYPE + "=text/csv" + + "\n\n" + + "3) Reducer phase: This (parallel) phase loads the mapper's SolrInputDocuments into one EmbeddedSolrServer per reducer. " + + "Each such reducer and Solr server can be seen as a (micro) shard. The Solr servers store their " + + "data in HDFS." + + "\n\n" + + "4) Mapper-only merge phase: This (parallel) phase merges the set of reducer shards into the number of solr " + + "shards expected by the user, using a mapper-only job. 
This phase is omitted if the number " + + "of shards is already equal to the number of shards expected by the user. " + + "\n\n" + + "5) Go-live phase: This optional (parallel) phase merges the output shards of the previous phase into a set of " + + "live customer facing Solr servers, typically a SolrCloud. " + + "If this phase is omitted you can explicitly point each Solr server to one of the HDFS output shard directories." + + "\n\n" + + "Fault Tolerance: Mapper and reducer task attempts are retried on failure per the standard MapReduce semantics. " + + "On program startup all data in the --output-dir is deleted if that output directory already exists. " + + "If the whole job fails you can retry simply by rerunning the program again using the same arguments." + ); + + parser.addArgument("--help", "-help", "-h") + .help("Show this help message and exit") + .action(new HelpArgumentAction() { + @Override + public void run(ArgumentParser parser, Argument arg, Map attrs, String flag, Object value) throws ArgumentParserException { + try { + parser.printHelp(new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"))); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException("Won't Happen for UTF-8"); + } + System.out.println(); + System.out.print(ToolRunnerHelpFormatter.getGenericCommandUsage()); + //ToolRunner.printGenericCommandUsage(System.out); + System.out.println( + "Examples: \n\n" + + + "# (Re)index an Avro based Twitter tweet file:\n" + + "sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/solr-map-reduce-*.jar \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" + + " --solr-home-dir src/test/resources/solr/minimr \\\n" + + " --output-dir 
hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --shards 1 \\\n" + + " hdfs:///user/$USER/test-documents/sample-statuses-20120906-141433.avro\n" + + "\n" + + "# (Re)index all files that match all of the following conditions:\n" + + "# 1) File is contained in dir tree hdfs:///user/$USER/solrloadtest/twitter/tweets\n" + + "# 2) file name matches the glob pattern 'sample-statuses*.gz'\n" + + "# 3) file was last modified less than 100000 minutes ago\n" + + "# 4) file size is between 1 MB and 1 GB\n" + + "# Also include extra library jar file containing JSON tweet Java parser:\n" + + "hadoop jar target/solr-map-reduce-*.jar " + "com.cloudera.cdk.morphline.hadoop.find.HdfsFindTool" + " \\\n" + + " -find hdfs:///user/$USER/solrloadtest/twitter/tweets \\\n" + + " -type f \\\n" + + " -name 'sample-statuses*.gz' \\\n" + + " -mmin -1000000 \\\n" + + " -size -100000000c \\\n" + + " -size +1000000c \\\n" + + "| sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/solr-map-reduce-*.jar \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadJsonTestTweets.conf \\\n" + + " --solr-home-dir src/test/resources/solr/minimr \\\n" + + " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --shards 100 \\\n" + + " --input-list -\n" + + "\n" + + "# Go live by merging resulting index shards into a live Solr cluster\n" + + "# (explicitly specify Solr URLs - for a SolrCloud cluster see next example):\n" + + "sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/solr-map-reduce-*.jar \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file 
../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" + + " --solr-home-dir src/test/resources/solr/minimr \\\n" + + " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --shard-url http://solr001.mycompany.com:8983/solr/collection1 \\\n" + + " --shard-url http://solr002.mycompany.com:8983/solr/collection1 \\\n" + + " --go-live \\\n" + + " hdfs:///user/foo/indir\n" + + "\n" + + "# Go live by merging resulting index shards into a live SolrCloud cluster\n" + + "# (discover shards and Solr URLs through ZooKeeper):\n" + + "sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/solr-map-reduce-*.jar \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" + + " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --zk-host zk01.mycompany.com:2181/solr \\\n" + + " --collection collection1 \\\n" + + " --go-live \\\n" + + " hdfs:///user/foo/indir\n" + ); + throw new FoundHelpArgument(); // Trick to prevent processing of any remaining arguments + } + }); + + ArgumentGroup requiredGroup = parser.addArgumentGroup("Required arguments"); + + Argument outputDirArg = requiredGroup.addArgument("--output-dir") + .metavar("HDFS_URI") + .type(new PathArgumentType(conf) { + @Override + public Path convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException { + Path path = super.convert(parser, arg, value); + if ("hdfs".equals(path.toUri().getScheme()) && path.toUri().getAuthority() == null) { + // TODO: consider defaulting to hadoop's fs.default.name here or in SolrRecordWriter.createEmbeddedSolrServer() + throw new ArgumentParserException("Missing authority in path URI: " + path, parser); + } + return path; + } + 
}.verifyHasScheme().verifyIsAbsolute().verifyCanWriteParent()) + .required(true) + .help("HDFS directory to write Solr indexes to. Inside there one output directory per shard will be generated. " + + "Example: hdfs://c2202.mycompany.com/user/$USER/test"); + + Argument inputListArg = parser.addArgument("--input-list") + .action(Arguments.append()) + .metavar("URI") + // .type(new PathArgumentType(fs).verifyExists().verifyCanRead()) + .type(Path.class) + .help("Local URI or HDFS URI of a UTF-8 encoded file containing a list of HDFS URIs to index, " + + "one URI per line in the file. If '-' is specified, URIs are read from the standard input. " + + "Multiple --input-list arguments can be specified."); + + Argument morphlineFileArg = requiredGroup.addArgument("--morphline-file") + .metavar("FILE") + .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead()) + .required(true) + .help("Relative or absolute path to a local config file that contains one or more morphlines. " + + "The file must be UTF-8 encoded. Example: /path/to/morphline.conf"); + + Argument morphlineIdArg = parser.addArgument("--morphline-id") + .metavar("STRING") + .type(String.class) + .help("The identifier of the morphline that shall be executed within the morphline config file " + + "specified by --morphline-file. If the --morphline-id option is ommitted the first (i.e. " + + "top-most) morphline within the config file is used. 
Example: morphline1"); + + Argument solrHomeDirArg = nonSolrCloud(parser.addArgument("--solr-home-dir") + .metavar("DIR") + .type(new FileArgumentType() { + @Override + public File convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException { + File solrHomeDir = super.convert(parser, arg, value); + File solrConfigFile = new File(new File(solrHomeDir, "conf"), "solrconfig.xml"); + new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead().convert( + parser, arg, solrConfigFile.getPath()); + return solrHomeDir; + } + }.verifyIsDirectory().verifyCanRead()) + .required(false) + .help("Relative or absolute path to a local dir containing Solr conf/ dir and in particular " + + "conf/solrconfig.xml and optionally also lib/ dir. This directory will be uploaded to each MR task. " + + "Example: src/test/resources/solr/minimr")); + + Argument updateConflictResolverArg = parser.addArgument("--update-conflict-resolver") + .metavar("FQCN") + .type(String.class) + .setDefault(RetainMostRecentUpdateConflictResolver.class.getName()) + .help("Fully qualified class name of a Java class that implements the UpdateConflictResolver interface. " + + "This enables deduplication and ordering of a series of document updates for the same unique document " + + "key. For example, a MapReduce batch job might index multiple files in the same job where some of the " + + "files contain old and new versions of the very same document, using the same unique document key.\n" + + "Typically, implementations of this interface forbid collisions by throwing an exception, or ignore all but " + + "the most recent document version, or, in the general case, order colliding updates ascending from least " + + "recent to most recent (partial) update. The caller of this interface (i.e. 
the Hadoop Reducer) will then " + + "apply the updates to Solr in the order returned by the orderUpdates() method.\n" + + "The default RetainMostRecentUpdateConflictResolver implementation ignores all but the most recent document " + + "version, based on a configurable numeric Solr field, which defaults to the file_last_modified timestamp"); + + Argument mappersArg = parser.addArgument("--mappers") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)) // TODO: also support X% syntax where X is an integer + .setDefault(-1) + .help("Tuning knob that indicates the maximum number of MR mapper tasks to use. -1 indicates use all map slots " + + "available on the cluster."); + + Argument reducersArg = parser.addArgument("--reducers") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)) // TODO: also support X% syntax where X is an integer + .setDefault(-1) + .help("Tuning knob that indicates the number of reducers to index into. " + + "-1 indicates use all reduce slots available on the cluster. " + + "0 indicates use one reducer per output shard, which disables the mtree merge MR algorithm. " + + "The mtree merge MR algorithm improves scalability by spreading load " + + "(in particular CPU load) among a number of parallel reducers that can be much larger than the number " + + "of solr shards expected by the user. It can be seen as an extension of concurrent lucene merges " + + "and tiered lucene merges to the clustered case. 
The subsequent mapper-only phase " + + "merges the output of said large number of reducers to the number of shards expected by the user, " + + "again by utilizing more available parallelism on the cluster."); + + Argument fanoutArg = parser.addArgument("--fanout") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(2, Integer.MAX_VALUE)) + .setDefault(Integer.MAX_VALUE) + .help(FeatureControl.SUPPRESS); + + Argument maxSegmentsArg = parser.addArgument("--max-segments") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)) + .setDefault(1) + .help("Tuning knob that indicates the maximum number of segments to be contained on output in the index of " + + "each reducer shard. After a reducer has built its output index it applies a merge policy to merge segments " + + "until there are <= maxSegments lucene segments left in this index. " + + "Merging segments involves reading and rewriting all data in all these segment files, " + + "potentially multiple times, which is very I/O intensive and time consuming. " + + "However, an index with fewer segments can later be merged faster, " + + "and it can later be queried faster once deployed to a live Solr serving shard. " + + "Set maxSegments to 1 to optimize the index for low query latency. " + + "In a nutshell, a small maxSegments value trades indexing latency for subsequently improved query latency. " + + "This can be a reasonable trade-off for batch indexing systems."); + + Argument fairSchedulerPoolArg = parser.addArgument("--fair-scheduler-pool") + .metavar("STRING") + .help("Optional tuning knob that indicates the name of the fair scheduler pool to submit jobs to. " + + "The Fair Scheduler is a pluggable MapReduce scheduler that provides a way to share large clusters. " + + "Fair scheduling is a method of assigning resources to jobs such that all jobs get, on average, an " + + "equal share of resources over time. 
When there is a single job running, that job uses the entire " + + "cluster. When other jobs are submitted, tasks slots that free up are assigned to the new jobs, so " + + "that each job gets roughly the same amount of CPU time. Unlike the default Hadoop scheduler, which " + + "forms a queue of jobs, this lets short jobs finish in reasonable time while not starving long jobs. " + + "It is also an easy way to share a cluster between multiple of users. Fair sharing can also work with " + + "job priorities - the priorities are used as weights to determine the fraction of total compute time " + + "that each job gets."); + + Argument dryRunArg = parser.addArgument("--dry-run") + .action(Arguments.storeTrue()) + .help("Run in local mode and print documents to stdout instead of loading them into Solr. This executes " + + "the morphline in the client process (without submitting a job to MR) for quicker turnaround during " + + "early trial & debug sessions."); + + Argument log4jConfigFileArg = parser.addArgument("--log4j") + .metavar("FILE") + .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead()) + .help("Relative or absolute path to a log4j.properties config file on the local file system. This file " + + "will be uploaded to each MR task. Example: /path/to/log4j.properties"); + + Argument verboseArg = parser.addArgument("--verbose", "-v") + .action(Arguments.storeTrue()) + .help("Turn on verbose output."); + + parser.addArgument(SHOW_NON_SOLR_CLOUD) + .action(Arguments.storeTrue()) + .help("Also show options for Non-SolrCloud mode as part of --help."); + + ArgumentGroup clusterInfoGroup = parser + .addArgumentGroup("Cluster arguments") + .description( + "Arguments that provide information about your Solr cluster. " + + nonSolrCloud("If you are building shards for a SolrCloud cluster, pass the --zk-host argument. " + + "If you are building shards for " + + "a Non-SolrCloud cluster, pass the --shard-url argument one or more times. 
To build indexes for " + + "a replicated Non-SolrCloud cluster with --shard-url, pass replica urls consecutively and also pass --shards. " + + "Using --go-live requires either --zk-host or --shard-url.")); + + Argument zkHostArg = clusterInfoGroup.addArgument("--zk-host") + .metavar("STRING") + .type(String.class) + .help("The address of a ZooKeeper ensemble being used by a SolrCloud cluster. " + + "This ZooKeeper ensemble will be examined to determine the number of output " + + "shards to create as well as the Solr URLs to merge the output shards into when using the --go-live option. " + + "Requires that you also pass the --collection to merge the shards into.\n" + + "\n" + + "The --zk-host option implements the same partitioning semantics as the standard SolrCloud " + + "Near-Real-Time (NRT) API. This enables to mix batch updates from MapReduce ingestion with " + + "updates from standard Solr NRT ingestion on the same SolrCloud cluster, " + + "using identical unique document keys.\n" + + "\n" + + "Format is: a list of comma separated host:port pairs, each corresponding to a zk " + + "server. Example: '127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183' If " + + "the optional chroot suffix is used the example would look " + + "like: '127.0.0.1:2181/solr,127.0.0.1:2182/solr,127.0.0.1:2183/solr' " + + "where the client would be rooted at '/solr' and all paths " + + "would be relative to this root - i.e. getting/setting/etc... " + + "'/foo/bar' would result in operations being run on " + + "'/solr/foo/bar' (from the server perspective).\n" + + nonSolrCloud("\n" + + "If --solr-home-dir is not specified, the Solr home directory for the collection " + + "will be downloaded from this ZooKeeper ensemble.")); + + Argument shardUrlsArg = nonSolrCloud(clusterInfoGroup.addArgument("--shard-url") + .metavar("URL") + .type(String.class) + .action(Arguments.append()) + .help("Solr URL to merge resulting shard into if using --go-live. 
" + + "Example: http://solr001.mycompany.com:8983/solr/collection1. " + + "Multiple --shard-url arguments can be specified, one for each desired shard. " + + "If you are merging shards into a SolrCloud cluster, use --zk-host instead.")); + + Argument shardsArg = nonSolrCloud(clusterInfoGroup.addArgument("--shards") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)) + .help("Number of output shards to generate.")); + + ArgumentGroup goLiveGroup = parser.addArgumentGroup("Go live arguments") + .description("Arguments for merging the shards that are built into a live Solr cluster. " + + "Also see the Cluster arguments."); + + Argument goLiveArg = goLiveGroup.addArgument("--go-live") + .action(Arguments.storeTrue()) + .help("Allows you to optionally merge the final index shards into a live Solr cluster after they are built. " + + "You can pass the ZooKeeper address with --zk-host and the relevant cluster information will be auto detected. " + + nonSolrCloud("If you are not using a SolrCloud cluster, --shard-url arguments can be used to specify each SolrCore to merge " + + "each shard into.")); + + Argument collectionArg = goLiveGroup.addArgument("--collection") + .metavar("STRING") + .help("The SolrCloud collection to merge shards into when using --go-live and --zk-host. 
Example: collection1"); + + Argument goLiveThreadsArg = goLiveGroup.addArgument("--go-live-threads") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)) + .setDefault(1000) + .help("Tuning knob that indicates the maximum number of live merges to run in parallel at one time."); + + // trailing positional arguments + Argument inputFilesArg = parser.addArgument("input-files") + .metavar("HDFS_URI") + .type(new PathArgumentType(conf).verifyHasScheme().verifyExists().verifyCanRead()) + .nargs("*") + .setDefault() + .help("HDFS URI of file or directory tree to index."); + + Namespace ns; + try { + ns = parser.parseArgs(args); + } catch (FoundHelpArgument e) { + return 0; + } catch (ArgumentParserException e) { + parser.handleError(e); + return 1; + } + + opts.log4jConfigFile = (File) ns.get(log4jConfigFileArg.getDest()); + if (opts.log4jConfigFile != null) { + PropertyConfigurator.configure(opts.log4jConfigFile.getPath()); + } + LOG.debug("Parsed command line args: {}", ns); + + opts.inputLists = ns.getList(inputListArg.getDest()); + if (opts.inputLists == null) { + opts.inputLists = Collections.EMPTY_LIST; + } + opts.inputFiles = ns.getList(inputFilesArg.getDest()); + opts.outputDir = (Path) ns.get(outputDirArg.getDest()); + opts.mappers = ns.getInt(mappersArg.getDest()); + opts.reducers = ns.getInt(reducersArg.getDest()); + opts.updateConflictResolver = ns.getString(updateConflictResolverArg.getDest()); + opts.fanout = ns.getInt(fanoutArg.getDest()); + opts.maxSegments = ns.getInt(maxSegmentsArg.getDest()); + opts.morphlineFile = (File) ns.get(morphlineFileArg.getDest()); + opts.morphlineId = ns.getString(morphlineIdArg.getDest()); + opts.solrHomeDir = (File) ns.get(solrHomeDirArg.getDest()); + opts.fairSchedulerPool = ns.getString(fairSchedulerPoolArg.getDest()); + opts.isDryRun = ns.getBoolean(dryRunArg.getDest()); + opts.isVerbose = ns.getBoolean(verboseArg.getDest()); + opts.zkHost = 
ns.getString(zkHostArg.getDest()); + opts.shards = ns.getInt(shardsArg.getDest()); + opts.shardUrls = buildShardUrls(ns.getList(shardUrlsArg.getDest()), opts.shards); + opts.goLive = ns.getBoolean(goLiveArg.getDest()); + opts.goLiveThreads = ns.getInt(goLiveThreadsArg.getDest()); + opts.collection = ns.getString(collectionArg.getDest()); + + try { + verifyGoLiveArgs(opts, parser); + } catch (ArgumentParserException e) { + parser.handleError(e); + return 1; + } + + if (opts.inputLists.isEmpty() && opts.inputFiles.isEmpty()) { + LOG.info("No input files specified - nothing to process"); + return 0; // nothing to process + } + return null; + } + + // make it a "hidden" option, i.e. the option is functional and enabled but not shown in --help output + private Argument nonSolrCloud(Argument arg) { + return showNonSolrCloud ? arg : arg.help(FeatureControl.SUPPRESS); + } + + private String nonSolrCloud(String msg) { + return showNonSolrCloud ? msg : ""; + } + + /** Marker trick to prevent processing of any remaining arguments once --help option has been parsed */ + private static final class FoundHelpArgument extends RuntimeException { + } + } + // END OF INNER CLASS + + static List> buildShardUrls(List urls, Integer numShards) { + if (urls == null) return null; + List> shardUrls = new ArrayList>(urls.size()); + List list = null; + + int sz; + if (numShards == null) { + numShards = urls.size(); + } + sz = (int) Math.ceil(urls.size() / (float)numShards); + for (int i = 0; i < urls.size(); i++) { + if (i % sz == 0) { + list = new ArrayList(); + shardUrls.add(list); + } + list.add((String) urls.get(i)); + } + + return shardUrls; + } + + static final class Options { + boolean goLive; + String collection; + String zkHost; + Integer goLiveThreads; + List> shardUrls; + List inputLists; + List inputFiles; + Path outputDir; + int mappers; + int reducers; + String updateConflictResolver; + int fanout; + Integer shards; + int maxSegments; + File morphlineFile; + String morphlineId; 
+ File solrHomeDir; + String fairSchedulerPool; + boolean isDryRun; + File log4jConfigFile; + boolean isVerbose; + } + // END OF INNER CLASS + + + /** API for command line clients */ + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapReduceIndexerTool(), args); + System.exit(res); + } + + public MapReduceIndexerTool() {} + + @Override + public int run(String[] args) throws Exception { + Options opts = new Options(); + Integer exitCode = new MyArgumentParser().parseArgs(args, getConf(), opts); + if (exitCode != null) { + return exitCode; + } + return run(opts); + } + + /** API for Java clients; visible for testing; may become a public API eventually */ + int run(Options options) throws Exception { + + if ("local".equals(getConf().get("mapred.job.tracker"))) { + throw new IllegalStateException( + "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported " + + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, " + + "which is required for passing files via --files and --libjars"); + } + + long programStartTime = System.currentTimeMillis(); + if (options.fairSchedulerPool != null) { + getConf().set("mapred.fairscheduler.pool", options.fairSchedulerPool); + } + getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments); + + // switch off a false warning about allegedly not implementing Tool + // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html + // also see https://issues.apache.org/jira/browse/HADOOP-8183 + getConf().setBoolean("mapred.used.genericoptionsparser", true); + + if (options.log4jConfigFile != null) { + Utils.setLogConfigFile(options.log4jConfigFile, getConf()); + addDistributedCacheFile(options.log4jConfigFile, getConf()); + } + + job = Job.getInstance(getConf()); + job.setJarByClass(getClass()); + + if (options.morphlineFile == null) { + throw new 
ArgumentParserException("Argument --morphline-file is required", null); + } + verifyGoLiveArgs(options, null); + verifyZKStructure(options, null); + + int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1 + //int mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only + LOG.info("Cluster reports {} mapper slots", mappers); + + if (options.mappers == -1) { + mappers = 8 * mappers; // better accomodate stragglers + } else { + mappers = options.mappers; + } + if (mappers <= 0) { + throw new IllegalStateException("Illegal number of mappers: " + mappers); + } + options.mappers = mappers; + + FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration()); + if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) { + return -1; + } + Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR); + Path outputReduceDir = new Path(options.outputDir, "reducers"); + Path outputStep1Dir = new Path(options.outputDir, "tmp1"); + Path outputStep2Dir = new Path(options.outputDir, "tmp2"); + Path outputTreeMergeStep = new Path(options.outputDir, "mtree-merge-output"); + Path fullInputList = new Path(outputStep1Dir, FULL_INPUT_LIST); + + LOG.debug("Creating list of input files for mappers: {}", fullInputList); + long numFiles = addInputFiles(options.inputFiles, options.inputLists, fullInputList, job.getConfiguration()); + if (numFiles == 0) { + LOG.info("No input files found - nothing to process"); + return 0; + } + int numLinesPerSplit = (int) ceilDivide(numFiles, mappers); + if (numLinesPerSplit < 0) { // numeric overflow from downcasting long to int? 
+ numLinesPerSplit = Integer.MAX_VALUE; + } + numLinesPerSplit = Math.max(1, numLinesPerSplit); + + int realMappers = Math.min(mappers, (int) ceilDivide(numFiles, numLinesPerSplit)); + calculateNumReducers(options, realMappers); + int reducers = options.reducers; + LOG.info("Using these parameters: " + + "numFiles: {}, mappers: {}, realMappers: {}, reducers: {}, shards: {}, fanout: {}, maxSegments: {}", + new Object[] {numFiles, mappers, realMappers, reducers, options.shards, options.fanout, options.maxSegments}); + + + LOG.info("Randomizing list of {} input files to spread indexing load more evenly among mappers", numFiles); + long startTime = System.currentTimeMillis(); + if (numFiles < job.getConfiguration().getInt(MAIN_MEMORY_RANDOMIZATION_THRESHOLD, 100001)) { + // If there are few input files reduce latency by directly running main memory randomization + // instead of launching a high latency MapReduce job + randomizeFewInputFiles(fs, outputStep2Dir, fullInputList); + } else { + // Randomize using a MapReduce job. Use sequential algorithm below a certain threshold because there's no + // benefit in using many parallel mapper tasks just to randomize the order of a few lines each + int numLinesPerRandomizerSplit = Math.max(10 * 1000 * 1000, numLinesPerSplit); + Job randomizerJob = randomizeManyInputFiles(getConf(), fullInputList, outputStep2Dir, numLinesPerRandomizerSplit); + if (!waitForCompletion(randomizerJob, options.isVerbose)) { + return -1; // job failed + } + } + float secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("Done. 
Randomizing list of {} input files took {} secs", numFiles, secs); + + + job.setInputFormatClass(NLineInputFormat.class); + NLineInputFormat.addInputPath(job, outputStep2Dir); + NLineInputFormat.setNumLinesPerSplit(job, numLinesPerSplit); + FileOutputFormat.setOutputPath(job, outputReduceDir); + + String mapperClass = job.getConfiguration().get(JobContext.MAP_CLASS_ATTR); + if (mapperClass == null) { // enable customization + Class clazz = MorphlineMapper.class; + mapperClass = clazz.getName(); + job.setMapperClass(clazz); + } + job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(mapperClass)); + + if (job.getConfiguration().get(JobContext.REDUCE_CLASS_ATTR) == null) { // enable customization + job.setReducerClass(SolrReducer.class); + } + if (options.updateConflictResolver == null) { + throw new IllegalArgumentException("updateConflictResolver must not be null"); + } + job.getConfiguration().set(SolrReducer.UPDATE_CONFLICT_RESOLVER, options.updateConflictResolver); + + if (options.zkHost != null) { + assert options.collection != null; + /* + * MapReduce partitioner that partitions the Mapper output such that each + * SolrInputDocument gets sent to the SolrCloud shard that it would have + * been sent to if the document were ingested via the standard SolrCloud + * Near Real Time (NRT) API. + * + * In other words, this class implements the same partitioning semantics + * as the standard SolrCloud NRT API. This enables to mix batch updates + * from MapReduce ingestion with updates from standard NRT ingestion on + * the same SolrCloud cluster, using identical unique document keys. 
+ */ + if (job.getConfiguration().get(JobContext.PARTITIONER_CLASS_ATTR) == null) { // enable customization + job.setPartitionerClass(SolrCloudPartitioner.class); + } + job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost); + job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection); + } + job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards); + + job.setOutputFormatClass(SolrOutputFormat.class); + if (options.solrHomeDir != null) { + SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job); + } else { + assert options.zkHost != null; + // use the config that this collection uses for the SolrHomeCache. + ZooKeeperInspector zki = new ZooKeeperInspector(); + SolrZkClient zkClient = zki.getZkClient(options.zkHost); + try { + String configName = zki.readConfigName(zkClient, options.collection); + File tmpSolrHomeDir = zki.downloadConfigDir(zkClient, configName); + SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job); + options.solrHomeDir = tmpSolrHomeDir; + } finally { + zkClient.close(); + } + } + + MorphlineMapRunner runner = setupMorphline(options); + if (options.isDryRun && runner != null) { + LOG.info("Indexing {} files in dryrun mode", numFiles); + startTime = System.currentTimeMillis(); + dryRun(runner, fs, fullInputList); + secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("Done. 
Indexing {} files in dryrun mode took {} secs", numFiles, secs); + goodbye(null, programStartTime); + return 0; + } + job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getName()); + + job.setNumReduceTasks(reducers); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(SolrInputDocumentWritable.class); + LOG.info("Indexing {} files using {} real mappers into {} reducers", new Object[] {numFiles, realMappers, reducers}); + startTime = System.currentTimeMillis(); + if (!waitForCompletion(job, options.isVerbose)) { + return -1; // job failed + } + + secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("Done. Indexing {} files using {} real mappers into {} reducers took {} secs", new Object[] {numFiles, realMappers, reducers, secs}); + + int mtreeMergeIterations = 0; + if (reducers > options.shards) { + mtreeMergeIterations = (int) Math.round(log(options.fanout, reducers / options.shards)); + } + LOG.debug("MTree merge iterations to do: {}", mtreeMergeIterations); + int mtreeMergeIteration = 1; + while (reducers > options.shards) { // run a mtree merge iteration + job = Job.getInstance(getConf()); + job.setJarByClass(getClass()); + job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(TreeMergeMapper.class)); + job.setMapperClass(TreeMergeMapper.class); + job.setOutputFormatClass(TreeMergeOutputFormat.class); + job.setNumReduceTasks(0); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(NullWritable.class); + job.setInputFormatClass(NLineInputFormat.class); + + Path inputStepDir = new Path(options.outputDir, "mtree-merge-input-iteration" + mtreeMergeIteration); + fullInputList = new Path(inputStepDir, FULL_INPUT_LIST); + LOG.debug("MTree merge iteration {}/{}: Creating input list file for mappers {}", new Object[] {mtreeMergeIteration, mtreeMergeIterations, fullInputList}); + numFiles = createTreeMergeInputDirList(outputReduceDir, fs, fullInputList); + if (numFiles != reducers) 
{ + throw new IllegalStateException("Not same reducers: " + reducers + ", numFiles: " + numFiles); + } + NLineInputFormat.addInputPath(job, fullInputList); + NLineInputFormat.setNumLinesPerSplit(job, options.fanout); + FileOutputFormat.setOutputPath(job, outputTreeMergeStep); + + LOG.info("MTree merge iteration {}/{}: Merging {} shards into {} shards using fanout {}", new Object[] { + mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout), options.fanout}); + startTime = System.currentTimeMillis(); + if (!waitForCompletion(job, options.isVerbose)) { + return -1; // job failed + } + if (!renameTreeMergeShardDirs(outputTreeMergeStep, job, fs)) { + return -1; + } + secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("MTree merge iteration {}/{}: Done. Merging {} shards into {} shards using fanout {} took {} secs", + new Object[] {mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout), options.fanout, secs}); + + if (!delete(outputReduceDir, true, fs)) { + return -1; + } + if (!rename(outputTreeMergeStep, outputReduceDir, fs)) { + return -1; + } + assert reducers % options.fanout == 0; + reducers = reducers / options.fanout; + mtreeMergeIteration++; + } + assert reducers == options.shards; + + // normalize output shard dir prefix, i.e. 
+ // rename part-r-00000 to part-00000 (stems from zero tree merge iterations) + // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations) + for (FileStatus stats : fs.listStatus(outputReduceDir)) { + String dirPrefix = SolrOutputFormat.getOutputName(job); + Path srcPath = stats.getPath(); + if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) { + String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length()); + Path dstPath = new Path(srcPath.getParent(), dstName); + if (!rename(srcPath, dstPath, fs)) { + return -1; + } + } + }; + + // publish results dir + if (!rename(outputReduceDir, outputResultsDir, fs)) { + return -1; + } + + if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(outputResultsDir, fs))) { + return -1; + } + + goodbye(job, programStartTime); + return 0; + } + + private void calculateNumReducers(Options options, int realMappers) throws IOException { + if (options.shards <= 0) { + throw new IllegalStateException("Illegal number of shards: " + options.shards); + } + if (options.fanout <= 1) { + throw new IllegalStateException("Illegal fanout: " + options.fanout); + } + if (realMappers <= 0) { + throw new IllegalStateException("Illegal realMappers: " + realMappers); + } + + + int reducers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxReduceTasks(); // MR1 + //reducers = job.getCluster().getClusterStatus().getReduceSlotCapacity(); // Yarn only + LOG.info("Cluster reports {} reduce slots", reducers); + + if (options.reducers == 0) { + reducers = options.shards; + } else if (options.reducers == -1) { + reducers = Math.min(reducers, realMappers); // no need to use many reducers when using few mappers + } else { + reducers = options.reducers; + } + reducers = Math.max(reducers, options.shards); + + if (reducers != options.shards) { + // Ensure fanout isn't misconfigured. 
fanout can't meaningfully be larger than what would be + // required to merge all leaf shards in one single tree merge iteration into root shards + options.fanout = Math.min(options.fanout, (int) ceilDivide(reducers, options.shards)); + + // Ensure invariant reducers == options.shards * (fanout ^ N) where N is an integer >= 1. + // N is the number of mtree merge iterations. + // This helps to evenly spread docs among root shards and simplifies the impl of the mtree merge algorithm. + int s = options.shards; + while (s < reducers) { + s = s * options.fanout; + } + reducers = s; + assert reducers % options.fanout == 0; + } + options.reducers = reducers; + } + + private long addInputFiles(List inputFiles, List inputLists, Path fullInputList, Configuration conf) + throws IOException { + + long numFiles = 0; + FileSystem fs = fullInputList.getFileSystem(conf); + FSDataOutputStream out = fs.create(fullInputList); + try { + Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + + for (Path inputFile : inputFiles) { + FileSystem inputFileFs = inputFile.getFileSystem(conf); + if (inputFileFs.exists(inputFile)) { + PathFilter pathFilter = new PathFilter() { + @Override + public boolean accept(Path path) { + return !path.getName().startsWith("."); // ignore "hidden" files and dirs + } + }; + numFiles += addInputFilesRecursively(inputFile, writer, inputFileFs, pathFilter); + } + } + + for (Path inputList : inputLists) { + InputStream in; + if (inputList.toString().equals("-")) { + in = System.in; + } else if (inputList.isAbsoluteAndSchemeAuthorityNull()) { + in = new BufferedInputStream(new FileInputStream(inputList.toString())); + } else { + in = inputList.getFileSystem(conf).open(inputList); + } + try { + BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + writer.write(line + "\n"); + numFiles++; + } + reader.close(); + } finally { + in.close(); + } + } + + 
writer.close(); + } finally { + out.close(); + } + return numFiles; + } + + /** + * Add the specified file to the input set, if path is a directory then + * add the files contained therein. + */ + private long addInputFilesRecursively(Path path, Writer writer, FileSystem fs, PathFilter pathFilter) throws IOException { + long numFiles = 0; + for (FileStatus stat : fs.listStatus(path, pathFilter)) { + LOG.debug("Adding path {}", stat.getPath()); + if (stat.isDirectory()) { + numFiles += addInputFilesRecursively(stat.getPath(), writer, fs, pathFilter); + } else { + writer.write(stat.getPath().toString() + "\n"); + numFiles++; + } + } + return numFiles; + } + + private void randomizeFewInputFiles(FileSystem fs, Path outputStep2Dir, Path fullInputList) throws IOException { + List lines = new ArrayList(); + BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8")); + try { + String line; + while ((line = reader.readLine()) != null) { + lines.add(line); + } + } finally { + reader.close(); + } + + Collections.shuffle(lines, new Random(421439783L)); // constant seed for reproducability + + FSDataOutputStream out = fs.create(new Path(outputStep2Dir, FULL_INPUT_LIST)); + Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + try { + for (String line : lines) { + writer.write(line + "\n"); + } + } finally { + writer.close(); + } + } + + /** + * To uniformly spread load across all mappers we randomize fullInputList + * with a separate small Mapper & Reducer preprocessing step. This way + * each input line ends up on a random position in the output file list. + * Each mapper indexes a disjoint consecutive set of files such that each + * set has roughly the same size, at least from a probabilistic + * perspective. 
+ * + * For example an input file with the following input list of URLs: + * + * A + * B + * C + * D + * + * might be randomized into the following output list of URLs: + * + * C + * A + * D + * B + * + * The implementation sorts the list of lines by randomly generated numbers. + */ + private Job randomizeManyInputFiles(Configuration baseConfig, Path fullInputList, Path outputStep2Dir, int numLinesPerSplit) + throws IOException { + + Job job2 = Job.getInstance(baseConfig); + job2.setJarByClass(getClass()); + job2.setJobName(getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class)); + job2.setInputFormatClass(NLineInputFormat.class); + NLineInputFormat.addInputPath(job2, fullInputList); + NLineInputFormat.setNumLinesPerSplit(job2, numLinesPerSplit); + job2.setMapperClass(LineRandomizerMapper.class); + job2.setReducerClass(LineRandomizerReducer.class); + job2.setOutputFormatClass(TextOutputFormat.class); + FileOutputFormat.setOutputPath(job2, outputStep2Dir); + job2.setNumReduceTasks(1); + job2.setOutputKeyClass(LongWritable.class); + job2.setOutputValueClass(Text.class); + return job2; + } + + // do the same as if the user had typed 'hadoop ... --files ' + private void addDistributedCacheFile(File file, Configuration conf) throws IOException { + String HADOOP_TMP_FILES = "tmpfiles"; // see Hadoop's GenericOptionsParser + String tmpFiles = conf.get(HADOOP_TMP_FILES, ""); + if (tmpFiles.length() > 0) { // already present? 
+ tmpFiles = tmpFiles + ","; + } + GenericOptionsParser parser = new GenericOptionsParser( + new Configuration(conf), + new String[] { "--files", file.getCanonicalPath() }); + String additionalTmpFiles = parser.getConfiguration().get(HADOOP_TMP_FILES); + assert additionalTmpFiles != null; + assert additionalTmpFiles.length() > 0; + tmpFiles += additionalTmpFiles; + conf.set(HADOOP_TMP_FILES, tmpFiles); + } + + private MorphlineMapRunner setupMorphline(Options options) throws IOException, URISyntaxException { + if (options.morphlineId != null) { + job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_ID_PARAM, options.morphlineId); + } + addDistributedCacheFile(options.morphlineFile, job.getConfiguration()); + if (!options.isDryRun) { + return null; + } + + /* + * Ensure scripting support for Java via morphline "java" command works even in dryRun mode, + * i.e. when executed in the client side driver JVM. To do so, collect all classpath URLs from + * the class loaders chain that org.apache.hadoop.util.RunJar (hadoop jar xyz-job.jar) and + * org.apache.hadoop.util.GenericOptionsParser (--libjars) have installed, then tell + * FastJavaScriptEngine.parse() where to find classes that JavaBuilder scripts might depend on. + * This ensures that scripts that reference external java classes compile without exceptions + * like this: + * + * ... 
caused by compilation failed: mfm:///MyJavaClass1.java:2: package + * com.cloudera.cdk.morphline.api does not exist + */ + LOG.trace("dryRun: java.class.path: {}", System.getProperty("java.class.path")); + String fullClassPath = ""; + ClassLoader loader = Thread.currentThread().getContextClassLoader(); // see org.apache.hadoop.util.RunJar + while (loader != null) { // walk class loaders, collect all classpath URLs + if (loader instanceof URLClassLoader) { + URL[] classPathPartURLs = ((URLClassLoader) loader).getURLs(); // see org.apache.hadoop.util.RunJar + LOG.trace("dryRun: classPathPartURLs: {}", Arrays.asList(classPathPartURLs)); + StringBuilder classPathParts = new StringBuilder(); + for (URL url : classPathPartURLs) { + File file = new File(url.toURI()); + if (classPathPartURLs.length > 0) { + classPathParts.append(File.pathSeparator); + } + classPathParts.append(file.getPath()); + } + LOG.trace("dryRun: classPathParts: {}", classPathParts); + String separator = File.pathSeparator; + if (fullClassPath.length() == 0 || classPathParts.length() == 0) { + separator = ""; + } + fullClassPath = classPathParts + separator + fullClassPath; + } + loader = loader.getParent(); + } + + // tell FastJavaScriptEngine.parse() where to find the classes that the script might depend on + if (fullClassPath.length() > 0) { + assert System.getProperty("java.class.path") != null; + fullClassPath = System.getProperty("java.class.path") + File.pathSeparator + fullClassPath; + LOG.trace("dryRun: fullClassPath: {}", fullClassPath); + System.setProperty("java.class.path", fullClassPath); // see FastJavaScriptEngine.parse() + } + + job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getPath()); + return new MorphlineMapRunner( + job.getConfiguration(), new DryRunDocumentLoader(), options.solrHomeDir.getPath()); + } + + /* + * Executes the morphline in the current process (without submitting a job to MR) for quicker + * turnaround during trial & debug 
sessions + */ + private void dryRun(MorphlineMapRunner runner, FileSystem fs, Path fullInputList) throws IOException { + BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8")); + try { + String line; + while ((line = reader.readLine()) != null) { + runner.map(line, job.getConfiguration(), null); + } + runner.cleanup(); + } finally { + reader.close(); + } + } + + private int createTreeMergeInputDirList(Path outputReduceDir, FileSystem fs, Path fullInputList) + throws FileNotFoundException, IOException { + + FileStatus[] dirs = listSortedOutputShardDirs(outputReduceDir, fs); + int numFiles = 0; + FSDataOutputStream out = fs.create(fullInputList); + try { + Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + for (FileStatus stat : dirs) { + LOG.debug("Adding path {}", stat.getPath()); + Path dir = new Path(stat.getPath(), "data/index"); + if (!fs.isDirectory(dir)) { + throw new IllegalStateException("Not a directory: " + dir); + } + writer.write(dir.toString() + "\n"); + numFiles++; + } + writer.close(); + } finally { + out.close(); + } + return numFiles; + } + + private FileStatus[] listSortedOutputShardDirs(Path outputReduceDir, FileSystem fs) throws FileNotFoundException, + IOException { + + final String dirPrefix = SolrOutputFormat.getOutputName(job); + FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() { + @Override + public boolean accept(Path path) { + return path.getName().startsWith(dirPrefix); + } + }); + for (FileStatus dir : dirs) { + if (!dir.isDirectory()) { + throw new IllegalStateException("Not a directory: " + dir.getPath()); + } + } + + // use alphanumeric sort (rather than lexicographical sort) to properly handle more than 99999 shards + Arrays.sort(dirs, new Comparator() { + @Override + public int compare(FileStatus f1, FileStatus f2) { + return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName()); + } + }); + + return dirs; + } + + 
/* + * You can run MapReduceIndexerTool in SolrCloud mode, and once the MR job completes, you can use + * the standard solrj SolrCloud API to send doc updates and deletes to SolrCloud, and those updates + * and deletes will go to the right Solr shards, and it will work just fine. + * + * The MapReduce framework doesn't guarantee that input split N goes to the map task with the + * taskId = N. The job tracker and Yarn schedule and assign tasks, considering data locality + * aspects, but without regard to the input split# within the overall list of input splits. In + * other words, split# != taskId can be true. + * + * To deal with this issue, our mapper tasks write a little auxiliary metadata file (per task) + * that tells the job driver which taskId processed which split#. Once the mapper-only job is + * completed, the job driver renames the output dirs such that the dir name contains the true solr + * shard id, based on these auxiliary files. + * + * This way each doc gets assigned to the right Solr shard even with #reducers > #solrshards + * + * Example for a merge with two shards: + * + * part-m-00000 and part-m-00001 go to outputShardNum = 0 and will end up in merged part-m-00000 + * part-m-00002 and part-m-00003 go to outputShardNum = 1 and will end up in merged part-m-00001 + * part-m-00004 and part-m-00005 go to outputShardNum = 2 and will end up in merged part-m-00002 + * ... 
and so on + * + * Also see run() method above where it uses NLineInputFormat.setNumLinesPerSplit(job, + * options.fanout) + * + * Also see TreeMergeOutputFormat.TreeMergeRecordWriter.writeShardNumberFile() + */ + private boolean renameTreeMergeShardDirs(Path outputTreeMergeStep, Job job, FileSystem fs) throws IOException { + final String dirPrefix = SolrOutputFormat.getOutputName(job); + FileStatus[] dirs = fs.listStatus(outputTreeMergeStep, new PathFilter() { + @Override + public boolean accept(Path path) { + return path.getName().startsWith(dirPrefix); + } + }); + + for (FileStatus dir : dirs) { + if (!dir.isDirectory()) { + throw new IllegalStateException("Not a directory: " + dir.getPath()); + } + } + + // Example: rename part-m-00004 to _part-m-00004 + for (FileStatus dir : dirs) { + Path path = dir.getPath(); + Path renamedPath = new Path(path.getParent(), "_" + path.getName()); + if (!rename(path, renamedPath, fs)) { + return false; + } + } + + // Example: rename _part-m-00004 to part-m-00002 + for (FileStatus dir : dirs) { + Path path = dir.getPath(); + Path renamedPath = new Path(path.getParent(), "_" + path.getName()); + + // read auxiliary metadata file (per task) that tells which taskId + // processed which split# aka solrShard + Path solrShardNumberFile = new Path(renamedPath, TreeMergeMapper.SOLR_SHARD_NUMBER); + InputStream in = fs.open(solrShardNumberFile); + byte[] bytes = ByteStreams.toByteArray(in); + in.close(); + Preconditions.checkArgument(bytes.length > 0); + int solrShard = Integer.parseInt(new String(bytes, Charsets.UTF_8)); + if (!delete(solrShardNumberFile, false, fs)) { + return false; + } + + // same as FileOutputFormat.NUMBER_FORMAT + NumberFormat numberFormat = NumberFormat.getInstance(Locale.ENGLISH); + numberFormat.setMinimumIntegerDigits(5); + numberFormat.setGroupingUsed(false); + Path finalPath = new Path(renamedPath.getParent(), dirPrefix + "-m-" + numberFormat.format(solrShard)); + + LOG.info("MTree merge renaming solr shard: " 
+ solrShard + " from dir: " + dir.getPath() + " to dir: " + finalPath); + if (!rename(renamedPath, finalPath, fs)) { + return false; + } + } + return true; + } + + private static void verifyGoLiveArgs(Options opts, ArgumentParser parser) throws ArgumentParserException { + if (opts.zkHost == null && opts.solrHomeDir == null) { + throw new ArgumentParserException("At least one of --zk-host or --solr-home-dir is required", parser); + } + if (opts.goLive && opts.zkHost == null && opts.shardUrls == null) { + throw new ArgumentParserException("--go-live requires that you also pass --shard-url or --zk-host", parser); + } + + if (opts.zkHost != null && opts.collection == null) { + throw new ArgumentParserException("--zk-host requires that you also pass --collection", parser); + } + + if (opts.zkHost != null) { + return; + // verify structure of ZK directory later, to avoid checking run-time errors during parsing. + } else if (opts.shardUrls != null) { + if (opts.shardUrls.size() == 0) { + throw new ArgumentParserException("--shard-url requires at least one URL", parser); + } + } else if (opts.shards != null) { + if (opts.shards <= 0) { + throw new ArgumentParserException("--shards must be a positive number: " + opts.shards, parser); + } + } else { + throw new ArgumentParserException("You must specify one of the following (mutually exclusive) arguments: " + + "--zk-host or --shard-url or --shards", parser); + } + + if (opts.shardUrls != null) { + opts.shards = opts.shardUrls.size(); + } + + assert opts.shards != null; + assert opts.shards > 0; + } + + private static void verifyZKStructure(Options opts, ArgumentParser parser) throws ArgumentParserException { + if (opts.zkHost != null) { + assert opts.collection != null; + ZooKeeperInspector zki = new ZooKeeperInspector(); + try { + opts.shardUrls = zki.extractShardUrls(opts.zkHost, opts.collection); + } catch (Exception e) { + LOG.debug("Cannot extract SolrCloud shard URLs from ZooKeeper", e); + throw new 
ArgumentParserException(e, parser); + } + assert opts.shardUrls != null; + if (opts.shardUrls.size() == 0) { + throw new ArgumentParserException("--zk-host requires ZooKeeper " + opts.zkHost + + " to contain at least one SolrCore for collection: " + opts.collection, parser); + } + opts.shards = opts.shardUrls.size(); + LOG.debug("Using SolrCloud shard URLs: {}", opts.shardUrls); + } + } + + private boolean waitForCompletion(Job job, boolean isVerbose) + throws IOException, InterruptedException, ClassNotFoundException { + + LOG.debug("Running job: " + getJobInfo(job)); + boolean success = job.waitForCompletion(isVerbose); + if (!success) { + LOG.error("Job failed! " + getJobInfo(job)); + } + return success; + } + + private void goodbye(Job job, long startTime) { + float secs = (System.currentTimeMillis() - startTime) / 1000.0f; + if (job != null) { + LOG.info("Succeeded with job: " + getJobInfo(job)); + } + LOG.info("Success. Done. Program took {} secs. Goodbye.", secs); + } + + private String getJobInfo(Job job) { + return "jobName: " + job.getJobName() + ", jobId: " + job.getJobID(); + } + + private boolean rename(Path src, Path dst, FileSystem fs) throws IOException { + boolean success = fs.rename(src, dst); + if (!success) { + LOG.error("Cannot rename " + src + " to " + dst); + } + return success; + } + + private boolean delete(Path path, boolean recursive, FileSystem fs) throws IOException { + boolean success = fs.delete(path, recursive); + if (!success) { + LOG.error("Cannot delete " + path); + } + return success; + } + + // same as IntMath.divide(p, q, RoundingMode.CEILING) + private long ceilDivide(long p, long q) { + long result = p / q; + if (p % q != 0) { + result++; + } + return result; + } + + /** + * Returns logbasevalue. 
+ */ + private double log(double base, double value) { + return Math.log(value) / Math.log(base); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/PathArgumentType.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/PathArgumentType.java new file mode 100644 index 00000000000..770a2f9f90b --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/PathArgumentType.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import net.sourceforge.argparse4j.inf.Argument; +import net.sourceforge.argparse4j.inf.ArgumentParser; +import net.sourceforge.argparse4j.inf.ArgumentParserException; +import net.sourceforge.argparse4j.inf.ArgumentType; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; + +/** + * ArgumentType subclass for HDFS Path type, using fluent style API. 
+ */ +public class PathArgumentType implements ArgumentType { + + private final Configuration conf; + private FileSystem fs; + private boolean acceptSystemIn = false; + private boolean verifyExists = false; + private boolean verifyNotExists = false; + private boolean verifyIsFile = false; + private boolean verifyIsDirectory = false; + private boolean verifyCanRead = false; + private boolean verifyCanWrite = false; + private boolean verifyCanWriteParent = false; + private boolean verifyCanExecute = false; + private boolean verifyIsAbsolute = false; + private boolean verifyHasScheme = false; + private String verifyScheme = null; + + public PathArgumentType(Configuration conf) { + this.conf = conf; + } + + public PathArgumentType acceptSystemIn() { + acceptSystemIn = true; + return this; + } + + public PathArgumentType verifyExists() { + verifyExists = true; + return this; + } + + public PathArgumentType verifyNotExists() { + verifyNotExists = true; + return this; + } + + public PathArgumentType verifyIsFile() { + verifyIsFile = true; + return this; + } + + public PathArgumentType verifyIsDirectory() { + verifyIsDirectory = true; + return this; + } + + public PathArgumentType verifyCanRead() { + verifyCanRead = true; + return this; + } + + public PathArgumentType verifyCanWrite() { + verifyCanWrite = true; + return this; + } + + public PathArgumentType verifyCanWriteParent() { + verifyCanWriteParent = true; + return this; + } + + public PathArgumentType verifyCanExecute() { + verifyCanExecute = true; + return this; + } + + public PathArgumentType verifyIsAbsolute() { + verifyIsAbsolute = true; + return this; + } + + public PathArgumentType verifyHasScheme() { + verifyHasScheme = true; + return this; + } + + public PathArgumentType verifyScheme(String scheme) { + verifyScheme = scheme; + return this; + } + + @Override + public Path convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException { + Path file = new Path(value); + try { + fs = 
file.getFileSystem(conf); + if (verifyHasScheme && !isSystemIn(file)) { + verifyHasScheme(parser, file); + } + if (verifyScheme != null && !isSystemIn(file)) { + verifyScheme(parser, file); + } + if (verifyIsAbsolute && !isSystemIn(file)) { + verifyIsAbsolute(parser, file); + } + if (verifyExists && !isSystemIn(file)) { + verifyExists(parser, file); + } + if (verifyNotExists && !isSystemIn(file)) { + verifyNotExists(parser, file); + } + if (verifyIsFile && !isSystemIn(file)) { + verifyIsFile(parser, file); + } + if (verifyIsDirectory && !isSystemIn(file)) { + verifyIsDirectory(parser, file); + } + if (verifyCanRead && !isSystemIn(file)) { + verifyCanRead(parser, file); + } + if (verifyCanWrite && !isSystemIn(file)) { + verifyCanWrite(parser, file); + } + if (verifyCanWriteParent && !isSystemIn(file)) { + verifyCanWriteParent(parser, file); + } + if (verifyCanExecute && !isSystemIn(file)) { + verifyCanExecute(parser, file); + } + } catch (IOException e) { + throw new ArgumentParserException(e, parser); + } + return file; + } + + private void verifyExists(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (!fs.exists(file)) { + throw new ArgumentParserException("File not found: " + file, parser); + } + } + + private void verifyNotExists(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (fs.exists(file)) { + throw new ArgumentParserException("File found: " + file, parser); + } + } + + private void verifyIsFile(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (!fs.isFile(file)) { + throw new ArgumentParserException("Not a file: " + file, parser); + } + } + + private void verifyIsDirectory(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (!fs.isDirectory(file)) { + throw new ArgumentParserException("Not a directory: " + file, parser); + } + } + + private void verifyCanRead(ArgumentParser parser, Path file) throws 
ArgumentParserException, IOException { + verifyExists(parser, file); + if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.READ)) { + throw new ArgumentParserException("Insufficient permissions to read file: " + file, parser); + } + } + + private void verifyCanWrite(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + verifyExists(parser, file); + if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.WRITE)) { + throw new ArgumentParserException("Insufficient permissions to write file: " + file, parser); + } + } + + private void verifyCanWriteParent(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + Path parent = file.getParent(); + if (parent == null || !fs.exists(parent) || !fs.getFileStatus(parent).getPermission().getUserAction().implies(FsAction.WRITE)) { + throw new ArgumentParserException("Cannot write parent of file: " + file, parser); + } + } + + private void verifyCanExecute(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + verifyExists(parser, file); + if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.EXECUTE)) { + throw new ArgumentParserException("Insufficient permissions to execute file: " + file, parser); + } + } + + private void verifyIsAbsolute(ArgumentParser parser, Path file) throws ArgumentParserException { + if (!file.isAbsolute()) { + throw new ArgumentParserException("Not an absolute file: " + file, parser); + } + } + + private void verifyHasScheme(ArgumentParser parser, Path file) throws ArgumentParserException { + if (file.toUri().getScheme() == null) { + throw new ArgumentParserException("URI scheme is missing in path: " + file, parser); + } + } + + private void verifyScheme(ArgumentParser parser, Path file) throws ArgumentParserException { + if (!verifyScheme.equals(file.toUri().getScheme())) { + throw new ArgumentParserException("Scheme of path: " + file + " must be: " + 
verifyScheme, parser); + } + } + + private boolean isSystemIn(Path file) { + return acceptSystemIn && file.toString().equals("-"); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/PathParts.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/PathParts.java new file mode 100644 index 00000000000..690901b4c76 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/PathParts.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.server.namenode.NameNode; + +/** + * Extracts various components of an HDFS Path + */ +public final class PathParts { + + private final String uploadURL; + private final Configuration conf; + private final FileSystem fs; + private final Path normalizedPath; + private FileStatus stats; + + public PathParts(String uploadURL, Configuration conf) throws IOException { + if (uploadURL == null) { + throw new IllegalArgumentException("Path must not be null: " + uploadURL); + } + this.uploadURL = uploadURL; + if (conf == null) { + throw new IllegalArgumentException("Configuration must not be null: " + uploadURL); + } + this.conf = conf; + URI uri = stringToUri(uploadURL); + this.fs = FileSystem.get(uri, conf); + if (fs == null) { + throw new IllegalArgumentException("File system must not be null: " + uploadURL); + } + this.normalizedPath = fs.makeQualified(new Path(uri)); + if (!normalizedPath.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + uploadURL); + } + if (getScheme() == null) { + throw new IllegalArgumentException("Scheme must not be null: " + uploadURL); + } + if (getHost() == null) { + throw new IllegalArgumentException("Host must not be null: " + uploadURL); + } + if (getPort() < 0) { + throw new IllegalArgumentException("Port must not be negative: " + uploadURL); + } + } + + public String getUploadURL() { + return uploadURL; + } + + public Path getUploadPath() { + return new Path(getUploadURL()); + } + + public String getURIPath() { + return normalizedPath.toUri().getPath(); + } + + public String getName() { + return normalizedPath.getName(); + } + + public String getScheme() { + return normalizedPath.toUri().getScheme(); + } + + public String 
getHost() { + return normalizedPath.toUri().getHost(); + } + + public int getPort() { + int port = normalizedPath.toUri().getPort(); + if (port == -1) { + port = fs.getWorkingDirectory().toUri().getPort(); + if (port == -1) { + port = NameNode.DEFAULT_PORT; + } + } + return port; + } + + public String getId() { + return getScheme() + "://" + getHost() + ":" + getPort() + getURIPath(); + } + + public String getDownloadURL() { + return getId(); + } + + public Configuration getConfiguration() { + return conf; + } + + public FileSystem getFileSystem() { + return fs; + } + + public FileStatus getFileStatus() throws IOException { + if (stats == null) { + stats = getFileSystem().getFileStatus(getUploadPath()); + } + return stats; + } + + private URI stringToUri(String pathString) { + //return new Path(pathString).toUri().normalize(); + return URI.create(pathString).normalize(); + } +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java new file mode 100644 index 00000000000..27f532c174a --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.DocRouter; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.Hash; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * MapReduce partitioner that partitions the Mapper output such that each + * SolrInputDocument gets sent to the SolrCloud shard that it would have been + * sent to if the document were ingested via the standard SolrCloud Near Real + * Time (NRT) API. + * + * In other words, this class implements the same partitioning semantics as the + * standard SolrCloud NRT API. This enables to mix batch updates from MapReduce + * ingestion with updates from standard NRT ingestion on the same SolrCloud + * cluster, using identical unique document keys. 
+ */ +public class SolrCloudPartitioner extends Partitioner implements Configurable { + + private Configuration conf; + private DocCollection docCollection; + private Map shardNumbers; + private int shards = 0; + private final SolrParams emptySolrParams = new MapSolrParams(Collections.EMPTY_MAP); + + public static final String SHARDS = SolrCloudPartitioner.class.getName() + ".shards"; + public static final String ZKHOST = SolrCloudPartitioner.class.getName() + ".zkHost"; + public static final String COLLECTION = SolrCloudPartitioner.class.getName() + ".collection"; + + private static final Logger LOG = LoggerFactory.getLogger(SolrCloudPartitioner.class); + + public SolrCloudPartitioner() {} + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + this.shards = conf.getInt(SHARDS, -1); + if (shards <= 0) { + throw new IllegalArgumentException("Illegal shards: " + shards); + } + String zkHost = conf.get(ZKHOST); + if (zkHost == null) { + throw new IllegalArgumentException("zkHost must not be null"); + } + String collection = conf.get(COLLECTION); + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + LOG.info("Using SolrCloud zkHost: {}, collection: {}", zkHost, collection); + docCollection = new ZooKeeperInspector().extractDocCollection(zkHost, collection); + if (docCollection == null) { + throw new IllegalArgumentException("docCollection must not be null"); + } + if (docCollection.getSlicesMap().size() != shards) { + throw new IllegalArgumentException("Incompatible shards: + " + shards + " for docCollection: " + docCollection); + } + List slices = new ZooKeeperInspector().getSortedSlices(docCollection.getSlices()); + if (slices.size() != shards) { + throw new IllegalStateException("Incompatible sorted shards: + " + shards + " for docCollection: " + docCollection); + } + shardNumbers = new HashMap(10 * slices.size()); // sparse for performance + for (int i = 0; i < slices.size(); i++) { + 
shardNumbers.put(slices.get(i).getName(), i); + } + LOG.debug("Using SolrCloud docCollection: {}", docCollection); + DocRouter docRouter = docCollection.getRouter(); + if (docRouter == null) { + throw new IllegalArgumentException("docRouter must not be null"); + } + LOG.info("Using SolrCloud docRouterClass: {}", docRouter.getClass()); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public int getPartition(Text key, SolrInputDocumentWritable value, int numPartitions) { + DocRouter docRouter = docCollection.getRouter(); + SolrInputDocument doc = value.getSolrInputDocument(); + String keyStr = key.toString(); + + // TODO: scalability: replace linear search in HashBasedRouter.hashToSlice() with binary search on sorted hash ranges + Slice slice = docRouter.getTargetSlice(keyStr, doc, emptySolrParams, docCollection); + +// LOG.info("slice: {}", slice); + if (slice == null) { + throw new IllegalStateException("No matching slice found! The slice seems unavailable. 
docRouterClass: " + + docRouter.getClass().getName()); + } + int rootShard = shardNumbers.get(slice.getName()); + if (rootShard < 0 || rootShard >= shards) { + throw new IllegalStateException("Illegal shard number " + rootShard + " for slice: " + slice + ", docCollection: " + + docCollection); + } + + // map doc to micro shard aka leaf shard, akin to HashBasedRouter.sliceHash() + // taking into account mtree merge algorithm + assert numPartitions % shards == 0; // Also note that numPartitions is equal to the number of reducers + int hashCode = Hash.murmurhash3_x86_32(keyStr, 0, keyStr.length(), 0); + int offset = (hashCode & Integer.MAX_VALUE) % (numPartitions / shards); + int microShard = (rootShard * (numPartitions / shards)) + offset; +// LOG.info("Subpartitions rootShard: {}, offset: {}", rootShard, offset); +// LOG.info("Partitioned to p: {} for numPartitions: {}, shards: {}, key: {}, value: {}", microShard, numPartitions, shards, key, value); + + assert microShard >= 0 && microShard < numPartitions; + return microShard; + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrCounters.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrCounters.java new file mode 100644 index 00000000000..88e9acb57cc --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrCounters.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +public enum SolrCounters { + + DOCUMENTS_WRITTEN (getClassName(SolrReducer.class) + + ": Number of documents processed"), + + BATCHES_WRITTEN (getClassName(SolrReducer.class) + + ": Number of document batches processed"), + + BATCH_WRITE_TIME (getClassName(SolrReducer.class) + + ": Time spent by reducers writing batches [ms]"), + + PHYSICAL_REDUCER_MERGE_TIME (getClassName(SolrReducer.class) + + ": Time spent by reducers on physical merges [ms]"), + + LOGICAL_TREE_MERGE_TIME (getClassName(TreeMergeMapper.class) + + ": Time spent on logical tree merges [ms]"), + + PHYSICAL_TREE_MERGE_TIME (getClassName(TreeMergeMapper.class) + + ": Time spent on physical tree merges [ms]"); + + private final String label; + + private SolrCounters(String label) { + this.label = label; + } + + public String toString() { + return label; + } + + private static String getClassName(Class clazz) { + return Utils.getShortClassName(clazz); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java new file mode 100644 index 00000000000..e043f7a0ed2 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.FastOutputStream; +import org.apache.solr.common.util.JavaBinCodec; + +public class SolrInputDocumentWritable implements Writable { + private SolrInputDocument sid; + + public SolrInputDocumentWritable() { + } + + public SolrInputDocumentWritable(SolrInputDocument sid) { + this.sid = sid; + } + + public SolrInputDocument getSolrInputDocument() { + return sid; + } + + @Override + public String toString() { + return sid.toString(); + } + + @Override + public void write(DataOutput out) throws IOException { + JavaBinCodec codec = new JavaBinCodec(); + FastOutputStream daos = FastOutputStream.wrap(DataOutputOutputStream.constructOutputStream(out)); + codec.init(daos); + try { + codec.writeVal(sid); + } finally { + daos.flushBuffer(); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + JavaBinCodec codec = new JavaBinCodec(); + UnbufferedDataInputInputStream dis = new UnbufferedDataInputInputStream(in); + sid = (SolrInputDocument)codec.readVal(dis); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrMapper.java new 
file mode 100644 index 00000000000..2a6d699b541 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrMapper.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; + +public class SolrMapper extends Mapper { + + private Path solrHomeDir; + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + Utils.getLogConfigFile(context.getConfiguration()); + super.setup(context); + solrHomeDir = SolrRecordWriter.findSolrConfig(context.getConfiguration()); + } + + protected Path getSolrHomeDir() { + return solrHomeDir; + } +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrOutputFormat.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrOutputFormat.java new file mode 100644 index 00000000000..97b2b79404e --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrOutputFormat.java @@ -0,0 +1,278 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; +import java.util.UUID; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SolrOutputFormat extends FileOutputFormat { + + private static final Logger LOG = LoggerFactory.getLogger(SolrOutputFormat.class); + + /** + * The parameter used to pass the solr config zip file information. 
This will + * be the hdfs path to the configuration zip file + */ + public static final String SETUP_OK = "solr.output.format.setup"; + + /** The key used to pass the zip file name through the configuration. */ + public static final String ZIP_NAME = "solr.zip.name"; + + /** + * The base name of the zip file containing the configuration information. + * This file is passed via the distributed cache using a unique name, obtained + * via {@link #getZipName(Configuration jobConf)}. + */ + public static final String ZIP_FILE_BASE_NAME = "solr.zip"; + + /** + * The key used to pass the boolean configuration parameter that instructs for + * regular or zip file output + */ + public static final String OUTPUT_ZIP_FILE = "solr.output.zip.format"; + + static int defaultSolrWriterThreadCount = 0; + + public static final String SOLR_WRITER_THREAD_COUNT = "solr.record.writer.num.threads"; + + static int defaultSolrWriterQueueSize = 1; + + public static final String SOLR_WRITER_QUEUE_SIZE = "solr.record.writer.max.queues.size"; + + static int defaultSolrBatchSize = 20; + + public static final String SOLR_RECORD_WRITER_BATCH_SIZE = "solr.record.writer.batch.size"; + + public static final String SOLR_RECORD_WRITER_MAX_SEGMENTS = "solr.record.writer.maxSegments"; + + public static String getSetupOk() { + return SETUP_OK; + } + + /** Get the number of threads used for index writing */ + public static void setSolrWriterThreadCount(int count, Configuration conf) { + conf.setInt(SOLR_WRITER_THREAD_COUNT, count); + } + + /** Set the number of threads used for index writing */ + public static int getSolrWriterThreadCount(Configuration conf) { + return conf.getInt(SOLR_WRITER_THREAD_COUNT, defaultSolrWriterThreadCount); + } + + /** + * Set the maximum size of the the queue for documents to be written to the + * index. 
+ */ + public static void setSolrWriterQueueSize(int count, Configuration conf) { + conf.setInt(SOLR_WRITER_QUEUE_SIZE, count); + } + + /** Return the maximum size for the number of documents pending index writing. */ + public static int getSolrWriterQueueSize(Configuration conf) { + return conf.getInt(SOLR_WRITER_QUEUE_SIZE, defaultSolrWriterQueueSize); + } + + /** + * Return the file name portion of the configuration zip file, from the + * configuration. + */ + public static String getZipName(Configuration conf) { + return conf.get(ZIP_NAME, ZIP_FILE_BASE_NAME); + } + + /** + * configure the job to output zip files of the output index, or full + * directory trees. Zip files are about 1/5th the size of the raw index, and + * much faster to write, but take more cpu to create. + * + * @param output true if should output zip files + * @param conf to use + */ + public static void setOutputZipFormat(boolean output, Configuration conf) { + conf.setBoolean(OUTPUT_ZIP_FILE, output); + } + + /** + * return true if the output should be a zip file of the index, rather than + * the raw index + * + * @param conf to use + * @return true if output zip files is on + */ + public static boolean isOutputZipFormat(Configuration conf) { + return conf.getBoolean(OUTPUT_ZIP_FILE, false); + } + + public static String getOutputName(JobContext job) { + return FileOutputFormat.getOutputName(job); + } + + @Override + public void checkOutputSpecs(JobContext job) throws IOException { + super.checkOutputSpecs(job); + if (job.getConfiguration().get(SETUP_OK) == null) { + throw new IOException("Solr home cache not set up!"); + } + } + + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { + Utils.getLogConfigFile(context.getConfiguration()); + Path workDir = getDefaultWorkFile(context, ""); + int batchSize = getBatchSize(context.getConfiguration()); + return new SolrRecordWriter(context, workDir, batchSize); + } + + public static 
void setupSolrHomeCache(File solrHomeDir, Job job) throws IOException{ + File solrHomeZip = createSolrHomeZip(solrHomeDir); + addSolrConfToDistributedCache(job, solrHomeZip); + } + + public static File createSolrHomeZip(File solrHomeDir) throws IOException { + return createSolrHomeZip(solrHomeDir, false); + } + + private static File createSolrHomeZip(File solrHomeDir, boolean safeToModify) throws IOException { + if (solrHomeDir == null || !(solrHomeDir.exists() && solrHomeDir.isDirectory())) { + throw new IOException("Invalid solr home: " + solrHomeDir); + } + File solrHomeZip = File.createTempFile("solr", ".zip"); + createZip(solrHomeDir, solrHomeZip); + return solrHomeZip; + } + + public static void addSolrConfToDistributedCache(Job job, File solrHomeZip) + throws IOException { + // Make a reasonably unique name for the zip file in the distributed cache + // to avoid collisions if multiple jobs are running. + String hdfsZipName = UUID.randomUUID().toString() + '.' + + ZIP_FILE_BASE_NAME; + Configuration jobConf = job.getConfiguration(); + jobConf.set(ZIP_NAME, hdfsZipName); + + Path zipPath = new Path("/tmp", getZipName(jobConf)); + FileSystem fs = FileSystem.get(jobConf); + fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath); + final URI baseZipUrl = fs.getUri().resolve( + zipPath.toString() + '#' + getZipName(jobConf)); + + DistributedCache.addCacheArchive(baseZipUrl, jobConf); + LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives())); + LOG.debug("Set zipPath: {}", zipPath); + // Actually send the path for the configuration zip file + jobConf.set(SETUP_OK, zipPath.toString()); + } + + private static void createZip(File dir, File out) throws IOException { + HashSet files = new HashSet(); + // take only conf/ and lib/ + for (String allowedDirectory : SolrRecordWriter + .getAllowedConfigDirectories()) { + File configDir = new File(dir, allowedDirectory); + boolean configDirExists; + /** If the directory does not exist, and 
is required, bail out */ + if (!(configDirExists = configDir.exists()) + && SolrRecordWriter.isRequiredConfigDirectory(allowedDirectory)) { + throw new IOException(String.format(Locale.ENGLISH, + "required configuration directory %s is not present in %s", + allowedDirectory, dir)); + } + if (!configDirExists) { + continue; + } + listFiles(configDir, files); // Store the files in the existing, allowed + // directory configDir, in the list of files + // to store in the zip file + } + + out.delete(); + int subst = dir.toString().length(); + ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(out)); + byte[] buf = new byte[1024]; + for (File f : files) { + ZipEntry ze = new ZipEntry(f.toString().substring(subst)); + zos.putNextEntry(ze); + InputStream is = new FileInputStream(f); + int cnt; + while ((cnt = is.read(buf)) >= 0) { + zos.write(buf, 0, cnt); + } + is.close(); + zos.flush(); + zos.closeEntry(); + } + + ZipEntry ze = new ZipEntry("solr.xml"); + zos.putNextEntry(ze); + zos.write("".getBytes("UTF-8")); + zos.flush(); + zos.closeEntry(); + zos.close(); + } + + private static void listFiles(File dir, Set files) throws IOException { + File[] list = dir.listFiles(); + + if (list == null && dir.isFile()) { + files.add(dir); + return; + } + + for (File f : list) { + if (f.isFile()) { + files.add(f); + } else { + listFiles(f, files); + } + } + } + + public static int getBatchSize(Configuration jobConf) { + // TODO Auto-generated method stub + return jobConf.getInt(SolrOutputFormat.SOLR_RECORD_WRITER_BATCH_SIZE, + defaultSolrBatchSize); + } + + public static void setBatchSize(int count, Configuration jobConf) { + jobConf.setInt(SOLR_RECORD_WRITER_BATCH_SIZE, count); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrRecordWriter.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrRecordWriter.java new file mode 100644 index 00000000000..0850898601e --- /dev/null +++ 
b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrRecordWriter.java @@ -0,0 +1,515 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Properties; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.solr.hadoop.SolrOutputFormat; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.HdfsDirectoryFactory; +import 
org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class SolrRecordWriter extends RecordWriter { + + private static final Logger LOG = LoggerFactory.getLogger(SolrRecordWriter.class); + + public final static List allowedConfigDirectories = new ArrayList( + Arrays.asList(new String[] { "conf", "lib", "solr.xml" })); + + public final static Set requiredConfigDirectories = new HashSet(); + + static { + requiredConfigDirectories.add("conf"); + } + + /** + * Return the list of directories names that may be included in the + * configuration data passed to the tasks. + * + * @return an UnmodifiableList of directory names + */ + public static List getAllowedConfigDirectories() { + return Collections.unmodifiableList(allowedConfigDirectories); + } + + /** + * check if the passed in directory is required to be present in the + * configuration data set. + * + * @param directory The directory to check + * @return true if the directory is required. + */ + public static boolean isRequiredConfigDirectory(final String directory) { + return requiredConfigDirectories.contains(directory); + } + + /** The path that the final index will be written to */ + + /** The location in a local temporary directory that the index is built in. */ + +// /** +// * If true, create a zip file of the completed index in the final storage +// * location A .zip will be appended to the final output name if it is not +// * already present. 
+// */ +// private boolean outputZipFile = false; + + private final HeartBeater heartBeater; + private final BatchWriter batchWriter; + private final List batch; + private final int batchSize; + private long numDocsWritten = 0; + private long nextLogTime = System.currentTimeMillis(); + + private static HashMap.Context> contextMap = new HashMap.Context>(); + + public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) { + this.batchSize = batchSize; + this.batch = new ArrayList(batchSize); + Configuration conf = context.getConfiguration(); + + // setLogLevel("org.apache.solr.core", "WARN"); + // setLogLevel("org.apache.solr.update", "WARN"); + + heartBeater = new HeartBeater(context); + try { + heartBeater.needHeartBeat(); + + Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf); + FileSystem fs = outputShardDir.getFileSystem(conf); + EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, outputShardDir); + batchWriter = new BatchWriter(solr, batchSize, + context.getTaskAttemptID().getTaskID(), + SolrOutputFormat.getSolrWriterThreadCount(conf), + SolrOutputFormat.getSolrWriterQueueSize(conf)); + + } catch (Exception e) { + throw new IllegalStateException(String.format(Locale.ENGLISH, + "Failed to initialize record writer for %s, %s", context.getJobName(), conf + .get("mapred.task.id")), e); + } finally { + heartBeater.cancelHeartBeat(); + } + } + + public static EmbeddedSolrServer createEmbeddedSolrServer(Path solrHomeDir, FileSystem fs, Path outputShardDir) + throws IOException { + + if (solrHomeDir == null) { + throw new IOException("Unable to find solr home setting"); + } + LOG.info("Creating embedded Solr server with solrHomeDir: " + solrHomeDir + ", fs: " + fs + ", outputShardDir: " + outputShardDir); + + Path solrDataDir = new Path(outputShardDir, "data"); + + String dataDirStr = solrDataDir.toUri().toString(); + + SolrResourceLoader loader = new SolrResourceLoader(solrHomeDir.toString(), null, null); + + 
LOG.info(String + .format(Locale.ENGLISH, + "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to solr.data.dir %s, with permdir %s", + solrHomeDir, solrHomeDir.toUri(), loader.getInstanceDir(), + loader.getConfigDir(), dataDirStr, outputShardDir)); + + // TODO: This is fragile and should be well documented + System.setProperty("solr.directoryFactory", HdfsDirectoryFactory.class.getName()); + System.setProperty("solr.lock.type", "hdfs"); + System.setProperty("solr.hdfs.nrtcachingdirectory", "false"); + System.setProperty("solr.hdfs.blockcache.enabled", "false"); + System.setProperty("solr.autoCommit.maxTime", "-1"); + System.setProperty("solr.autoSoftCommit.maxTime", "-1"); + + CoreContainer container = new CoreContainer(loader); + container.load(); + + Properties props = new Properties(); + props.setProperty(CoreDescriptor.CORE_DATADIR, dataDirStr); + + CoreDescriptor descr = new CoreDescriptor(container, "core1", solrHomeDir.toString(), props); + + SolrCore core = container.create(descr); + container.register(core, false); + + EmbeddedSolrServer solr = new EmbeddedSolrServer(container, "core1"); + return solr; + } + + public static void incrementCounter(TaskID taskId, String groupName, String counterName, long incr) { + Reducer.Context context = contextMap.get(taskId); + if (context != null) { + context.getCounter(groupName, counterName).increment(incr); + } + } + + public static void incrementCounter(TaskID taskId, Enum counterName, long incr) { + Reducer.Context context = contextMap.get(taskId); + if (context != null) { + context.getCounter(counterName).increment(incr); + } + } + + public static void addReducerContext(Reducer.Context context) { + TaskID taskID = context.getTaskAttemptID().getTaskID(); + contextMap.put(taskID, context); + } + + public static Path findSolrConfig(Configuration conf) throws IOException { + Path solrHome = null; + // FIXME when mrunit supports the new cache apis + //URI[] localArchives = 
context.getCacheArchives(); + Path[] localArchives = DistributedCache.getLocalCacheArchives(conf); + if (localArchives.length == 0) { + throw new IOException(String.format(Locale.ENGLISH, + "No local cache archives, where is %s:%s", SolrOutputFormat + .getSetupOk(), SolrOutputFormat.getZipName(conf))); + } + for (Path unpackedDir : localArchives) { + // Only logged if debugging + if (LOG.isDebugEnabled()) { + LOG.debug(String.format(Locale.ENGLISH, "Examining unpack directory %s for %s", + unpackedDir, SolrOutputFormat.getZipName(conf))); + + ProcessBuilder lsCmd = new ProcessBuilder(new String[] { "/bin/ls", + "-lR", unpackedDir.toString() }); + lsCmd.redirectErrorStream(); + Process ls = lsCmd.start(); + byte[] buf = new byte[16 * 1024]; + InputStream all = ls.getInputStream(); + try { + int count; + while ((count = all.read(buf)) >= 0) { + System.err.write(buf, 0, count); + } + } catch (IOException ignore) { + } finally { + all.close(); + } + String exitValue; + try { + exitValue = String.valueOf(ls.waitFor()); + } catch (InterruptedException e) { + exitValue = "interrupted"; + } + System.err.format("Exit value of 'ls -lR' is %s%n", exitValue); + } + if (unpackedDir.getName().equals(SolrOutputFormat.getZipName(conf))) { + LOG.info("Using this unpacked directory as solr home: {}", unpackedDir); + solrHome = unpackedDir; + break; + } + } + + return solrHome; + } + + /** + * Write a record. This method accumulates records in to a batch, and when + * {@link #batchSize} items are present flushes it to the indexer. The writes + * can take a substantial amount of time, depending on {@link #batchSize}. If + * there is heavy disk contention the writes may take more than the 600 second + * default timeout. 
+ */ + @Override + public void write(K key, V value) throws IOException { + heartBeater.needHeartBeat(); + try { + try { + SolrInputDocumentWritable sidw = (SolrInputDocumentWritable) value; + batch.add(sidw.getSolrInputDocument()); + if (batch.size() >= batchSize) { + batchWriter.queueBatch(batch); + numDocsWritten += batch.size(); + if (System.currentTimeMillis() >= nextLogTime) { + LOG.info("docsWritten: {}", numDocsWritten); + nextLogTime += 10000; + } + batch.clear(); + } + } catch (SolrServerException e) { + throw new IOException(e); + } + } finally { + heartBeater.cancelHeartBeat(); + } + + } + + @Override + public void close(TaskAttemptContext context) throws IOException, InterruptedException { + if (context != null) { + heartBeater.setProgress(context); + } + try { + heartBeater.needHeartBeat(); + if (batch.size() > 0) { + batchWriter.queueBatch(batch); + numDocsWritten += batch.size(); + batch.clear(); + } + LOG.info("docsWritten: {}", numDocsWritten); + batchWriter.close(context); +// if (outputZipFile) { +// context.setStatus("Writing Zip"); +// packZipFile(); // Written to the perm location +// } else { +// context.setStatus("Copying Index"); +// fs.completeLocalOutput(perm, temp); // copy to dfs +// } + } catch (Exception e) { + if (e instanceof IOException) { + throw (IOException) e; + } + throw new IOException(e); + } finally { + heartBeater.cancelHeartBeat(); + heartBeater.close(); +// File tempFile = new File(temp.toString()); +// if (tempFile.exists()) { +// FileUtils.forceDelete(new File(temp.toString())); +// } + } + + context.setStatus("Done"); + } + +// private void packZipFile() throws IOException { +// FSDataOutputStream out = null; +// ZipOutputStream zos = null; +// int zipCount = 0; +// LOG.info("Packing zip file for " + perm); +// try { +// out = fs.create(perm, false); +// zos = new ZipOutputStream(out); +// +// String name = perm.getName().replaceAll(".zip$", ""); +// LOG.info("adding index directory" + temp); +// zipCount = 
zipDirectory(conf, zos, name, temp.toString(), temp); +// /** +// for (String configDir : allowedConfigDirectories) { +// if (!isRequiredConfigDirectory(configDir)) { +// continue; +// } +// final Path confPath = new Path(solrHome, configDir); +// LOG.info("adding configdirectory" + confPath); +// +// zipCount += zipDirectory(conf, zos, name, solrHome.toString(), confPath); +// } +// **/ +// } catch (Throwable ohFoo) { +// LOG.error("packZipFile exception", ohFoo); +// if (ohFoo instanceof RuntimeException) { +// throw (RuntimeException) ohFoo; +// } +// if (ohFoo instanceof IOException) { +// throw (IOException) ohFoo; +// } +// throw new IOException(ohFoo); +// +// } finally { +// if (zos != null) { +// if (zipCount == 0) { // If no entries were written, only close out, as +// // the zip will throw an error +// LOG.error("No entries written to zip file " + perm); +// fs.delete(perm, false); +// // out.close(); +// } else { +// LOG.info(String.format("Wrote %d items to %s for %s", zipCount, perm, +// temp)); +// zos.close(); +// } +// } +// } +// } +// +// /** +// * Write a file to a zip output stream, removing leading path name components +// * from the actual file name when creating the zip file entry. +// * +// * The entry placed in the zip file is baseName/ +// * relativePath, where relativePath is constructed +// * by removing a leading root from the path for +// * itemToZip. +// * +// * If itemToZip is an empty directory, it is ignored. If +// * itemToZip is a directory, the contents of the directory are +// * added recursively. 
+// * +// * @param zos The zip output stream +// * @param baseName The base name to use for the file name entry in the zip +// * file +// * @param root The path to remove from itemToZip to make a +// * relative path name +// * @param itemToZip The path to the file to be added to the zip file +// * @return the number of entries added +// * @throws IOException +// */ +// static public int zipDirectory(final Configuration conf, +// final ZipOutputStream zos, final String baseName, final String root, +// final Path itemToZip) throws IOException { +// LOG +// .info(String +// .format("zipDirectory: %s %s %s", baseName, root, itemToZip)); +// LocalFileSystem localFs = FileSystem.getLocal(conf); +// int count = 0; +// +// final FileStatus itemStatus = localFs.getFileStatus(itemToZip); +// if (itemStatus.isDirectory()) { +// final FileStatus[] statai = localFs.listStatus(itemToZip); +// +// // Add a directory entry to the zip file +// final String zipDirName = relativePathForZipEntry(itemToZip.toUri() +// .getPath(), baseName, root); +// final ZipEntry dirZipEntry = new ZipEntry(zipDirName +// + Path.SEPARATOR_CHAR); +// LOG.info(String.format("Adding directory %s to zip", zipDirName)); +// zos.putNextEntry(dirZipEntry); +// zos.closeEntry(); +// count++; +// +// if (statai == null || statai.length == 0) { +// LOG.info(String.format("Skipping empty directory %s", itemToZip)); +// return count; +// } +// for (FileStatus status : statai) { +// count += zipDirectory(conf, zos, baseName, root, status.getPath()); +// } +// LOG.info(String.format("Wrote %d entries for directory %s", count, +// itemToZip)); +// return count; +// } +// +// final String inZipPath = relativePathForZipEntry(itemToZip.toUri() +// .getPath(), baseName, root); +// +// if (inZipPath.length() == 0) { +// LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", +// itemToZip, root, baseName)); +// return 0; +// } +// +// // Take empty files in case the place holder is needed +// 
FSDataInputStream in = null; +// try { +// in = localFs.open(itemToZip); +// final ZipEntry ze = new ZipEntry(inZipPath); +// ze.setTime(itemStatus.getModificationTime()); +// // Comments confuse looking at the zip file +// // ze.setComment(itemToZip.toString()); +// zos.putNextEntry(ze); +// +// IOUtils.copyBytes(in, zos, conf, false); +// zos.closeEntry(); +// LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip)); +// return 1; +// } finally { +// in.close(); +// } +// +// } +// +// static String relativePathForZipEntry(final String rawPath, +// final String baseName, final String root) { +// String relativePath = rawPath.replaceFirst(Pattern.quote(root.toString()), +// ""); +// LOG.info(String.format("RawPath %s, baseName %s, root %s, first %s", +// rawPath, baseName, root, relativePath)); +// +// if (relativePath.startsWith(Path.SEPARATOR)) { +// relativePath = relativePath.substring(1); +// } +// LOG.info(String.format( +// "RawPath %s, baseName %s, root %s, post leading slash %s", rawPath, +// baseName, root, relativePath)); +// if (relativePath.isEmpty()) { +// LOG.warn(String.format( +// "No data after root (%s) removal from raw path %s", root, rawPath)); +// return baseName; +// } +// // Construct the path that will be written to the zip file, including +// // removing any leading '/' characters +// String inZipPath = baseName + Path.SEPARATOR_CHAR + relativePath; +// +// LOG.info(String.format("RawPath %s, baseName %s, root %s, inZip 1 %s", +// rawPath, baseName, root, inZipPath)); +// if (inZipPath.startsWith(Path.SEPARATOR)) { +// inZipPath = inZipPath.substring(1); +// } +// LOG.info(String.format("RawPath %s, baseName %s, root %s, inZip 2 %s", +// rawPath, baseName, root, inZipPath)); +// +// return inZipPath; +// +// } +// + /* + static boolean setLogLevel(String packageName, String level) { + Log logger = LogFactory.getLog(packageName); + if (logger == null) { + return false; + } + // look for: 
org.apache.commons.logging.impl.SLF4JLocationAwareLog + LOG.warn("logger class:"+logger.getClass().getName()); + if (logger instanceof Log4JLogger) { + process(((Log4JLogger) logger).getLogger(), level); + return true; + } + if (logger instanceof Jdk14Logger) { + process(((Jdk14Logger) logger).getLogger(), level); + return true; + } + return false; + } + + public static void process(org.apache.log4j.Logger log, String level) { + if (level != null) { + log.setLevel(org.apache.log4j.Level.toLevel(level)); + } + } + + public static void process(java.util.logging.Logger log, String level) { + if (level != null) { + log.setLevel(java.util.logging.Level.parse(level)); + } + } + */ +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrReducer.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrReducer.java new file mode 100644 index 00000000000..cf291bdc956 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrReducer.java @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.hadoop.dedup.NoChangeUpdateConflictResolver; +import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver; +import org.apache.solr.hadoop.dedup.UpdateConflictResolver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.api.ExceptionHandler; +import com.cloudera.cdk.morphline.base.FaultTolerance; +import com.google.common.base.Preconditions; + +/** + * This class loads the mapper's SolrInputDocuments into one EmbeddedSolrServer + * per reducer. Each such reducer and Solr server can be seen as a (micro) + * shard. The Solr servers store their data in HDFS. + * + * More specifically, this class consumes a list of <docId, SolrInputDocument> + * pairs, sorted by docId, and sends them to an embedded Solr server to generate + * a Solr index shard from the documents. 
+ */ +public class SolrReducer extends Reducer { + + private UpdateConflictResolver resolver; + private HeartBeater heartBeater; + private ExceptionHandler exceptionHandler; + + public static final String UPDATE_CONFLICT_RESOLVER = SolrReducer.class.getName() + ".updateConflictResolver"; + + private static final Logger LOG = LoggerFactory.getLogger(SolrReducer.class); + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + verifyPartitionAssignment(context); + SolrRecordWriter.addReducerContext(context); + Class resolverClass = context.getConfiguration().getClass( + UPDATE_CONFLICT_RESOLVER, RetainMostRecentUpdateConflictResolver.class, UpdateConflictResolver.class); + + this.resolver = ReflectionUtils.newInstance(resolverClass, context.getConfiguration()); + /* + * Note that ReflectionUtils.newInstance() above also implicitly calls + * resolver.configure(context.getConfiguration()) if the resolver + * implements org.apache.hadoop.conf.Configurable + */ + + this.exceptionHandler = new FaultTolerance( + context.getConfiguration().getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false), + context.getConfiguration().getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false), + context.getConfiguration().get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName())); + + this.heartBeater = new HeartBeater(context); + } + + protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { + heartBeater.needHeartBeat(); + try { + values = resolve(key, values, context); + super.reduce(key, values, context); + } catch (Exception e) { + LOG.error("Unable to process key " + key, e); + context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1); + exceptionHandler.handleException(e, null); + } finally { + heartBeater.cancelHeartBeat(); + } + } + + private Iterable resolve( + final Text key, final Iterable values, final Context 
context) { + + if (resolver instanceof NoChangeUpdateConflictResolver) { + return values; // fast path + } + return new Iterable() { + @Override + public Iterator iterator() { + return new WrapIterator(resolver.orderUpdates(key, new UnwrapIterator(values.iterator()), context)); + } + }; + } + + @Override + protected void cleanup(Context context) throws IOException, InterruptedException { + heartBeater.close(); + super.cleanup(context); + } + + /* + * Verify that if a mappers's partitioner sends an item to partition X it implies that said item + * is sent to the reducer with taskID == X. This invariant is currently required for Solr + * documents to end up in the right Solr shard. + */ + private void verifyPartitionAssignment(Context context) { + if ("true".equals(System.getProperty("verifyPartitionAssignment", "true"))) { + String partitionStr = context.getConfiguration().get("mapred.task.partition"); + if (partitionStr == null) { + partitionStr = context.getConfiguration().get("mapreduce.task.partition"); + } + int partition = Integer.parseInt(partitionStr); + int taskId = context.getTaskAttemptID().getTaskID().getId(); + Preconditions.checkArgument(partition == taskId, + "mapred.task.partition: " + partition + " not equal to reducer taskId: " + taskId); + } + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class WrapIterator implements Iterator { + + private Iterator parent; + + private WrapIterator(Iterator parent) { + this.parent = parent; + } + + @Override + public boolean hasNext() { + return parent.hasNext(); + } + + @Override + public SolrInputDocumentWritable next() { + return new SolrInputDocumentWritable(parent.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + + 
/////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class UnwrapIterator implements Iterator { + + private Iterator parent; + + private UnwrapIterator(Iterator parent) { + this.parent = parent; + } + + @Override + public boolean hasNext() { + return parent.hasNext(); + } + + @Override + public SolrInputDocument next() { + return parent.next().getSolrInputDocument(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java new file mode 100644 index 00000000000..d2efa96cdcf --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.hadoop;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+
+import net.sourceforge.argparse4j.ArgumentParsers;
+import net.sourceforge.argparse4j.helper.ASCIITextWidthCounter;
+import net.sourceforge.argparse4j.helper.TextHelper;
+
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Nicely formats the output of
+ * {@link ToolRunner#printGenericCommandUsage(PrintStream)} with the same look and feel that argparse4j uses for help text.
+ */
+class ToolRunnerHelpFormatter {
+
+  /**
+   * Captures the text printed by {@link ToolRunner#printGenericCommandUsage(PrintStream)}
+   * and re-renders it line by line:
+   * lines that do not start with "-" are copied through unchanged; option lines are split
+   * at the first separator into a title and a help text, which are then laid out by
+   * argparse4j's {@link TextHelper#printHelp}. Option lines without a separator are
+   * copied through (trimmed).
+   *
+   * @return the reformatted usage text
+   */
+  public static String getGenericCommandUsage() {
+    ByteArrayOutputStream bout = new ByteArrayOutputStream();
+    String msg;
+    try {
+      ToolRunner.printGenericCommandUsage(new PrintStream(bout, true, "UTF-8"));
+      msg = new String(bout.toByteArray(), "UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e); // unreachable
+    }
+
+    BufferedReader reader = new BufferedReader(new StringReader(msg));
+    StringBuilder result = new StringBuilder();
+    while (true) {
+      String line;
+      try {
+        line = reader.readLine();
+      } catch (IOException e) {
+        throw new RuntimeException(e); // unreachable
+      }
+
+      if (line == null) {
+        return result.toString(); // EOS
+      }
+
+      if (!line.startsWith("-")) {
+        result.append(line + "\n");
+      } else {
+        line = line.trim();
+        // NOTE(review): whitespace runs appear to be collapsed in this text (see the licence
+        // header), so the separator literal below may originally have been wider than one
+        // space - confirm against the original source before relying on it.
+        int i = line.indexOf(" ");
+        if (i < 0) {
+          i = line.indexOf('\t');
+        }
+        if (i < 0) {
+          result.append(line + "\n");
+        } else {
+          String title = line.substring(0, i).trim();
+          if (title.length() >= 3 && Character.isLetterOrDigit(title.charAt(1)) && Character.isLetterOrDigit(title.charAt(2))) {
+            title = "-" + title; // prefer "--libjars" long arg style over "-libjars" style but retain "-D foo" short arg style
+          }
+          String help = line.substring(i, line.length()).trim();
+          StringWriter strWriter = new StringWriter();
+          PrintWriter writer = new PrintWriter(strWriter, true);
+          TextHelper.printHelp(writer, title, help, new ASCIITextWidthCounter(), ArgumentParsers.getFormatWidth());
+          result.append(strWriter.toString());
+        }
+      }
+    }
+  }
+}
+
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeMapper.java
new file mode 100644
index 00000000000..b0de2bc9ed5
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeMapper.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * For the meat see {@link TreeMergeOutputFormat}.
+ */ +public class TreeMergeMapper extends Mapper { + + private static final Logger LOGGER = LoggerFactory.getLogger(TreeMergeMapper.class); + + public static final String MAX_SEGMENTS_ON_TREE_MERGE = "maxSegmentsOnTreeMerge"; + + public static final String SOLR_SHARD_NUMBER = "_solrShardNumber"; + + @Override + protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + LOGGER.trace("map key: {}, value: {}", key, value); + context.write(value, NullWritable.get()); + } + +} \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java new file mode 100644 index 00000000000..6141aadc87c --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.misc.IndexMergeTool; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Version; +import org.apache.solr.store.hdfs.HdfsDirectory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; + +/** + * See {@link IndexMergeTool}. 
+ */ +public class TreeMergeOutputFormat extends FileOutputFormat { + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException { + Utils.getLogConfigFile(context.getConfiguration()); + Path workDir = getDefaultWorkFile(context, ""); + return new TreeMergeRecordWriter(context, workDir); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class TreeMergeRecordWriter extends RecordWriter { + + private final Path workDir; + private final List shards = new ArrayList(); + private final HeartBeater heartBeater; + private final TaskAttemptContext context; + + private static final Logger LOG = LoggerFactory.getLogger(TreeMergeRecordWriter.class); + + public TreeMergeRecordWriter(TaskAttemptContext context, Path workDir) { + this.workDir = new Path(workDir, "data/index"); + this.heartBeater = new HeartBeater(context); + this.context = context; + } + + @Override + public void write(Text key, NullWritable value) { + LOG.info("map key: {}", key); + heartBeater.needHeartBeat(); + try { + Path path = new Path(key.toString()); + shards.add(path); + } finally { + heartBeater.cancelHeartBeat(); + } + } + + @Override + public void close(TaskAttemptContext context) throws IOException { + LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards); + writeShardNumberFile(context); + heartBeater.needHeartBeat(); + try { + Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration()); + + // TODO: shouldn't we pull the Version from the solrconfig.xml? + IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null) + .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false) + //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml? + //.setMergeScheduler(...) 
// TODO: grab tuned MergeScheduler from solrconfig.xml? + ; + + if (LOG.isDebugEnabled()) { + writerConfig.setInfoStream(System.out); + } +// writerConfig.setRAMBufferSizeMB(100); // improve performance +// writerConfig.setMaxThreadStates(1); + + // disable compound file to improve performance + // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html + // also see defaults in SolrIndexConfig + MergePolicy mergePolicy = writerConfig.getMergePolicy(); + LOG.debug("mergePolicy was: {}", mergePolicy); + if (mergePolicy instanceof TieredMergePolicy) { + ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0); +// ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000); +// ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000); +// ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000); + } else if (mergePolicy instanceof LogMergePolicy) { + ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0); + } + LOG.info("Using mergePolicy: {}", mergePolicy); + + IndexWriter writer = new IndexWriter(mergedIndex, writerConfig); + + Directory[] indexes = new Directory[shards.size()]; + for (int i = 0; i < shards.size(); i++) { + indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration()); + } + + context.setStatus("Logically merging " + shards.size() + " shards into one shard"); + LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir); + long start = System.currentTimeMillis(); + + writer.addIndexes(indexes); + // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) + // This can improve performance and turns this phase into a true "logical" merge, completing in constant time. 
+ // See https://issues.apache.org/jira/browse/LUCENE-4746 + + if (LOG.isDebugEnabled()) { + context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start); + } + float secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Logical merge took {} secs", secs); + int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE); + context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments"); + LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments); + start = System.currentTimeMillis(); + if (maxSegments < Integer.MAX_VALUE) { + writer.forceMerge(maxSegments); + // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data + // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html + } + if (LOG.isDebugEnabled()) { + context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start); + } + secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs); + + start = System.currentTimeMillis(); + LOG.info("Optimizing Solr: Closing index writer"); + writer.close(); + secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs); + context.setStatus("Done"); + } finally { + heartBeater.cancelHeartBeat(); + heartBeater.close(); + } + } + + /* + * For background see MapReduceIndexerTool.renameTreeMergeShardDirs() + * + * Also see MapReduceIndexerTool.run() method where it uses + * NLineInputFormat.setNumLinesPerSplit(job, options.fanout) + */ + private void writeShardNumberFile(TaskAttemptContext context) throws IOException { + Preconditions.checkArgument(shards.size() > 
0); + String shard = shards.get(0).getParent().getParent().getName(); // move up from "data/index" + String taskId = shard.substring("part-m-".length(), shard.length()); // e.g. part-m-00001 + int taskNum = Integer.parseInt(taskId); + int outputShardNum = taskNum / shards.size(); + LOG.debug("Merging into outputShardNum: " + outputShardNum + " from taskId: " + taskId); + Path shardNumberFile = new Path(workDir.getParent().getParent(), TreeMergeMapper.SOLR_SHARD_NUMBER); + OutputStream out = shardNumberFile.getFileSystem(context.getConfiguration()).create(shardNumberFile); + Writer writer = new OutputStreamWriter(out, Charsets.UTF_8); + writer.write(String.valueOf(outputShardNum)); + writer.flush(); + writer.close(); + } + } +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java new file mode 100644 index 00000000000..1ad141a4264 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.hadoop; + +import java.io.BufferedReader; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; + +public class UnbufferedDataInputInputStream extends org.apache.solr.common.util.DataInputInputStream { + private final DataInputStream in; + + public UnbufferedDataInputInputStream(DataInput in) { + this.in = new DataInputStream(DataInputInputStream.constructInputStream(in)); + } + + @Override + public void readFully(byte[] b) throws IOException { + in.readFully(b); + } + + @Override + public void readFully(byte[] b, int off, int len) throws IOException { + in.readFully(b, off, len); + } + + @Override + public int skipBytes(int n) throws IOException { + return in.skipBytes(n); + } + + @Override + public boolean readBoolean() throws IOException { + return in.readBoolean(); + } + + @Override + public byte readByte() throws IOException { + return in.readByte(); + } + + @Override + public int readUnsignedByte() throws IOException { + return in.readUnsignedByte(); + } + + @Override + public short readShort() throws IOException { + return in.readShort(); + } + + @Override + public int readUnsignedShort() throws IOException { + return in.readUnsignedShort(); + } + + @Override + public char readChar() throws IOException { + return in.readChar(); + } + + @Override + public int readInt() throws IOException { + return in.readInt(); + } + + @Override + public long readLong() throws IOException { + return in.readLong(); + } + + @Override + public float readFloat() throws IOException { + return in.readFloat(); + } + + @Override + public double readDouble() throws IOException { + return in.readDouble(); + } + + @Override + public String readLine() throws IOException { + BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + return reader.readLine(); + } + + @Override + public String readUTF() throws IOException { + return in.readUTF(); + } + + @Override + 
public int read() throws IOException { + return in.read(); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/Utils.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/Utils.java new file mode 100644 index 00000000000..c20d5784c0d --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/Utils.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.hadoop;
+
+import java.io.File;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.PropertyConfigurator;
+
+import com.google.common.annotations.Beta;
+
+
+/** Small static helpers for log4j configuration hand-over and class-name shortening. */
+@Beta
+public final class Utils {
+
+  /** Configuration key under which the log4j properties file name is stored. */
+  private static final String LOG_CONFIG_FILE = "hadoop.log4j.configuration";
+
+  /**
+   * Stores the log4j properties file in {@code conf}. Only the file's simple name
+   * ({@code file.getName()}) is recorded, not its directory.
+   */
+  public static void setLogConfigFile(File file, Configuration conf) {
+    conf.set(LOG_CONFIG_FILE, file.getName());
+  }
+
+  /**
+   * Despite its name this method returns nothing: it looks up the file name stored by
+   * {@link #setLogConfigFile} and, if one is present, configures log4j from it.
+   */
+  public static void getLogConfigFile(Configuration conf) {
+    String log4jPropertiesFile = conf.get(LOG_CONFIG_FILE);
+    if (log4jPropertiesFile != null) {
+      PropertyConfigurator.configure(log4jPropertiesFile);
+    }
+  }
+
+  /** Returns {@link #getShortClassName(String)} of {@code clazz.getName()}. */
+  public static String getShortClassName(Class clazz) {
+    return getShortClassName(clazz.getName());
+  }
+
+  /**
+   * Returns the part of {@code className} after its last '.' or '$', whichever comes
+   * later; the whole string if it contains neither.
+   */
+  public static String getShortClassName(String className) {
+    int i = className.lastIndexOf('.'); // regular class
+    int j = className.lastIndexOf('$'); // inner class
+    return className.substring(1 + Math.max(i, j));
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java
new file mode 100644
index 00000000000..c8de94cda5d
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java
@@ -0,0 +1,200 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.Aliases; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkCoreNodeProps; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.StrUtils; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Files; + +/** + * Extracts SolrCloud information from ZooKeeper. 
+ */ +final class ZooKeeperInspector { + + private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperInspector.class); + + public List> extractShardUrls(String zkHost, String collection) { + + DocCollection docCollection = extractDocCollection(zkHost, collection); + List slices = getSortedSlices(docCollection.getSlices()); + List> solrUrls = new ArrayList>(slices.size()); + for (Slice slice : slices) { + if (slice.getLeader() == null) { + throw new IllegalArgumentException("Cannot find SolrCloud slice leader. " + + "It looks like not all of your shards are registered in ZooKeeper yet"); + } + Collection replicas = slice.getReplicas(); + List urls = new ArrayList(replicas.size()); + for (Replica replica : replicas) { + ZkCoreNodeProps props = new ZkCoreNodeProps(replica); + urls.add(props.getCoreUrl()); + } + solrUrls.add(urls); + } + return solrUrls; + } + + public DocCollection extractDocCollection(String zkHost, String collection) { + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + SolrZkClient zkClient = getZkClient(zkHost); + + try { + ZkStateReader zkStateReader = new ZkStateReader(zkClient); + try { + // first check for alias + collection = checkForAlias(zkClient, collection); + zkStateReader.createClusterStateWatchersAndUpdate(); + } catch (Exception e) { + throw new IllegalArgumentException("Cannot find expected information for SolrCloud in ZooKeeper: " + zkHost, e); + } + + try { + return zkStateReader.getClusterState().getCollection(collection); + } catch (SolrException e) { + throw new IllegalArgumentException("Cannot find collection '" + collection + "' in ZooKeeper: " + zkHost, e); + } + } finally { + zkClient.close(); + } + } + + public SolrZkClient getZkClient(String zkHost) { + if (zkHost == null) { + throw new IllegalArgumentException("zkHost must not be null"); + } + + SolrZkClient zkClient; + try { + zkClient = new SolrZkClient(zkHost, 30000); + } catch (Exception e) { + throw new 
IllegalArgumentException("Cannot connect to ZooKeeper: " + zkHost, e); + } + return zkClient; + } + + public List getSortedSlices(Collection slices) { + List sorted = new ArrayList(slices); + Collections.sort(sorted, new Comparator() { + @Override + public int compare(Slice slice1, Slice slice2) { + Comparator c = new AlphaNumericComparator(); + return c.compare(slice1.getName(), slice2.getName()); + } + }); + LOG.trace("Sorted slices: {}", sorted); + return sorted; + } + + /** + * Returns config value given collection name + * Borrowed heavily from Solr's ZKController. + */ + public String readConfigName(SolrZkClient zkClient, String collection) + throws KeeperException, InterruptedException { + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + String configName = null; + + // first check for alias + collection = checkForAlias(zkClient, collection); + + String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; + if (LOG.isInfoEnabled()) { + LOG.info("Load collection config from:" + path); + } + byte[] data = zkClient.getData(path, null, null, true); + + if(data != null) { + ZkNodeProps props = ZkNodeProps.load(data); + configName = props.getStr(ZkController.CONFIGNAME_PROP); + } + + if (configName != null && !zkClient.exists(ZkController.CONFIGS_ZKNODE + "/" + configName, true)) { + LOG.error("Specified config does not exist in ZooKeeper:" + configName); + throw new IllegalArgumentException("Specified config does not exist in ZooKeeper:" + + configName); + } + + return configName; + } + + private String checkForAlias(SolrZkClient zkClient, String collection) + throws KeeperException, InterruptedException { + byte[] aliasData = zkClient.getData(ZkStateReader.ALIASES, null, null, true); + Aliases aliases = ClusterState.load(aliasData); + String alias = aliases.getCollectionAlias(collection); + if (alias != null) { + List aliasList = StrUtils.splitSmart(alias, ",", true); + if (aliasList.size() > 1) { + 
throw new IllegalArgumentException("collection cannot be an alias that maps to multiple collections"); + } + collection = aliasList.get(0); + } + return collection; + } + + /** + * Download and return the config directory from ZK + */ + public File downloadConfigDir(SolrZkClient zkClient, String configName) + throws IOException, InterruptedException, KeeperException { + File dir = Files.createTempDir(); + dir.deleteOnExit(); + ZkController.downloadConfigDir(zkClient, configName, dir); + File confDir = new File(dir, "conf"); + if (!confDir.isDirectory()) { + // create a temporary directory with "conf" subdir and mv the config in there. This is + // necessary because of CDH-11188; solrctl does not generate nor accept directories with e.g. + // conf/solrconfig.xml which is necessary for proper solr operation. This should work + // even if solrctl changes. + confDir = new File(Files.createTempDir().getAbsolutePath(), "conf"); + confDir.getParentFile().deleteOnExit(); + Files.move(dir, confDir); + dir = confDir.getParentFile(); + } + FileUtils.writeStringToFile(new File(dir, "solr.xml"), "", "UTF-8"); + return dir; + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java new file mode 100644 index 00000000000..0eae9405717 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.dedup; + +import java.util.Iterator; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; + +/** + * UpdateConflictResolver implementation that returns the solr documents in the + * same order as they are received on input, i.e. without change in order. + */ +public final class NoChangeUpdateConflictResolver implements UpdateConflictResolver { + + @Override + public Iterator orderUpdates(Text key, Iterator updates, Context ctx) { + return updates; + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java new file mode 100644 index 00000000000..60efb4c15bb --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.dedup; + +import java.util.Collections; +import java.util.Iterator; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; + +/** + * UpdateConflictResolver implementation that rejects multiple documents with + * the same key with an exception. + */ +public final class RejectingUpdateConflictResolver implements UpdateConflictResolver { + + @Override + public Iterator orderUpdates(Text key, Iterator updates, Context ctx) { + SolrInputDocument firstUpdate = null; + while (updates.hasNext()) { + if (firstUpdate == null) { + firstUpdate = updates.next(); + assert firstUpdate != null; + } else { + throw new IllegalArgumentException("Update conflict! Documents with the same unique key are forbidden: " + + key); + } + } + assert firstUpdate != null; + return Collections.singletonList(firstUpdate).iterator(); + } + +} diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java new file mode 100644 index 00000000000..1994c163dea --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.dedup; + +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.hadoop.HdfsFileFieldNames; +import org.apache.solr.hadoop.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * UpdateConflictResolver implementation that ignores all but the most recent + * document version, based on a configurable numeric Solr field, which defaults + * to the file_last_modified timestamp. 
+ */
+public class RetainMostRecentUpdateConflictResolver implements UpdateConflictResolver, Configurable {
+
+  private Configuration conf;
+  // Name of the Solr field whose value decides which document wins.
+  private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT;
+
+  /** Configuration key under which the order-by field name is looked up in setConf(). */
+  public static final String ORDER_BY_FIELD_NAME_KEY =
+      RetainMostRecentUpdateConflictResolver.class.getName() + ".orderByFieldName";
+
+  public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED;
+
+  // Counter group and counter names used when reporting dropped documents.
+  public static final String COUNTER_GROUP = Utils.getShortClassName(RetainMostRecentUpdateConflictResolver.class);
+  public static final String DUPLICATES_COUNTER_NAME = "Number of documents ignored as duplicates";
+  public static final String OUTDATED_COUNTER_NAME = "Number of documents ignored as outdated";
+
+  private static final Logger LOG = LoggerFactory.getLogger(RetainMostRecentUpdateConflictResolver.class);
+
+  /**
+   * Stores the configuration and re-reads the order-by field name from
+   * {@link #ORDER_BY_FIELD_NAME_KEY}; when the key is absent the value
+   * currently held is kept.
+   */
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    this.orderByFieldName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName);
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  protected String getOrderByFieldName() {
+    return orderByFieldName;
+  }
+
+  /**
+   * Keeps only the update with the greatest order-by field value, comparing
+   * values with {@link SolrInputDocumentComparator.TimeStampComparator}.
+   * {@code key} is not used.
+   */
+  @Override
+  public Iterator orderUpdates(Text key, Iterator updates, Context ctx) {
+    return getMaximum(updates, getOrderByFieldName(), new SolrInputDocumentComparator.TimeStampComparator(), ctx);
+  }
+
+  /**
+   * Returns the most recent document among the colliding updates.
+   *
+   * Walks {@code updates} once, keeping the current maximum according to
+   * {@code child} applied to the first value of {@code fieldName}. A
+   * document that compares equal to the current maximum is dropped and
+   * counted as a duplicate (the earlier one is kept); every other dropped
+   * document is counted as outdated. Each counter is touched only when its
+   * tally is greater than zero.
+   *
+   * @return an iterator over a single-element list holding the maximum
+   */
+  protected Iterator getMaximum(Iterator updates, String fieldName,
+      Comparator child, Context context) {
+
+    SolrInputDocumentComparator comp = new SolrInputDocumentComparator(fieldName, child);
+    SolrInputDocument max = null;
+    long numDupes = 0;
+    long numOutdated = 0;
+    while (updates.hasNext()) {
+      SolrInputDocument next = updates.next();
+      assert next != null;
+      if (max == null) {
+        max = next;
+      } else {
+        int c = comp.compare(next, max);
+        if (c == 0) {
+          LOG.debug("Ignoring document version because it is a duplicate: {}", next);
+          numDupes++;
+        } else if (c > 0) {
+          // next is newer: the previous maximum is the one being discarded
+          LOG.debug("Ignoring document version because it is outdated: {}", max);
+          max = next;
+          numOutdated++;
+        } else {
+          LOG.debug("Ignoring document version because it is outdated: {}", next);
+          numOutdated++;
+        }
+      }
+    }
+
+    // Only an assertion guards against an empty input iterator.
+    assert max != null;
+    if (numDupes > 0) {
+      context.getCounter(COUNTER_GROUP, DUPLICATES_COUNTER_NAME).increment(numDupes);
+    }
+    if (numOutdated > 0) {
+      context.getCounter(COUNTER_GROUP, OUTDATED_COUNTER_NAME).increment(numOutdated);
+    }
+    return Collections.singletonList(max).iterator();
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java
new file mode 100644
index 00000000000..e8cfdbb52e4
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.Comparator;
+
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+
+/**
+ * Default mechanism of determining which of two Solr documents with the same
+ * key is the more recent version.
+ */
+public final class SolrInputDocumentComparator implements Comparator {
+
+  // Compares the first values of the chosen field once both documents have it.
+  private Comparator child;
+  // Name of the document field whose first value is compared.
+  private String fieldName;
+
+  SolrInputDocumentComparator(String fieldName, Comparator child) {
+    this.child = child;
+    this.fieldName = fieldName;
+  }
+
+  /**
+   * Orders two documents by the first value of the configured field.
+   *
+   * A document that lacks the field sorts before one that has it; two
+   * documents that both lack it (or share the same field instance) compare
+   * equal. Otherwise the first values are handed to the child comparator.
+   */
+  @Override
+  public int compare(SolrInputDocument doc1, SolrInputDocument doc2) {
+    SolrInputField f1 = doc1.getField(fieldName);
+    SolrInputField f2 = doc2.getField(fieldName);
+    if (f1 == f2) {
+      return 0;
+    } else if (f1 == null) {
+      return -1;
+    } else if (f2 == null) {
+      return 1;
+    }
+
+    Object v1 = f1.getFirstValue();
+    Object v2 = f2.getFirstValue();
+    return child.compare(v1, v2);
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /**
+   * Compares two values as long integers, with null sorting first.
+   */
+  public static final class TimeStampComparator implements Comparator {
+
+    /**
+     * Identical references compare equal and a null value sorts before a
+     * non-null one; otherwise both values are converted with getLong() and
+     * compared numerically.
+     */
+    @Override
+    public int compare(Object v1, Object v2) {
+      if (v1 == v2) {
+        return 0;
+      } else if (v1 == null) {
+        return -1;
+      } else if (v2 == null) {
+        return 1;
+      }
+      long t1 = getLong(v1);
+      long t2 = getLong(v2);
+      return (t1 < t2 ? -1 : (t1==t2 ? 0 : 1));
+    }
+
+    // A Long is unboxed directly; anything else is parsed from its string form,
+    // so a non-numeric value surfaces as a NumberFormatException from parseLong.
+    private long getLong(Object v) {
+      if (v instanceof Long) {
+        return ((Long) v).longValue();
+      } else {
+        return Long.parseLong(v.toString());
+      }
+    }
+
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
new file mode 100644
index 00000000000..24ea9363801
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer.Context;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.hadoop.HdfsFileFieldNames;
+
+/**
+ * UpdateConflictResolver implementation that orders colliding updates ascending
+ * from least recent to most recent (partial) update, based on a configurable
+ * numeric Solr field, which defaults to the file_last_modified timestamp.
+ */
+public class SortingUpdateConflictResolver implements UpdateConflictResolver, Configurable {
+
+  private Configuration conf;
+  // Name of the Solr field whose value defines the sort order.
+  private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT;
+
+  /** Configuration key under which the order-by field name is looked up in setConf(). */
+  public static final String ORDER_BY_FIELD_NAME_KEY =
+      SortingUpdateConflictResolver.class.getName() + ".orderByFieldName";
+
+  public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED;
+
+  /**
+   * Stores the configuration and re-reads the order-by field name from
+   * {@link #ORDER_BY_FIELD_NAME_KEY}; when the key is absent the value
+   * currently held is kept.
+   */
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    this.orderByFieldName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName);
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  protected String getOrderByFieldName() {
+    return orderByFieldName;
+  }
+
+  /**
+   * Returns all updates sorted ascending by the order-by field, comparing
+   * values with {@link SolrInputDocumentComparator.TimeStampComparator}.
+   * {@code key} and {@code ctx} are not used.
+   */
+  @Override
+  public Iterator orderUpdates(Text key, Iterator updates, Context ctx) {
+    return sort(updates, getOrderByFieldName(), new SolrInputDocumentComparator.TimeStampComparator());
+  }
+
+  /**
+   * Drains {@code updates} into an in-memory list and sorts it with a
+   * {@link SolrInputDocumentComparator} built from {@code fieldName} and
+   * {@code child}. The sort is skipped when there are fewer than two
+   * elements; an empty input yields an empty iterator.
+   */
+  protected Iterator sort(Iterator updates, String fieldName, Comparator child) {
+    // TODO: use an external merge sort in the pathological case where there are a huge amount of collisions
+    List sortedUpdates = new ArrayList(1);
+    while (updates.hasNext()) {
+      sortedUpdates.add(updates.next());
+    }
+    if (sortedUpdates.size() > 1) { // conflicts are rare
+      Collections.sort(sortedUpdates, new SolrInputDocumentComparator(fieldName, child));
+    }
+    return sortedUpdates.iterator();
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
new file mode 100644
index 00000000000..94e23e134eb
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.Reducer.Context;
+import org.apache.solr.common.SolrInputDocument;
+
+/**
+ * Interface that enables deduplication and ordering of a series of document
+ * updates for the same unique document key.
+ *
+ * For example, a MapReduce batch job might index multiple files in the same job
+ * where some of the files contain old and new versions of the very same
+ * document, using the same unique document key.
+ *
+ * Typically, implementations of this interface forbid collisions by throwing an
+ * exception, or ignore all but the most recent document version, or, in the
+ * general case, order colliding updates ascending from least recent to most
+ * recent (partial) update.
+ *
+ * The caller of this interface (i.e. the Hadoop Reducer) will then apply the
+ * updates to Solr in the order returned by the orderUpdates() method.
+ *
+ * Configuration: If an UpdateConflictResolver implementation also implements
+ * {@link Configurable} then the Hadoop Reducer will call
+ * {@link Configurable#setConf(org.apache.hadoop.conf.Configuration)} on
+ * instance construction and pass the standard Hadoop configuration information.
+ */
+public interface UpdateConflictResolver {
+
+  /**
+   * Given a list of all colliding document updates for the same unique document
+   * key, this method returns zero or more documents in an application specific
+   * order.
+   *
+   * The caller will then apply the updates for this key to Solr in the order
+   * returned by the orderUpdates() method.
+   *
+   * @param uniqueKey
+   *          the document key common to all collidingUpdates mentioned below
+   * @param collidingUpdates
+   *          all updates in the MapReduce job that have a key equal to
+   *          {@code uniqueKey} mentioned above. The input order is unspecified.
+   * @param context
+   *          The Context passed from the {@link Reducer}
+   *          implementations.
+   * @return the order in which the updates shall be applied to Solr
+   */
+  Iterator orderUpdates(
+      Text uniqueKey, Iterator collidingUpdates, Context context);
+
+}
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package.html b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package.html
new file mode 100644
index 00000000000..5543f0262be
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Dedupe related code.
+ + diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java new file mode 100644 index 00000000000..5ba98ff3968 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.hadoop.morphline;
+
+import org.apache.solr.hadoop.Utils;
+
+/**
+ * Counter names reported by the morphline mapper code. Each constant carries
+ * a human-readable label made of the short class name of
+ * {@link MorphlineMapper} followed by a description; {@link #toString()}
+ * returns that label rather than the constant name.
+ */
+public enum MorphlineCounters {
+
+  FILES_READ (getClassName(MorphlineMapper.class) + ": Number of files read"),
+
+  FILE_BYTES_READ (getClassName(MorphlineMapper.class) + ": Number of file bytes read"),
+
+  DOCS_READ (getClassName(MorphlineMapper.class) + ": Number of documents read"),
+
+  PARSER_OUTPUT_BYTES (getClassName(MorphlineMapper.class) + ": Number of document bytes generated by Tika parser"),
+
+  ERRORS (getClassName(MorphlineMapper.class) + ": Number of errors");
+
+  private final String label;
+
+  private MorphlineCounters(String label) {
+    this.label = label;
+  }
+
+  /** Returns the descriptive label, not the enum constant name. */
+  public String toString() {
+    return label;
+  }
+
+  // Thin wrapper so the constant initializers above stay short.
+  private static String getClassName(Class clazz) {
+    return Utils.getShortClassName(clazz);
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
new file mode 100644
index 00000000000..606ac05fd2e
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.morphline; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.mapreduce.Mapper.Context; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.hadoop.HdfsFileFieldNames; +import org.apache.solr.hadoop.PathParts; +import org.apache.solr.hadoop.Utils; +import org.apache.solr.morphlines.solr.DocumentLoader; +import org.apache.solr.morphlines.solr.SolrLocator; +import org.apache.solr.morphlines.solr.SolrMorphlineContext; +import org.apache.solr.schema.IndexSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Compiler; +import com.cloudera.cdk.morphline.base.FaultTolerance; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Metrics; +import com.cloudera.cdk.morphline.base.Notifications; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import com.google.common.annotations.Beta; +import com.google.common.base.Joiner; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + +/** + * Internal helper for {@link MorphlineMapper} and dryRun mode; This API is for *INTERNAL* use only + * and should not be considered public. 
+ */
+@Beta
+public final class MorphlineMapRunner {
+
+  private MorphlineContext morphlineContext;
+  private Command morphline;
+  private IndexSchema schema;
+  // Record fields forced from the command line (config keys starting with MORPHLINE_FIELD_PREFIX).
+  private Map commandLineMorphlineHeaders;
+  private boolean disableFileOpen;
+  // "<morphlineFile>@<morphlineId>", used only in log messages.
+  private String morphlineFileAndId;
+  private final Timer elapsedTime;
+
+  public static final String MORPHLINE_FILE_PARAM = "morphlineFile";
+  public static final String MORPHLINE_ID_PARAM = "morphlineId";
+
+  /**
+   * Morphline variables can be passed from the CLI to the Morphline, e.g.:
+   * hadoop ... -D morphlineVariable.zkHost=127.0.0.1:2181/solr
+   */
+  public static final String MORPHLINE_VARIABLE_PARAM = "morphlineVariable";
+
+  /**
+   * Headers, including MIME types, can also explicitly be passed by force from the CLI to Morphline, e.g:
+   * hadoop ... -D morphlineField._attachment_mimetype=text/csv
+   */
+  public static final String MORPHLINE_FIELD_PREFIX = "morphlineField.";
+
+  /**
+   * Flag to disable reading of file contents if indexing just file metadata is sufficient.
+   * This improves performance and confidentiality.
+   */
+  public static final String DISABLE_FILE_OPEN = "morphlineDisableFileOpen";
+
+  private static final Logger LOG = LoggerFactory.getLogger(MorphlineMapRunner.class);
+
+  MorphlineContext getMorphlineContext() {
+    return morphlineContext;
+  }
+
+  IndexSchema getSchema() {
+    return schema;
+  }
+
+  /**
+   * Builds the morphline context, loads the Solr schema and compiles the
+   * morphline named by the configuration.
+   *
+   * Steps, in order: optionally trace-log the working directory and the
+   * sorted configuration; create the fault-tolerance handler from the
+   * configuration; build a first context without a schema so a SolrLocator
+   * can load the IndexSchema from {@code solrHomeDir}; rebuild the context
+   * with that schema while reusing the same MetricRegistry; compile the
+   * morphline file with any "morphlineVariable."-prefixed settings as
+   * overrides; collect "morphlineField."-prefixed settings as forced record
+   * headers; register the elapsed-time timer and signal begin-transaction.
+   *
+   * @throws MorphlineCompilationException if {@link #MORPHLINE_FILE_PARAM}
+   *           is missing or blank
+   */
+  public MorphlineMapRunner(Configuration configuration, DocumentLoader loader, String solrHomeDir) throws IOException {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("CWD is {}", new File(".").getCanonicalPath());
+      // TreeMap so the dumped configuration is sorted by key
+      TreeMap map = new TreeMap();
+      for (Map.Entry entry : configuration) {
+        map.put(entry.getKey(), entry.getValue());
+      }
+      LOG.trace("Configuration:\n{}", Joiner.on("\n").join(map.entrySet()));
+    }
+
+    FaultTolerance faultTolerance = new FaultTolerance(
+        configuration.getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false),
+        configuration.getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false),
+        configuration.get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName())
+        );
+
+    morphlineContext = new SolrMorphlineContext.Builder()
+      .setDocumentLoader(loader)
+      .setExceptionHandler(faultTolerance)
+      .setMetricRegistry(new MetricRegistry())
+      .build();
+
+    class MySolrLocator extends SolrLocator { // trick to access protected ctor
+      public MySolrLocator(MorphlineContext ctx) {
+        super(ctx);
+      }
+    }
+
+    SolrLocator locator = new MySolrLocator(morphlineContext);
+    locator.setSolrHomeDir(solrHomeDir);
+    schema = locator.getIndexSchema();
+
+    // rebuild context, now with schema
+    morphlineContext = new SolrMorphlineContext.Builder()
+      .setIndexSchema(schema)
+      .setDocumentLoader(loader)
+      .setExceptionHandler(faultTolerance)
+      .setMetricRegistry(morphlineContext.getMetricRegistry())
+      .build();
+
+    String morphlineFile = configuration.get(MORPHLINE_FILE_PARAM);
+    String morphlineId = configuration.get(MORPHLINE_ID_PARAM);
+    if (morphlineFile == null || morphlineFile.trim().length() == 0) {
+      throw new MorphlineCompilationException("Missing parameter: " + MORPHLINE_FILE_PARAM, null);
+    }
+    // Strip the "morphlineVariable." prefix and pass the rest as config overrides.
+    Map morphlineVariables = new HashMap();
+    for (Map.Entry entry : configuration) {
+      String variablePrefix = MORPHLINE_VARIABLE_PARAM + ".";
+      if (entry.getKey().startsWith(variablePrefix)) {
+        morphlineVariables.put(entry.getKey().substring(variablePrefix.length()), entry.getValue());
+      }
+    }
+    Config override = ConfigFactory.parseMap(morphlineVariables);
+    morphline = new Compiler().compile(new File(morphlineFile), morphlineId, morphlineContext, null, override);
+    morphlineFileAndId = morphlineFile + "@" + morphlineId;
+
+    disableFileOpen = configuration.getBoolean(DISABLE_FILE_OPEN, false);
+    LOG.debug("disableFileOpen: {}", disableFileOpen);
+
+    commandLineMorphlineHeaders = new HashMap();
+    for (Map.Entry entry : configuration) {
+      if (entry.getKey().startsWith(MORPHLINE_FIELD_PREFIX)) {
+        commandLineMorphlineHeaders.put(entry.getKey().substring(MORPHLINE_FIELD_PREFIX.length()), entry.getValue());
+      }
+    }
+    LOG.debug("Headers, including MIME types, passed by force from the CLI to morphline: {}", commandLineMorphlineHeaders);
+
+    String metricName = MetricRegistry.name(Utils.getShortClassName(getClass()), Metrics.ELAPSED_TIME);
+    this.elapsedTime = morphlineContext.getMetricRegistry().timer(metricName);
+    Notifications.notifyBeginTransaction(morphline);
+  }
+
+  /**
+   * Runs the morphline over the file whose path is given in {@code value}.
+   *
+   * A header record is built from the file's metadata (nothing is done if
+   * the file status is unavailable), command-line headers overwrite matching
+   * record fields, and the file content stream, or an empty stream when file
+   * opening is disabled, is attached before the record is handed to the
+   * morphline. Any exception is logged, counted when {@code context} is
+   * non-null, and passed to the context's exception handler. The timer is
+   * stopped and the stream closed in all cases.
+   *
+   * @param context may be null, in which case no counters are updated
+   */
+  public void map(String value, Configuration configuration, Context context) throws IOException {
+    LOG.info("Processing file {}", value);
+    InputStream in = null;
+    Record record = null;
+    Timer.Context timerContext = elapsedTime.time();
+    try {
+      PathParts parts = new PathParts(value.toString(), configuration);
+      record = getRecord(parts);
+      if (record == null) {
+        return; // ignore
+      }
+      for (Map.Entry entry : commandLineMorphlineHeaders.entrySet()) {
+        record.replaceValues(entry.getKey(), entry.getValue());
+      }
+      long fileLength = parts.getFileStatus().getLen();
+      if (disableFileOpen) {
+        in = new ByteArrayInputStream(new byte[0]);
+      } else {
+        in = new BufferedInputStream(parts.getFileSystem().open(parts.getUploadPath()));
+      }
+      record.put(Fields.ATTACHMENT_BODY, in);
+      Notifications.notifyStartSession(morphline);
+      if (!morphline.process(record)) {
+        LOG.warn("Morphline {} failed to process record: {}", morphlineFileAndId, record);
+      }
+      if (context != null) {
+        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILES_READ.toString()).increment(1);
+        // NOTE(review): fileLength comes from the file status, so this counter grows
+        // by the full file size even when disableFileOpen is set - confirm intended.
+        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILE_BYTES_READ.toString()).increment(fileLength);
+      }
+    } catch (Exception e) {
+      LOG.error("Unable to process file " + value, e);
+      if (context != null) {
+        // one counter per exception class, grouped under "<this class>.errors"
+        context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1);
+      }
+      morphlineContext.getExceptionHandler().handleException(e, record);
+    } finally {
+      timerContext.stop();
+      if (in != null) {
+        in.close();
+      }
+    }
+  }
+
+  /**
+   * Builds the header record for a file from its path parts and file status.
+   *
+   * @return the populated record, or null (after logging a warning) when the
+   *         file status is null or reading it throws an IOException
+   */
+  protected Record getRecord(PathParts parts) {
+    FileStatus stats;
+    try {
+      stats = parts.getFileStatus();
+    } catch (IOException e) {
+      // treated the same as a missing file; the exception itself is not logged
+      stats = null;
+    }
+    if (stats == null) {
+      LOG.warn("Ignoring file that somehow has become unavailable since the job was submitted: {}",
+          parts.getUploadURL());
+      return null;
+    }
+
+    Record headers = new Record();
+    //headers.put(getSchema().getUniqueKeyField().getName(), parts.getId()); // use HDFS file path as docId if no docId is specified
+    headers.put(Fields.BASE_ID, parts.getId()); // with sanitizeUniqueKey command, use HDFS file path as docId if no docId is specified
+    headers.put(Fields.ATTACHMENT_NAME, parts.getName()); // Tika can use the file name in guessing the right MIME type
+
+    // enable indexing and storing of file meta data in Solr
+    headers.put(HdfsFileFieldNames.FILE_UPLOAD_URL, parts.getUploadURL());
+    headers.put(HdfsFileFieldNames.FILE_DOWNLOAD_URL, parts.getDownloadURL());
+    headers.put(HdfsFileFieldNames.FILE_SCHEME, parts.getScheme());
+    headers.put(HdfsFileFieldNames.FILE_HOST, parts.getHost());
+    headers.put(HdfsFileFieldNames.FILE_PORT, String.valueOf(parts.getPort()));
+    headers.put(HdfsFileFieldNames.FILE_PATH, parts.getURIPath());
+    headers.put(HdfsFileFieldNames.FILE_NAME, parts.getName());
+    headers.put(HdfsFileFieldNames.FILE_LAST_MODIFIED, String.valueOf(stats.getModificationTime())); // FIXME also add in SpoolDirectorySource
+    headers.put(HdfsFileFieldNames.FILE_LENGTH, String.valueOf(stats.getLen())); // FIXME also add in SpoolDirectorySource
+    headers.put(HdfsFileFieldNames.FILE_OWNER, stats.getOwner());
+    headers.put(HdfsFileFieldNames.FILE_GROUP, stats.getGroup());
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_USER, stats.getPermission().getUserAction().SYMBOL);
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_GROUP, stats.getPermission().getGroupAction().SYMBOL);
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_OTHER, stats.getPermission().getOtherAction().SYMBOL);
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_STICKYBIT, String.valueOf(stats.getPermission().getStickyBit()));
+    // TODO: consider to add stats.getAccessTime(), stats.getReplication(), stats.isSymlink(), stats.getBlockSize()
+
+    return headers;
+  }
+
+  /** Signals commit-transaction and then shutdown to the morphline. */
+  public void cleanup() {
+    Notifications.notifyCommitTransaction(morphline);
+    Notifications.notifyShutdown(morphline);
+  }
+
+}
diff --git
a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java new file mode 100644 index 00000000000..8ded6041547 --- /dev/null +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop.morphline; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.hadoop.HeartBeater; +import org.apache.solr.hadoop.SolrInputDocumentWritable; +import org.apache.solr.hadoop.SolrMapper; +import org.apache.solr.morphlines.solr.DocumentLoader; +import org.apache.solr.schema.IndexSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Counting; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; + +/** + * This class takes the input files, extracts the relevant content, transforms + * it and hands SolrInputDocuments to a set of reducers. + * + * More specifically, it consumes a list of <offset, hdfsFilePath> input pairs. + * For each such pair extracts a set of zero or more SolrInputDocuments and + * sends them to a downstream Reducer. The key for the reducer is the unique id + * of the SolrInputDocument specified in Solr schema.xml. 
+ */
+public class MorphlineMapper extends SolrMapper {
+
+  // Context captured in setup(); used by the nested document loader and by addCounting().
+  private Context context;
+  private MorphlineMapRunner runner;
+  private HeartBeater heartBeater;
+
+  private static final Logger LOG = LoggerFactory.getLogger(MorphlineMapper.class);
+
+  protected IndexSchema getSchema() {
+    return runner.getSchema();
+  }
+
+  protected Context getContext() {
+    return context;
+  }
+
+  /**
+   * Remembers the task context, creates the heart beater and builds the
+   * {@link MorphlineMapRunner} with a document loader that writes to this
+   * mapper's output.
+   */
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
+    this.context = context;
+    heartBeater = new HeartBeater(context);
+    this.runner = new MorphlineMapRunner(
+        context.getConfiguration(), new MyDocumentLoader(), getSolrHomeDir().toString());
+  }
+
+  /**
+   * Extract content from the path specified in the value. Key is useless.
+   */
+  @Override
+  public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+    heartBeater.needHeartBeat();
+    try {
+      runner.map(value.toString(), context.getConfiguration(), context);
+    } finally {
+      heartBeater.cancelHeartBeat();
+    }
+  }
+
+  /**
+   * Closes the heart beater, lets the runner commit and shut down the
+   * morphline, then copies the morphline metrics into MapReduce counters.
+   */
+  @Override
+  protected void cleanup(Context context) throws IOException, InterruptedException {
+    heartBeater.close();
+    runner.cleanup();
+    addMetricsToMRCounters(runner.getMorphlineContext().getMetricRegistry(), context);
+    super.cleanup(context);
+  }
+
+  /**
+   * Publishes the count of every counter, histogram, meter and timer in the
+   * registry as a MapReduce counter in the "morphline" group.
+   *
+   * NOTE(review): the {@code context} parameter is not used; addCounting()
+   * writes through the {@code context} field instead.
+   */
+  private void addMetricsToMRCounters(MetricRegistry metricRegistry, Context context) {
+    for (Map.Entry entry : metricRegistry.getCounters().entrySet()) {
+      addCounting(entry.getKey(), entry.getValue(), 1);
+    }
+    for (Map.Entry entry : metricRegistry.getHistograms().entrySet()) {
+      addCounting(entry.getKey(), entry.getValue(), 1);
+    }
+    for (Map.Entry entry : metricRegistry.getMeters().entrySet()) {
+      addCounting(entry.getKey(), entry.getValue(), 1);
+    }
+    for (Map.Entry entry : metricRegistry.getTimers().entrySet()) {
+      // NOTE(review): this divides getCount() by 1,000,000; if getCount() is the
+      // number of timed events rather than elapsed nanoseconds, the published
+      // value will usually be 0 - confirm against the metrics library.
+      long nanosPerMilliSec = 1000 * 1000;
+      addCounting(entry.getKey(), entry.getValue(), nanosPerMilliSec);
+    }
+  }
+
+  // Adds value.getCount() / scale (integer division) to the named "morphline" counter.
+  private void addCounting(String metricName, Counting value, long scale) {
+    context.getCounter("morphline", metricName).increment(value.getCount() / scale);
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /**
+   * DocumentLoader that, instead of sending documents to a Solr server,
+   * emits each document to the mapper output keyed by its unique key.
+   * Transaction and shutdown callbacks are no-ops.
+   */
+  private final class MyDocumentLoader implements DocumentLoader {
+
+    @Override
+    public void beginTransaction() {
+    }
+
+    /**
+     * Writes {@code doc} to the mapper output under the string form of its
+     * unique-key field value and increments the DOCS_READ counter.
+     *
+     * @throws IllegalArgumentException if the document has no value for the
+     *           schema's unique key field
+     * @throws IOException if the write is interrupted or fails
+     */
+    @Override
+    public void load(SolrInputDocument doc) throws IOException, SolrServerException {
+      String uniqueKeyFieldName = getSchema().getUniqueKeyField().getName();
+      Object id = doc.getFieldValue(uniqueKeyFieldName);
+      if (id == null) {
+        throw new IllegalArgumentException("Missing value for (required) unique document key: " + uniqueKeyFieldName
+            + " (see Solr schema.xml)");
+      }
+      try {
+        context.write(new Text(id.toString()), new SolrInputDocumentWritable(doc));
+      } catch (InterruptedException e) {
+        // NOTE(review): the thread's interrupt flag is not restored here.
+        throw new IOException("Interrupted while writing " + doc, e);
+      }
+
+      // The output-size counter is only maintained when debug logging is enabled.
+      if (LOG.isDebugEnabled()) {
+        long numParserOutputBytes = 0;
+        for (SolrInputField field : doc.values()) {
+          numParserOutputBytes += sizeOf(field.getValue());
+        }
+        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.PARSER_OUTPUT_BYTES.toString()).increment(numParserOutputBytes);
+      }
+      context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.DOCS_READ.toString()).increment(1);
+    }
+
+    // just an approximation
+    // (character count for text, fixed sizes for Integer/Long, recursive sum for
+    // collections, length of the string form for anything else)
+    private long sizeOf(Object value) {
+      if (value instanceof CharSequence) {
+        return ((CharSequence) value).length();
+      } else if (value instanceof Integer) {
+        return 4;
+      } else if (value instanceof Long) {
+        return 8;
+      } else if (value instanceof Collection) {
+        long size = 0;
+        for (Object val : (Collection) value) {
+          size += sizeOf(val);
+        }
+        return size;
+      } else {
+        return String.valueOf(value).length();
+      }
+    }
+
+    @Override
+    public void commitTransaction() {
+    }
+
+    @Override
+    public UpdateResponse rollbackTransaction() throws SolrServerException, IOException {
+      return new UpdateResponse();
+    }
+
+    @Override
+    public void shutdown() {
+    }
+
+    @Override
+    public SolrPingResponse ping() throws SolrServerException, IOException {
+      return new SolrPingResponse();
+    }
+
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package.html b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package.html
new file mode 100644
index 00000000000..9597a15d4f5
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Morphlines related code.
+
+
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package.html b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package.html
new file mode 100644
index 00000000000..c90c7a24775
--- /dev/null
+++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+{@link org.apache.solr.hadoop.MapReduceIndexerTool} and related code.
+ + diff --git a/solr/contrib/map-reduce/src/java/overview.html b/solr/contrib/map-reduce/src/java/overview.html new file mode 100644 index 00000000000..ad7c1c0c3fe --- /dev/null +++ b/solr/contrib/map-reduce/src/java/overview.html @@ -0,0 +1,21 @@ + + + +Apache Solr Search Server: Solr MapReduce contrib + + diff --git a/solr/contrib/map-reduce/src/test-files/custom-mimetypes.xml b/solr/contrib/map-reduce/src/test-files/custom-mimetypes.xml new file mode 100644 index 00000000000..6891e42d616 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/custom-mimetypes.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git 
a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_it.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this 
as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. 
お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. 
あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 
町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. 
お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. 
+# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat 
+doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde 
+už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_da.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_de.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_el.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_en.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_es.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt new file 
mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين 
+تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä 
näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt new file 
mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu 
+teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर 
+जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_id.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu 
+kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_it.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more 
+quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat 
+kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_no.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD 
License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_th.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana 
+bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/userdict_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. 
Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. 
+ +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/schema.xml new file mode 100644 index 00000000000..ae2c56d18ae --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/schema.xml @@ -0,0 +1,947 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? 
+ + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/currency.xml b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_it.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt new file mode 100644 index 
00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. 
+# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 
健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. 
In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. 
ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. 
が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. 
かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. 
+非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant 
+e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již 
+než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_da.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_de.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_el.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_en.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_es.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt new file mode 100644 index 
00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند 
+مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo 
noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt new file mode 
100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven 
+túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें 
+वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_id.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan 
+kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_it.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more 
+quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs 
+kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_no.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_th.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni 
+benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/userdict_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. 
Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/protwords.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. 
+dontstems +zwhacky + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/schema.xml b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/schema.xml new file mode 100644 index 00000000000..65192efe442 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/solrconfig.xml b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/solrconfig.xml new file mode 100644 index 00000000000..beff1b2af0a --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/solrconfig.xml @@ -0,0 +1,1784 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + 
true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + 
+ elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/stopwords.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/synonyms.txt b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/map-reduce/src/test-files/solr/minimr/solr.xml b/solr/contrib/map-reduce/src/test-files/solr/minimr/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/minimr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/currency.xml b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_it.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a 
resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. 
+# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 
健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. 
In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. 
ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. 
が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. 
かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. 
+非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant 
+e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již 
+než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt new file mode 100644 index 
00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند 
+مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo 
noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt new file mode 
100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven 
+túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें 
+वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan 
+kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more 
+quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs 
+kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni 
+benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. 
Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/protwords.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. 
+dontstems +zwhacky + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/schema.xml b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/schema.xml new file mode 100644 index 00000000000..b133c135f31 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/solrconfig.xml b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/solrconfig.xml new file mode 100644 index 00000000000..f9683b27db7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/solrconfig.xml @@ -0,0 +1,1789 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + 
${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on 
+ content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? 
+ + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/stopwords.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/synonyms.txt b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/map-reduce/src/test-files/solr/mrunit/solr.xml b/solr/contrib/map-reduce/src/test-files/solr/mrunit/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/mrunit/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solr.xml b/solr/contrib/map-reduce/src/test-files/solr/solr.xml new file mode 100644 index 00000000000..4604f60476f --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/currency.xml b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git 
a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file is removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building your own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 
日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. 
あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 
化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. 
お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. 
+# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del 
+dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy 
+pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/yourself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | but +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt 
b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه 
+نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût 
+fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ 
b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. 
+अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. 
+ | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah 
+kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 
00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | 
who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt 
b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu 
+kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | 
This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# 
(http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji 
(JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/schema.xml b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/schema.xml new file mode 100644 index 00000000000..83080dfa40c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/schema.xml @@ -0,0 +1,914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 
id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + 
+ + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/map-reduce/src/test-files/solr/solrcloud/conf/solrconfig.xml b/solr/contrib/map-reduce/src/test-files/solr/solrcloud/conf/solrconfig.xml new file mode 100644 index 00000000000..a37ab12ecfe --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/solr/solrcloud/conf/solrconfig.xml @@ -0,0 +1,1787 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + 
${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + 
name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/NullHeader.docx b/solr/contrib/map-reduce/src/test-files/test-documents/NullHeader.docx new file mode 100644 index 00000000000..cc62b8d6beb Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/NullHeader.docx differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/boilerplate.html b/solr/contrib/map-reduce/src/test-files/test-documents/boilerplate.html new file mode 100644 index 00000000000..0286578693c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/boilerplate.html @@ -0,0 +1,58 @@ + + + + + + + + Title + + + + + + + +
    + + + + + +
    boilerplatetext
    +
    + +

    This is the real meat of the page, +and represents the text we want. +It has lots of juicy content. + +We assume that it won't get filtered out. +And that all of the lines will be in the +output. +

    + +

    +Here's another paragraph of text. +This is the end of the text. +

    + +

    footer

    + + + diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/complex.mbox b/solr/contrib/map-reduce/src/test-files/test-documents/complex.mbox new file mode 100644 index 00000000000..27f7017d265 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/complex.mbox @@ -0,0 +1,291 @@ +From core-user-return-14700-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 04:28:28 2009 +Return-Path: +Delivered-To: apmail-hadoop-core-user-archive@www.apache.org +Received: (qmail 19921 invoked from network); 1 Jun 2009 04:28:28 -0000 +Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) + by minotaur.apache.org with SMTP; 1 Jun 2009 04:28:28 -0000 +Received: (qmail 84995 invoked by uid 500); 1 Jun 2009 04:28:38 -0000 +Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org +Received: (qmail 84895 invoked by uid 500); 1 Jun 2009 04:28:38 -0000 +Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm +Precedence: bulk +List-Help: +List-Unsubscribe: +List-Post: +List-Id: +Reply-To: core-user@hadoop.apache.org +Delivered-To: mailing list core-user@hadoop.apache.org +Received: (qmail 84885 invoked by uid 99); 1 Jun 2009 04:28:38 -0000 +Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:38 +0000 +X-ASF-Spam-Status: No, hits=1.2 required=10.0 + tests=SPF_NEUTRAL +X-Spam-Check-By: apache.org +Received-SPF: neutral (athena.apache.org: local policy) +Received: from [69.147.107.21] (HELO mrout2-b.corp.re1.wahoo.com) (69.147.107.21) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:26 +0000 +Received: from SNV-EXPF01.ds.corp.wahoo.com (snv-expf01.ds.corp.wahoo.com [207.126.227.250]) + by mrout2-b.corp.re1.wahoo.com (8.13.8/8.13.8/y.out) with ESMTP id n514QYA6099963 + for ; Sun, 31 May 2009 21:26:35 -0700 (PDT) +DomainKey-Signature: a=rsa-sha1; s=serpent; d=wahoo-inc.com; c=nofws; q=dns; + 
h=received:user-agent:date:subject:from:to:message-id: + thread-topic:thread-index:in-reply-to:mime-version:content-type: + content-transfer-encoding:x-originalarrivaltime; + b=YVtSNdgjeeSBS1yY3XDolul49i+HrgNG7QszMo9LzGnrwejjgsl5+iUM6EiQgEpV +Received: from SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.9]) by SNV-EXPF01.ds.corp.wahoo.com with Microsoft SMTPSVC(6.0.3790.3959); + Sun, 31 May 2009 21:26:34 -0700 +Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ; + Mon, 1 Jun 2009 04:26:33 +0000 +User-Agent: Microsoft-Entourage/12.17.0.090302 +Date: Mon, 01 Jun 2009 09:56:31 +0530 +Subject: Re: question about when shuffle/sort start working +From: Sam Judgement +To: +Message-ID: +Thread-Topic: question about when shuffle/sort start working +Thread-Index: AcnicSNoBw19cMU8UEaXwAdZ1YYhuw== +In-Reply-To: <440622.41041.qm@web111005.mail.gq1.wahoo.com> +Mime-version: 1.0 +Content-type: text/plain; + charset="US-ASCII" +Content-transfer-encoding: 7bit +X-OriginalArrivalTime: 01 Jun 2009 04:26:34.0501 (UTC) FILETIME=[257EAB50:01C9E271] +X-Virus-Checked: Checked by ClamAV on apache.org + +When a Mapper completes, MapCompletionEvents are generated. Reducers try to +fetch map outputs for a given map only on the receipt of such events. + +Sam + + +On 5/30/09 10:00 AM, "Jianmin Foo" wrote: + +> Hi, +> I am being confused by the protocol between mapper and reducer. When mapper +> emitting the (key,value) pair done, is there any signal the mapper send out to +> hadoop framework in protocol to indicate that map is done and the shuffle/sort +> can begin for reducer? If there is no this signal in protocol, when the +> framework begin the shuffle/sort? 
+> +> Thanks, +> Jianmin +> +> +> +> + + +From core-user-return-14701-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 05:31:14 2009 +Return-Path: +Delivered-To: apmail-hadoop-core-user-archive@www.apache.org +Received: (qmail 38243 invoked from network); 1 Jun 2009 05:31:14 -0000 +Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) + by minotaur.apache.org with SMTP; 1 Jun 2009 05:31:14 -0000 +Received: (qmail 15621 invoked by uid 500); 1 Jun 2009 05:31:24 -0000 +Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org +Received: (qmail 15557 invoked by uid 500); 1 Jun 2009 05:31:24 -0000 +Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm +Precedence: bulk +List-Help: +List-Unsubscribe: +List-Post: +List-Id: +Reply-To: core-user@hadoop.apache.org +Delivered-To: mailing list core-user@hadoop.apache.org +Received: (qmail 15547 invoked by uid 99); 1 Jun 2009 05:31:24 -0000 +Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 05:31:24 +0000 +X-ASF-Spam-Status: No, hits=2.2 required=10.0 + tests=HTML_MESSAGE,SPF_PASS +X-Spam-Check-By: apache.org +Received-SPF: pass (nike.apache.org: local policy) +Received: from [68.142.237.94] (HELO n9.bullet.re3.wahoo.com) (68.142.237.94) + by apache.org (qpsmtpd/0.29) with SMTP; Mon, 01 Jun 2009 05:31:11 +0000 +Received: from [68.142.237.88] by n9.bullet.re3.wahoo.com with NNFMP; 01 Jun 2009 05:30:50 -0000 +Received: from [67.195.9.82] by t4.bullet.re3.wahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000 +Received: from [67.195.9.99] by t2.bullet.mail.gq1.wahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000 +Received: from [127.0.0.1] by omp103.mail.gq1.wahoo.com with NNFMP; 01 Jun 2009 05:28:01 -0000 +X-wahoo-Newman-Property: ymail-3 +X-wahoo-Newman-Id: 796121.97519.bm@omp103.mail.gq1.wahoo.com +Received: (qmail 35264 invoked by uid 60001); 1 Jun 2009 05:30:49 -0000 +DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=wahoo.com; s=s1024; t=1243834249; bh=R8qzdi/IbLyO8UwpnaujDpT9E+6bJ7nkmZN2803EmRk=; h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type; b=vq4c6RIDbkuLPYd8mirusIXf6DqTb/IeT55In7W00Y5Sxx1ZiXBb78yE9+TDfXJ0elsEZvqv4ocyvolGE0eGtyYeJA0mZikpRNu6pidxPNpCplOcLHBRz7YQ7iERwv3TagRlWy2Xd3oD9ZeV0A05P7WUOiNNX1PUUJD1IVdrEZo= +DomainKey-Signature:a=rsa-sha1; q=dns; c=nofws; + s=s1024; d=wahoo.com; + h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type; + b=6HXZV98ON5vBwmE/xS8stVD0D2F4dkMY7a0suX5KVTb736JdR8G59mqBq/dWcpbFTLiCLtxi18LMb/dU1RKRGOEdn3l3j/jKXhBrhIgfg3qtNskPedXDKBvn7JGXiSkqpA/tUtPjvc0Uuk8/LaA01SQTz40Engg7nD8/EJdIAhA=; +Message-ID: <592088.35091.qm@web111010.mail.gq1.wahoo.com> +X-YMail-OSG: KzhhrJYVM1m.MCS6vRpRP2ZZO2PrfnbngosELDCIa91ZqvhJph4RdmzfUW0jw9W04RCSch1K730bPohwNpNBIk2QR_zt4_mfbhfq7YEPkSoz9LSXG90P9vIo5Fc8qyZN0U6vA9gtdyGQTpN5ahvillUH9nAF0TMWv2SvZJLjPlQ0Z0p8oK8ltBwGTgLrM8Jtdn9D29yoRyi3_EpVOfdD9OP.EK50Vr1XwSUYMbnpZ0WGHMwd.Yig7A6Elwadm3YVbfOdx2mfrG.jQsUAxQjRBNvbrOM57.FaE11kHTe9aoBWSeihNg-- +Received: from [216.145.54.7] by web111010.mail.gq1.wahoo.com via HTTP; Sun, 31 May 2009 22:30:49 PDT +X-Mailer: wahooMailRC/1277.43 wahooMailWebService/0.7.289.10 +References: +Date: Sun, 31 May 2009 22:30:49 -0700 (PDT) +From: Jianmin Foo +Subject: Re: question about when shuffle/sort start working +To: core-user@hadoop.apache.org +In-Reply-To: +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary="0-1193839393-1243834249=:35091" +X-Virus-Checked: Checked by ClamAV on apache.org + +--0-1193839393-1243834249=:35091 +Content-Type: text/plain; charset=us-ascii + +Thanks a lot for your explanation, Sam. + +So is this event generated by hadoop framework? Is there any API in mapper to fire this event? 
Actually, I am thinking to implement a mapper that will emit some pairs, then fire this event to let the reducer works, the same mapper task then emit some other pairs and repeat. Do you think is this logic feasible by current API? + +Thanks, +Jianmin + + + + + +________________________________ +From: Sam Judgement +To: core-user@hadoop.apache.org +Sent: Monday, June 1, 2009 12:26:31 PM +Subject: Re: question about when shuffle/sort start working + +When a Mapper completes, MapCompletionEvents are generated. Reducers try to +fetch map outputs for a given map only on the receipt of such events. + +Sam + + +On 5/30/09 10:00 AM, "Jianmin Foo" wrote: + +> Hi, +> I am being confused by the protocol between mapper and reducer. When mapper +> emitting the (key,value) pair done, is there any signal the mapper send out to +> hadoop framework in protocol to indicate that map is done and the shuffle/sort +> can begin for reducer? If there is no this signal in protocol, when the +> framework begin the shuffle/sort? 
+> +> Thanks, +> Jianmin +> +> +> +> + + + +--0-1193839393-1243834249=:35091-- + + +From core-user-return-14702-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 06:04:30 2009 +Return-Path: +Delivered-To: apmail-hadoop-core-user-archive@www.apache.org +Received: (qmail 53387 invoked from network); 1 Jun 2009 06:04:29 -0000 +Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) + by minotaur.apache.org with SMTP; 1 Jun 2009 06:04:29 -0000 +Received: (qmail 39066 invoked by uid 500); 1 Jun 2009 06:04:39 -0000 +Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org +Received: (qmail 38970 invoked by uid 500); 1 Jun 2009 06:04:39 -0000 +Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm +Precedence: bulk +List-Help: +List-Unsubscribe: +List-Post: +List-Id: +Reply-To: core-user@hadoop.apache.org +Delivered-To: mailing list core-user@hadoop.apache.org +Received: (qmail 38955 invoked by uid 99); 1 Jun 2009 06:04:39 -0000 +Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:39 +0000 +X-ASF-Spam-Status: No, hits=1.2 required=10.0 + tests=SPF_NEUTRAL +X-Spam-Check-By: apache.org +Received-SPF: neutral (athena.apache.org: local policy) +Received: from [216.145.54.172] (HELO mrout2.wahoo.com) (216.145.54.172) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:28 +0000 +Received: from SNV-EXBH01.ds.corp.wahoo.com (snv-exbh01.ds.corp.wahoo.com [207.126.227.249]) + by mrout2.wahoo.com (8.13.6/8.13.6/y.out) with ESMTP id n5163FGq038852 + for ; Sun, 31 May 2009 23:03:15 -0700 (PDT) +DomainKey-Signature: a=rsa-sha1; s=serpent; d=wahoo-inc.com; c=nofws; q=dns; + h=received:user-agent:date:subject:from:to:message-id: + thread-topic:thread-index:in-reply-to:mime-version:content-type: + content-transfer-encoding:x-originalarrivaltime; + b=rChE4SCnwtWaZpjhovkiXDKfDiVNdRRvsadSGG9S9bgvOexn/9/5JjEQx1pOR7Nb +Received: 
from SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.9]) by SNV-EXBH01.ds.corp.wahoo.com with Microsoft SMTPSVC(6.0.3790.3959); + Sun, 31 May 2009 23:03:15 -0700 +Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ; + Mon, 1 Jun 2009 06:03:15 +0000 +User-Agent: Microsoft-Entourage/12.17.0.090302 +Date: Mon, 01 Jun 2009 11:33:13 +0530 +Subject: Re: question about when shuffle/sort start working +From: Sam Judgement +To: +Message-ID: +Thread-Topic: question about when shuffle/sort start working +Thread-Index: AcnifqWrLG6N7GAk7kqy9QalVWfegQ== +In-Reply-To: <592088.35091.qm@web111010.mail.gq1.wahoo.com> +Mime-version: 1.0 +Content-type: text/plain; + charset="US-ASCII" +Content-transfer-encoding: 7bit +X-OriginalArrivalTime: 01 Jun 2009 06:03:15.0462 (UTC) FILETIME=[A7231260:01C9E27E] +X-Virus-Checked: Checked by ClamAV on apache.org + + +No you cannot raise this event yourself, this event is generated internally +by the framework. + +I am guessing that what you probably want is to have a chain of MapReduce +Jobs where the output of one is automatically fed as input to another. You +can look at these classes: JobControl and ChainMapper/ChainReducer. + +Sam + +On 6/1/09 11:00 AM, "Jianmin Foo" wrote: + +> Thanks a lot for your explanation, Sam. +> +> So is this event generated by hadoop framework? Is there any API in mapper to +> fire this event? Actually, I am thinking to implement a mapper that will emit +> some pairs, then fire this event to let the reducer works, the +> same mapper task then emit some other pairs and repeat. Do you +> think is this logic feasible by current API? +> +> Thanks, +> Jianmin +> +> +> +> +> +> ________________________________ +> From: Sam Judgement +> To: core-user@hadoop.apache.org +> Sent: Monday, June 1, 2009 12:26:31 PM +> Subject: Re: question about when shuffle/sort start working +> +> When a Mapper completes, MapCompletionEvents are generated. 
Reducers try to +> fetch map outputs for a given map only on the receipt of such events. +> +> Sam +> +> +> On 5/30/09 10:00 AM, "Jianmin Foo" wrote: +> +>> Hi, +>> I am being confused by the protocol between mapper and reducer. When mapper +>> emitting the (key,value) pair done, is there any signal the mapper send out +>> to +>> hadoop framework in protocol to indicate that map is done and the +>> shuffle/sort +>> can begin for reducer? If there is no this signal in protocol, when the +>> framework begin the shuffle/sort? +>> +>> Thanks, +>> Jianmin +>> +>> +>> +>> +> +> +> + + diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/rsstest.rss b/solr/contrib/map-reduce/src/test-files/test-documents/rsstest.rss new file mode 100644 index 00000000000..758f6a18363 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/rsstest.rss @@ -0,0 +1,36 @@ + + + + + TestChannel + http://test.channel.com/ + Sample RSS File for Junit test + en-us + + + Home Page of Chris Mattmann + http://www-scf.usc.edu/~mattmann/ + Chris Mattmann's home page + + + Awesome Open Source Search Engine + http://www.nutch.org/ + Yup, that's what it is + + + diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120521-100919.avro b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120521-100919.avro new file mode 100644 index 00000000000..36f01a2d48c Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120521-100919.avro differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433 b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433 new file mode 100644 index 00000000000..e633a1f71f1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433 @@ -0,0 +1,4 @@ +1000 +{"text":"sample tweet 
one","retweet_count":0,"in_reply_to_user_id":null,"retweeted":false,"truncated":false,"source":"href=\"http:\/\/sample.com\"","id_str":"1234567891","entities":{"user_mentions":[],"hashtags":[],"urls":[]},"in_reply_to_status_id":null,"place":null,"in_reply_to_status_id_str":null,"coordinates":null,"created_at":"Wed Sep 05 01:01:01 +0000 1985","in_reply_to_screen_name":null,"favorited":false,"in_reply_to_user_id_str":null,"user":{"default_profile_image":false,"friends_count":111,"profile_background_color":"3C0C29","location":"Palo Alto","is_translator":false,"profile_background_tile":true,"favourites_count":11,"verified":false,"profile_sidebar_fill_color":"efefef","follow_request_sent":null,"contributors_enabled":false,"description":"desc1","profile_sidebar_border_color":"eeeeee","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1\/normal.jpg","id_str":"1111111","listed_count":1,"lang":"en","screen_name":"fake_user1","show_all_inline_media":false,"profile_use_background_image":true,"profile_image_url":"http:\/\/a0.twimg.com\/profile_images\/1111111\/normal.jpg","default_profile":false,"statuses_count":11111,"created_at":"Thu Apr 07 11:04:54 +0000 1985","profile_text_color":"333333","followers_count":111,"protected":false,"following":null,"notifications":null,"profile_background_image_url":"http:\/\/a0.twimg.com\/images\/themes\/theme1\/bg.gif","time_zone":null,"url":null,"name":"name1","geo_enabled":false,"profile_link_color":"009999","id":1111112,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/images\/themes\/theme1\/bg.gif","utc_offset":null},"id":11111112,"contributors":null,"geo":null} +2000 +{"text":"sample tweet two","retweet_count":0,"in_reply_to_user_id":null,"retweeted":false,"truncated":false,"source":"href=\"http:\/\/sample.com\"","id_str":"2345678902","entities":{"user_mentions":[],"hashtags":[],"urls":[]},"in_reply_to_status_id":null,"place":null,"in_reply_to_status_id_str":null,"coordinates":null,"created_at":"Wed Sep 05 
02:14:34 +0000 1985","in_reply_to_screen_name":null,"favorited":false,"in_reply_to_user_id_str":null,"user":{"default_profile_image":false,"friends_count":222,"profile_background_color":"3C0C29","location":"San Francisco","is_translator":false,"profile_background_tile":false,"favourites_count":22,"verified":false,"profile_sidebar_fill_color":"B2D948","follow_request_sent":null,"contributors_enabled":false,"description":"desc2","profile_sidebar_border_color":"8EC63D","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/22222222\/image_normal.jpg","id_str":"2222222","listed_count":0,"lang":"en","screen_name":"fake_user2","show_all_inline_media":false,"profile_use_background_image":true,"profile_image_url":"http:\/\/a0.twimg.com\/profile_images\/2222222\/image_normal.jpg","default_profile":false,"statuses_count":222222,"created_at":"Thu Aug 04 11:33:28 +0000 1985","profile_text_color":"444444","followers_count":222,"protected":false,"following":null,"notifications":null,"profile_background_image_url":"http:\/\/a0.twimg.com\/profile_background_images\/222222\/222222.jpg","time_zone":"Central Time (US & Canada)","url":null,"name":"name2","geo_enabled":false,"profile_link_color":"9A0057","id":2222223,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/2222222\/22222.jpg","utc_offset":-21600},"id":222223,"contributors":null,"geo":null} \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433-medium.avro b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433-medium.avro new file mode 100644 index 00000000000..900507c6f05 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433-medium.avro differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.avro 
b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.avro new file mode 100644 index 00000000000..4dbf180dc1d Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.avro differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.bz2 b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.bz2 new file mode 100644 index 00000000000..a4a91594ce8 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.bz2 differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.gz b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.gz new file mode 100644 index 00000000000..3e7a44cb588 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/sample-statuses-20120906-141433.gz differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/test-outlook.msg b/solr/contrib/map-reduce/src/test-files/test-documents/test-outlook.msg new file mode 100644 index 00000000000..c975c0c69d4 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/test-outlook.msg differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testAIFF.aif b/solr/contrib/map-reduce/src/test-files/test-documents/testAIFF.aif new file mode 100644 index 00000000000..97eac1d8e3d Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testAIFF.aif differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testBMP.bmp b/solr/contrib/map-reduce/src/test-files/test-documents/testBMP.bmp new file mode 100644 index 00000000000..c0176157039 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testBMP.bmp differ diff --git 
a/solr/contrib/map-reduce/src/test-files/test-documents/testBMPfp.txt b/solr/contrib/map-reduce/src/test-files/test-documents/testBMPfp.txt new file mode 100644 index 00000000000..1da2966d451 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/testBMPfp.txt @@ -0,0 +1,3 @@ +BMW to Make Hybrid Sports Car
 + By CHRISTOPH RAUWALD . +LEIPZIG, Germany—German car maker BMW AG said Friday it will start series production of a new plug-in hybrid sports car in 2013, to be based on the Vision EfficientDynamics Concept car shown at the Frankfurt auto show in September last year. Chief Executive Norbert Reithofer said the car will be produced in Germany but didn't provide details on the price. The BMW Vision EfficientDynamics Concept car is a sporty plug-in, full hybrid with a turbo-diesel engine, four seats and upward-pivoting doors. BMW executive board member Klaus Draeger told reporters he expects to achieve "a significant sales volume" with the new high-performance sports car. Asked whether annual sales could exceed 1,000 vehicles, Mr. Draeger said, "You said this and I'm not saying this is wrong." In March, Mr. Reithofer indicated that the concept car was set to make it into series production. "I like the car. And you know what it means when I say I like the car—it means I will drive it. It's not just a concept car," he told analysts during a presentation in Munich. The car will be designed for sale in all major global markets, which according to Mr. Draeger might require offering a gasoline engine instead of the prototype's three-cylinder diesel engine. Diesel cars account for roughly half of the European market, but are significantly less popular in the U.S. and hardly present at all in China. Mr. Draeger declined to comment on the vehicle's price tag, but noted that in order to achieve substantial sales volumes the price mustn't be too high. He said the same goes for BMW's planned Megacity Vehicle. A price tag of €60,000 ($85,242) or more would certainly limit potential sales volumes, he said. 
diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testEMLX.emlx b/solr/contrib/map-reduce/src/test-files/test-documents/testEMLX.emlx new file mode 100644 index 00000000000..66766e10be3 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/testEMLX.emlx @@ -0,0 +1,72 @@ + + +1795 +From: "Julien Nioche (JIRA)" +To: dev@tika.apache.org +Subject: [jira] Commented: (TIKA-461) RFC822 messages not parsed +Reply-To: dev@tika.apache.org +Delivered-To: mailing list dev@tika.apache.org +Date: Mon, 6 Sep 2010 05:25:34 -0400 (EDT) +In-Reply-To: <6089099.260231278600349994.JavaMail.jira@thor> +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: 7bit +X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 +X-Virus-Checked: Checked by ClamAV on apache.org + + + [ https://issues.apache.org/jira/browse/TIKA-461?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12906468#action_12906468 ] + +Julien Nioche commented on TIKA-461: +------------------------------------ + +I'll have a look at mime4j and try to use it in Tika + +> RFC822 messages not parsed +> -------------------------- +> +> Key: TIKA-461 +> URL: https://issues.apache.org/jira/browse/TIKA-461 +> Project: Tika +> Issue Type: Bug +> Components: parser +> Affects Versions: 0.7 +> Reporter: Joshua Turner +> Assignee: Julien Nioche +> +> Presented with an RFC822 message exported from Thunderbird, AutodetectParser produces an empty body, and a Metadata containing only one key-value pair: "Content-Type=message/rfc822". Directly calling MboxParser likewise gives an empty body, but with two metadata pairs: "Content-Encoding=us-ascii Content-Type=application/mbox". +> A quick peek at the source of MboxParser shows that the implementation is pretty naive. If the wiring can be sorted out, something like Apache James' mime4j might be a better bet. + +-- +This message is automatically generated by JIRA. 
+- +You can reply to this email to add a comment to the issue online. + + + + + + flags + 0 + sender + "Julien Nioche (JIRA)" <jira@apache.org> + subject + [jira] Commented: (TIKA-461) RFC822 messages not parsed + to + dev@tika.apache.org + diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testEXCEL.xls b/solr/contrib/map-reduce/src/test-files/test-documents/testEXCEL.xls new file mode 100644 index 00000000000..86b291606d0 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testEXCEL.xls differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testEXCEL.xlsx b/solr/contrib/map-reduce/src/test-files/test-documents/testEXCEL.xlsx new file mode 100644 index 00000000000..8d5169f8410 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testEXCEL.xlsx differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testFLAC.flac b/solr/contrib/map-reduce/src/test-files/test-documents/testFLAC.flac new file mode 100644 index 00000000000..ccec94717a4 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testFLAC.flac differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testFLV.flv b/solr/contrib/map-reduce/src/test-files/test-documents/testFLV.flv new file mode 100644 index 00000000000..d35e9bb6063 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testFLV.flv differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg b/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg new file mode 100644 index 00000000000..1b93e771832 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg.gz b/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg.gz new file mode 100644 index 
00000000000..2ee8e9c1b59 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg.gz differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg.tar.gz b/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg.tar.gz new file mode 100644 index 00000000000..3f35102eaef Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testJPEG_EXIF.jpg.tar.gz differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testMP3i18n.mp3 b/solr/contrib/map-reduce/src/test-files/test-documents/testMP3i18n.mp3 new file mode 100644 index 00000000000..0f253704ebb Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testMP3i18n.mp3 differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testMP4.m4a b/solr/contrib/map-reduce/src/test-files/test-documents/testMP4.m4a new file mode 100644 index 00000000000..a9bc7312702 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testMP4.m4a differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testPDF.pdf b/solr/contrib/map-reduce/src/test-files/test-documents/testPDF.pdf new file mode 100644 index 00000000000..1f1bcff6fe9 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testPDF.pdf differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testPNG.png b/solr/contrib/map-reduce/src/test-files/test-documents/testPNG.png new file mode 100644 index 00000000000..afbcb5f7388 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testPNG.png differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testPPT_various.ppt b/solr/contrib/map-reduce/src/test-files/test-documents/testPPT_various.ppt new file mode 100644 index 00000000000..75829de08d7 Binary files /dev/null and 
b/solr/contrib/map-reduce/src/test-files/test-documents/testPPT_various.ppt differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testPPT_various.pptx b/solr/contrib/map-reduce/src/test-files/test-documents/testPPT_various.pptx new file mode 100644 index 00000000000..92c2744dc4e Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testPPT_various.pptx differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testPSD.psd b/solr/contrib/map-reduce/src/test-files/test-documents/testPSD.psd new file mode 100644 index 00000000000..7cedbc21a7a Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testPSD.psd differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testPages.pages b/solr/contrib/map-reduce/src/test-files/test-documents/testPages.pages new file mode 100644 index 00000000000..9fe1e401297 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testPages.pages differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testRTFVarious.rtf b/solr/contrib/map-reduce/src/test-files/test-documents/testRTFVarious.rtf new file mode 100644 index 00000000000..57fadb99988 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/testRTFVarious.rtf @@ -0,0 +1,329 @@ +{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff31507\deff0\stshfdbch31506\stshfloch31506\stshfhich31506\stshfbi31507\deflang1033\deflangfe1033\themelang1033\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;} +{\f2\fbidi \fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f3\fbidi \froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f10\fbidi \fnil\fcharset2\fprq2{\*\panose 05000000000000000000}Wingdings;} +{\f11\fbidi \fmodern\fcharset128\fprq1{\*\panose 
02020609040205080304}MS Mincho{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f15\fbidi \fmodern\fcharset128\fprq1{\*\panose 020b0609070205080204}MS Gothic{\*\falt MS Mincho};} +{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}{\f37\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\f38\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604030504040204}Tahoma;} +{\f175\fbidi \fmodern\fcharset128\fprq1{\*\panose 02020609040205080304}@MS Mincho;}{\f209\fbidi \fmodern\fcharset128\fprq1{\*\panose 00000000000000000000}@MS Gothic;} +{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\fhimajor\f31502\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria;}{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f210\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\f211\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\f213\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f214\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f215\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\f216\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f217\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f218\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f220\fbidi \fswiss\fcharset238\fprq2 Arial CE;} +{\f221\fbidi \fswiss\fcharset204\fprq2 Arial Cyr;}{\f223\fbidi \fswiss\fcharset161\fprq2 Arial 
Greek;}{\f224\fbidi \fswiss\fcharset162\fprq2 Arial Tur;}{\f225\fbidi \fswiss\fcharset177\fprq2 Arial (Hebrew);} +{\f226\fbidi \fswiss\fcharset178\fprq2 Arial (Arabic);}{\f227\fbidi \fswiss\fcharset186\fprq2 Arial Baltic;}{\f228\fbidi \fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f230\fbidi \fmodern\fcharset238\fprq1 Courier New CE;} +{\f231\fbidi \fmodern\fcharset204\fprq1 Courier New Cyr;}{\f233\fbidi \fmodern\fcharset161\fprq1 Courier New Greek;}{\f234\fbidi \fmodern\fcharset162\fprq1 Courier New Tur;}{\f235\fbidi \fmodern\fcharset177\fprq1 Courier New (Hebrew);} +{\f236\fbidi \fmodern\fcharset178\fprq1 Courier New (Arabic);}{\f237\fbidi \fmodern\fcharset186\fprq1 Courier New Baltic;}{\f238\fbidi \fmodern\fcharset163\fprq1 Courier New (Vietnamese);} +{\f322\fbidi \fmodern\fcharset0\fprq1 MS Mincho Western{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f320\fbidi \fmodern\fcharset238\fprq1 MS Mincho CE{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};} +{\f321\fbidi \fmodern\fcharset204\fprq1 MS Mincho Cyr{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f323\fbidi \fmodern\fcharset161\fprq1 MS Mincho Greek{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};} +{\f324\fbidi \fmodern\fcharset162\fprq1 MS Mincho Tur{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f327\fbidi \fmodern\fcharset186\fprq1 MS Mincho Baltic{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f550\fbidi \froman\fcharset238\fprq2 Cambria Math CE;} +{\f551\fbidi \froman\fcharset204\fprq2 Cambria Math Cyr;}{\f553\fbidi \froman\fcharset161\fprq2 Cambria Math Greek;}{\f554\fbidi \froman\fcharset162\fprq2 Cambria Math Tur;}{\f557\fbidi \froman\fcharset186\fprq2 Cambria Math Baltic;} +{\f580\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\f581\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\f583\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\f584\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;} +{\f587\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\f590\fbidi \fswiss\fcharset238\fprq2 Tahoma CE;}{\f591\fbidi 
\fswiss\fcharset204\fprq2 Tahoma Cyr;}{\f593\fbidi \fswiss\fcharset161\fprq2 Tahoma Greek;} +{\f594\fbidi \fswiss\fcharset162\fprq2 Tahoma Tur;}{\f595\fbidi \fswiss\fcharset177\fprq2 Tahoma (Hebrew);}{\f596\fbidi \fswiss\fcharset178\fprq2 Tahoma (Arabic);}{\f597\fbidi \fswiss\fcharset186\fprq2 Tahoma Baltic;} +{\f598\fbidi \fswiss\fcharset163\fprq2 Tahoma (Vietnamese);}{\f599\fbidi \fswiss\fcharset222\fprq2 Tahoma (Thai);}{\f1962\fbidi \fmodern\fcharset0\fprq1 @MS Mincho Western;}{\f1960\fbidi \fmodern\fcharset238\fprq1 @MS Mincho CE;} +{\f1961\fbidi \fmodern\fcharset204\fprq1 @MS Mincho Cyr;}{\f1963\fbidi \fmodern\fcharset161\fprq1 @MS Mincho Greek;}{\f1964\fbidi \fmodern\fcharset162\fprq1 @MS Mincho Tur;}{\f1967\fbidi \fmodern\fcharset186\fprq1 @MS Mincho Baltic;} +{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} +{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} +{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhimajor\f31528\fbidi 
\froman\fcharset238\fprq2 Cambria CE;}{\fhimajor\f31529\fbidi \froman\fcharset204\fprq2 Cambria Cyr;} +{\fhimajor\f31531\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\fhimajor\f31532\fbidi \froman\fcharset162\fprq2 Cambria Tur;}{\fhimajor\f31535\fbidi \froman\fcharset186\fprq2 Cambria Baltic;} +{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} +{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} +{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman 
Baltic;}{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);} +{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;} +{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}} +{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0; +\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\chyperlink\ctint255\cshade255\red0\green0\blue255;\caccentone\ctint255\cshade255\red79\green129\blue189;}{\*\defchp \f31506\fs22 } +{\*\defpap \ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 +\rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext0 \sqformat \spriority0 \styrsid16456967 Normal;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default 
Paragraph Font;}{\* +\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblind0\tblindtype3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext11 \ssemihidden \sunhideused \sqformat Normal Table;}{ +\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext15 \slink16 \sunhideused \styrsid4535536 header;}{\*\cs16 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \sbasedon10 \slink15 \slocked \styrsid4535536 Header Char;}{\s17\ql \li0\ri0\widctlpar +\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext17 \slink18 \sunhideused \styrsid4535536 footer;}{\*\cs18 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \sbasedon10 \slink17 \slocked \styrsid4535536 Footer Char;}{ +\s19\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af38\afs16\alang1025 \ltrch\fcs0 \f38\fs16\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext19 \slink20 \ssemihidden \sunhideused \styrsid4535536 Balloon Text;}{\*\cs20 \additive \rtlch\fcs1 \af38\afs16 \ltrch\fcs0 \f38\fs16 \sbasedon10 \slink19 \slocked \ssemihidden \styrsid4535536 Balloon Text Char;}{\*\cs21 \additive +\rtlch\fcs1 \af0 \ltrch\fcs0 \ul\cf17 \sbasedon10 \sunhideused \styrsid4535536 Hyperlink;}{\*\cs22 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \cf15 \sbasedon10 \ssemihidden \styrsid4535536 Placeholder Text;}{ +\s23\ql 
\li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs20\alang1025 \ltrch\fcs0 \f31506\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext23 \slink24 \ssemihidden \sunhideused \styrsid10829135 footnote text;}{\*\cs24 \additive \rtlch\fcs1 \af0\afs20 \ltrch\fcs0 \fs20 \sbasedon10 \slink23 \slocked \ssemihidden \styrsid10829135 Footnote Text Char;}{\*\cs25 \additive +\rtlch\fcs1 \af0 \ltrch\fcs0 \super \sbasedon10 \ssemihidden \sunhideused \styrsid10829135 footnote reference;}{\*\ts26\tsrowd\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv +\brdrs\brdrw10 \trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblind0\tblindtype3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv +\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon11 \snext26 \spriority59 \styrsid8288896 +Table Grid;}{\s27\ql \li720\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin720\itap0\contextualspace \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext27 \sqformat \spriority34 \styrsid10055055 List Paragraph;}{\s28\ql \li0\ri0\sa200\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \ab\af31507\afs18\alang1025 \ltrch\fcs0 +\b\f31506\fs18\cf18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \sunhideused \sqformat \spriority35 \styrsid11105546 caption;}}{\*\listtable{\list\listtemplateid1249008552\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0 +\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 
?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative +\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0 +{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698693 +\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li6480\lin6480 }{\listname 
;}\listid73432867}{\list\listtemplateid1071396652\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0 +{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698693 +\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698689 +\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698691 +\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li5760\lin5760 
}{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698693 +\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li6480\lin6480 }{\listname ;}\listid169494399}{\list\listtemplateid-487930464\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698705\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li720\lin720 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 
+\levelindent0{\leveltext\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-180\li6480\lin6480 }{\listname ;}\listid1132862691}}{\*\listoverridetable{\listoverride\listid169494399\listoverridecount0\ls1} +{\listoverride\listid73432867\listoverridecount0\ls2}{\listoverride\listid1132862691\listoverridecount0\ls3}}{\*\rsidtbl \rsid724479\rsid2255182\rsid2767955\rsid4260063\rsid4535536\rsid5051464\rsid5706211\rsid5843828\rsid7218132\rsid8152053\rsid8288896 +\rsid9897893\rsid9969477\rsid10055055\rsid10249050\rsid10829135\rsid11105546\rsid12662658\rsid12941695\rsid13331334\rsid14163426\rsid14225018\rsid14292078\rsid14556934\rsid16456967\rsid16539678}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0 +\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\subject Subject is here}{\author Michael McCandless}{\keywords Keyword1 Keyword2}{\operator Michael McCandless}{\creatim\yr2011\mo8\dy29\hr5\min20} +{\revtim\yr2011\mo8\dy30\hr6\min13}{\version30}{\edmins445}{\nofpages2}{\nofwords95}{\nofchars546}{\nofcharsws640}{\vern32771}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}} +\paperw12240\paperh15840\margl1440\margr1440\margt1440\margb1440\gutter0\ltrsect +\widowctrl\ftnbj\aenddoc\trackmoves1\trackformatting1\donotembedsysfont1\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1\noxlattoyen 
+\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1440\dgvorigin1440\dghshow1\dgvshow1 +\jexpand\viewkind1\viewscale150\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct +\asianbrkrule\rsidroot4535536\newtblstyruls\nogrowautofit\usenormstyforlist\noindnmbrts\felnbrelev\nocxsptable\indrlsweleven\noafcnsttbl\afelev\utinl\hwelev\spltpgpar\notcvasp\notbrkcnstfrctbl\notvatxbx\krnprsnet\cachedcolbal \nouicompat \fet0 +{\*\wgrffmtfilter 2450}\nofeaturethrottle1\ilfomacatclnup0{\*\ftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsep +\par }}{\*\ftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsepc +\par }}{\*\aftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsep +\par }}{\*\aftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsepc +\par }}\ltrpar \sectd 
\ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\headerr \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 +\rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 This is the header text}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12662658 .}{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid4535536 +\par +\par }}{\footerr \ltrpar \pard\plain \ltrpar\s17\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 This is the footer text. +\par +\par }}{\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}} +{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8 +\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 +\lang1024\langfe1024\noproof\langfenp1028\insrsid4535536 {\shp{\*\shpinst\shpleft4866\shptop1990\shpright8593\shpbottom2658\shpfhdr0\shpbxcolumn\shpbxignore\shpbypara\shpbyignore\shpwr3\shpwrk0\shpfblwtxt0\shpz0\shplid1026 
+{\sp{\sn shapeType}{\sv 202}}{\sp{\sn fFlipH}{\sv 0}}{\sp{\sn fFlipV}{\sv 0}}{\sp{\sn lTxid}{\sv 65536}}{\sp{\sn hspNext}{\sv 1026}}{\sp{\sn fFitShapeToText}{\sv 1}}{\sp{\sn dhgt}{\sv 251660288}}{\sp{\sn pctHoriz}{\sv 400}}{\sp{\sn pctVert}{\sv 200}} +{\sp{\sn sizerelh}{\sv 0}}{\sp{\sn sizerelv}{\sv 0}}{\sp{\sn fLayoutInCell}{\sv 1}}{\shptxt \ltrpar \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 +\af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Here is a text box +\par }}}{\shprslt{\*\do\dobxcolumn\dobypara\dodhgt8192\dptxbx\dptxlrtb{\dptxbxtext\ltrpar \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 +\ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Here is a text box +\par }}\dpx4866\dpy1990\dpxsize3727\dpysize668\dpfillfgcr255\dpfillfgcg255\dpfillfgcb255\dpfillbgcr255\dpfillbgcg255\dpfillbgcb255\dpfillpat1\dplinew15\dplinecor0\dplinecog0\dplinecob0}}}}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Footnote appears here} +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \cs25\super\insrsid10829135 \chftn {\footnote \ltrpar \pard\plain \ltrpar\s23\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs20\alang1025 \ltrch\fcs0 +\f31506\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \cs25\super\insrsid10829135 \chftn }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid10829135 This is a footnote.}}}{\rtlch\fcs1 \af31507 \ltrch\fcs0 +\insrsid14292078 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14556934 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \b\insrsid14556934\charrsid14556934 Bold}{\rtlch\fcs1 \af31507 \ltrch\fcs0 
\insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \i\insrsid14556934\charrsid14556934 italic}{\rtlch\fcs1 \af31507 \ltrch\fcs0 +\insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \ul\insrsid14556934\charrsid14556934 underline}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \super\insrsid14556934\charrsid14556934 superscript}{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \sub\insrsid14556934\charrsid14556934 subscript}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14556934 +\par }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid10055055 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid10055055 Here is a list: +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f3\fs22\insrsid10055055 \loch\af3\dbch\af31506\hich\f3 \'b7\tab}}\pard\plain \ltrpar\s27\ql \fi-360\li720\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\ls2\adjustright\rin0\lin720\itap0\pararsid10055055\contextualspace \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 +\ltrch\fcs0 \insrsid10055055 Bullet 1 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f3\fs22\insrsid10055055 \loch\af3\dbch\af31506\hich\f3 \'b7\tab}Bullet 2 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f3\fs22\insrsid10055055 \loch\af3\dbch\af31506\hich\f3 \'b7\tab}Bullet 3 +\par }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid10055055 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid10055055 Here is a numbered list: +\par {\listtext\pard\plain\ltrpar 
\s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f31506\fs22\insrsid10055055 \hich\af31506\dbch\af31506\loch\f31506 1)\tab}}\pard\plain \ltrpar\s27\ql \fi-360\li720\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\ls3\adjustright\rin0\lin720\itap0\pararsid10055055\contextualspace \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 +\ltrch\fcs0 \insrsid10055055 Number bullet 1 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f31506\fs22\insrsid10055055 \hich\af31506\dbch\af31506\loch\f31506 2)\tab}Number bullet 2 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f31506\fs22\insrsid10055055 \hich\af31506\dbch\af31506\loch\f31506 3)\tab}Number bullet 3 +\par }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid10829135 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536\charrsid4535536 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Keyword1 Keyword2}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 }{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid15481255 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 +\par }{\field{\*\fldinst {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 HYPERLINK "http://tika.apache.org" }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 {\*\datafield +00d0c9ea79f9bace118c8200aa004ba90b0200000003000000e0c9ea79f9bace118c8200aa004ba90b4800000068007400740070003a002f002f00740069006b0061002e006100700061006300680065002e006f00720067002f000000795881f43b1d7f48af2c825dc485276300000000a5ab0000}}}{\fldrslt { +\rtlch\fcs1 \af31507 \ltrch\fcs0 \cs21\ul\cf17\insrsid4535536\charrsid4535536 This is a 
hyperlink}}}\sectd \ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 +\par +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536\charrsid4535536 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 Subject is here}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 }{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid4535536 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +\par \ltrrow}\trowd \irow0\irowband0\ltrrow\ts26\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 +\trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblrsid8288896\tbllkhdrrows\tbllkhdrcols\tbllknocolband\tblind0\tblindtype3 \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 +\clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx3084\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx6276\clvertalt +\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx9468\pard\plain \ltrpar +\ql \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\yts26 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +Row 1 Col 1\cell Row 1 Col 2\cell Row 1 Col 3\cell }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 \trowd 
\irow0\irowband0\ltrrow\ts26\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr +\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblrsid8288896\tbllkhdrrows\tbllkhdrcols\tbllknocolband\tblind0\tblindtype3 \clvertalt\clbrdrt +\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx3084\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 +\cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx6276\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx9468\row \ltrrow}\pard\plain \ltrpar +\ql \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\yts26 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +Row 2 Col 1\cell Row 2 Col 2\cell Row 2 Col 3\cell }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 \trowd \irow1\irowband1\lastrow \ltrrow\ts26\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr +\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblrsid8288896\tbllkhdrrows\tbllkhdrcols\tbllknocolband\tblind0\tblindtype3 \clvertalt\clbrdrt +\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 
\cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx3084\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 +\cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx6276\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx9468\row }\pard \ltrpar +\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid724479 Suddenly some }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5706211 J}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid724479 apanese text:}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af11 \ltrch\fcs0 \loch\af11\hich\af11\dbch\af11\insrsid724479\charrsid724479 \loch\af11\hich\af11\dbch\f11 \uc2\u12478\'83\'5d\u12523\'83\'8b\u12466\'83\'51\u12392\'82\'c6\u23614\'94\'f6\u23822\'8d\'e8\u12289\'81\'41\u28129\'92\'57\u12293 +\'81\'58\u12392\'82\'c6\u26368\'8d\'c5\u26399\'8a\'fa}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af15 \ltrch\fcs0 \lang1033\langfe1041\loch\af15\hich\af15\dbch\af15\langfenp1041\insrsid5843828 \loch\af15\hich\af15\dbch\f15 \uc2\u-248\'81\'69\u-217\'82\'66\u-216\'82\'67\u-207\'82\'70\u-247\'81\'6a}{\rtlch\fcs1 \af31507 \ltrch\fcs0 +\insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5706211 And then some Gothic text: +\par }\pard \ltrpar\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid14163426 {\rtlch\fcs1 \af1\afs20 \ltrch\fcs0 \f1\fs20\insrsid14163426 \u-10240\'3f\u-8398\'3f\u-10240\'3f\u-8385\'3f\u-10240\'3f\u-8380\'3f\u-10240\'3f\u-8391\'3f\u-10240 +\'3f\u-8381\'3f\u-10240\'3f\u-8390\'3f}{\rtlch\fcs1 \af1\afs20 \ltrch\fcs0 \f1\fs20\insrsid14163426 +\par }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid7218132 Here is a citation:}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12941695 }{\field{\*\fldinst {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12941695 CITATION Kra \\l 1033 }}{\fldrslt {\rtlch\fcs1 \af31507 \ltrch\fcs0 \lang1024\langfe1024\noproof\insrsid12941695 (Kramer)}}} +\sectd \ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12941695 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 +\par }\pard\plain \ltrpar\s28\ql \li0\ri0\sa200\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid11105546 \rtlch\fcs1 \ab\af31507\afs18\alang1025 \ltrch\fcs0 \b\f31506\fs18\cf18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 Figure }{\field{\*\fldinst {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 SEQ Figure \\* ARABIC }}{\fldrslt {\rtlch\fcs1 \af31507 \ltrch\fcs0 \lang1024\langfe1024\noproof\insrsid11105546 1}}} +\sectd \ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 This is a caption for Figure 1 +\par }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid8152053 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 { +\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8152053 +\par +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464 \sect }\sectd \ltrsect\sbknone\linex0\cols2\endnhere\sectlinegrid360\sectdefaultcl\sectrsid5051464\sftnbj \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid5051464 \rtlch\fcs1 
\af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464 +Row 1 column 1 +\par Row 2 column 1 +\par }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid8152053 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464 Row 1 column 2 +\par Row 2 column 2 +\par \sect }\sectd \ltrsect\sbknone\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid5051464\sftnbj \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid8152053 \rtlch\fcs1 +\af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464\charrsid8152053 +\par }{\*\themedata 504b030414000600080000002100828abc13fa0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb6ac3301045f785fe83d0b6d8 +72ba28a5d8cea249777d2cd20f18e4b12d6a8f843409c9df77ecb850ba082d74231062ce997b55ae8fe3a00e1893f354e9555e6885647de3a8abf4fbee29bbd7 +2a3150038327acf409935ed7d757e5ee14302999a654e99e393c18936c8f23a4dc072479697d1c81e51a3b13c07e4087e6b628ee8cf5c4489cf1c4d075f92a0b +44d7a07a83c82f308ac7b0a0f0fbf90c2480980b58abc733615aa2d210c2e02cb04430076a7ee833dfb6ce62e3ed7e14693e8317d8cd0433bf5c60f53fea2fe7 +065bd80facb647e9e25c7fc421fd2ddb526b2e9373fed4bb902e182e97b7b461e6bfad3f010000ffff0300504b030414000600080000002100a5d6a7e7c00000 +00360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4fc7060abb08 +84a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b63095120f88d94fbc +52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462a1a82fe353 
+bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f7468656d652f7468 +656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b4b0d592c9c +070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b4757e8d3f7 +29e245eb2b260a0238fd010000ffff0300504b03041400060008000000210096b5ade296060000501b0000160000007468656d652f7468656d652f7468656d65 +312e786d6cec594f6fdb3614bf0fd87720746f6327761a07758ad8b19b2d4d1bc46e871e698996d850a240d2497d1bdae38001c3ba618715d86d87615b8116d8 +a5fb34d93a6c1dd0afb0475292c5585e9236d88aad3e2412f9e3fbff1e1fa9abd7eec70c1d1221294fda5efd72cd4324f1794093b0eddd1ef62fad79482a9c04 +98f184b4bd2991deb58df7dfbb8ad755446282607d22d771db8b944ad79796a40fc3585ee62949606ecc458c15bc8a702910f808e8c66c69b9565b5d8a314d3c +94e018c8de1a8fa94fd05093f43672e23d06af89927ac06762a049136785c10607758d9053d965021d62d6f6804fc08f86e4bef210c352c144dbab999fb7b471 +7509af678b985ab0b6b4ae6f7ed9ba6c4170b06c788a705430adf71bad2b5b057d03606a1ed7ebf5babd7a41cf00b0ef83a6569632cd467faddec9699640f671 +9e76b7d6ac355c7c89feca9cccad4ea7d36c65b258a206641f1b73f8b5da6a6373d9c11b90c537e7f08dce66b7bbeae00dc8e257e7f0fd2badd5868b37a088d1 +e4600ead1ddaef67d40bc898b3ed4af81ac0d76a197c86826828a24bb318f3442d8ab518dfe3a20f000d6458d104a9694ac6d88728eee2782428d60cf03ac1a5 +193be4cbb921cd0b495fd054b5bd0f530c1931a3f7eaf9f7af9e3f45c70f9e1d3ff8e9f8e1c3e3073f5a42ceaa6d9c84e5552fbffdeccfc71fa33f9e7ef3f2d1 +17d57859c6fffac327bffcfc793510d26726ce8b2f9ffcf6ecc98baf3efdfdbb4715f04d814765f890c644a29be408edf3181433567125272371be15c308d3f2 +8acd249438c19a4b05fd9e8a1cf4cd296699771c393ac4b5e01d01e5a30a787d72cf1178108989a2159c77a2d801ee72ce3a5c545a6147f32a99793849c26ae6 +6252c6ed637c58c5bb8b13c7bfbd490a75330f4b47f16e441c31f7184e140e494214d273fc80900aedee52ead87597fa824b3e56e82e451d4c2b4d32a423279a 
+668bb6690c7e9956e90cfe766cb37b077538abd27a8b1cba48c80acc2a841f12e698f13a9e281c57911ce298950d7e03aba84ac8c154f8655c4f2af074481847 +bd804859b5e696007d4b4edfc150b12addbecba6b18b148a1e54d1bc81392f23b7f84137c2715a851dd0242a633f900710a218ed715505dfe56e86e877f0034e +16bafb0e258ebb4faf06b769e888340b103d3311da9750aa9d0a1cd3e4efca31a3508f6d0c5c5c398602f8e2ebc71591f5b616e24dd893aa3261fb44f95d843b +5974bb5c04f4edafb95b7892ec1108f3f98de75dc97d5772bdff7cc95d94cf672db4b3da0a6557f70db629362d72bcb0431e53c6066acac80d699a6409fb44d0 +8741bdce9c0e4971624a2378cceaba830b05366b90e0ea23aaa241845368b0eb9e2612ca8c742851ca251ceccc70256d8d87265dd96361531f186c3d9058edf2 +c00eafe8e1fc5c509031bb4d680e9f39a3154de0accc56ae644441edd76156d7429d995bdd88664a9dc3ad50197c38af1a0c16d684060441db02565e85f3b966 +0d0713cc48a0ed6ef7dedc2dc60b17e92219e180643ed27acffba86e9c94c78ab90980d8a9f0913ee49d62b512b79626fb06dccee2a432bbc60276b9f7dec44b +7904cfbca4f3f6443ab2a49c9c2c41476dafd55c6e7ac8c769db1bc399161ee314bc2e75cf8759081743be1236ec4f4d6693e5336fb672c5dc24a8c33585b5fb +9cc24e1d4885545b58463634cc5416022cd19cacfccb4d30eb45296023fd35a458598360f8d7a4003bbaae25e331f155d9d9a5116d3bfb9a95523e51440ca2e0 +088dd844ec6370bf0e55d027a012ae264c45d02f708fa6ad6da6dce29c255df9f6cae0ec38666984b372ab5334cf640b37795cc860de4ae2816e95b21be5ceaf +8a49f90b52a51cc6ff3355f47e0237052b81f6800fd7b802239daf6d8f0b1571a8426944fdbe80c6c1d40e8816b88b8569082ab84c36ff0539d4ff6dce591a26 +ade1c0a7f669880485fd484582903d284b26fa4e2156cff62e4b9265844c4495c495a9157b440e091bea1ab8aaf7760f4510eaa69a6465c0e04ec69ffb9e65d0 +28d44d4e39df9c1a52ecbd3607fee9cec7263328e5d661d3d0e4f62f44acd855ed7ab33cdf7bcb8ae889599bd5c8b3029895b6825696f6af29c239b75a5bb1e6 +345e6ee6c28117e73586c1a2214ae1be07e93fb0ff51e133fb65426fa843be0fb515c187064d0cc206a2fa926d3c902e907670048d931db4c1a44959d366ad93 +b65abe595f70a75bf03d616c2dd959fc7d4e6317cd99cbcec9c58b34766661c7d6766ca1a9c1b327531486c6f941c638c67cd22a7f75e2a37be0e82db8df9f30 
+254d30c1372581a1f51c983c80e4b71ccdd28dbf000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d652f74 +68656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d363f24 +51eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e3198 +720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d9850528 +a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100828abc13fa0000001c0200001300000000000000000000000000 +000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b000000000000000000000000 +002b0100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c00000000000000000000000000140200007468 +656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d001400060008000000210096b5ade296060000501b000016000000000000000000 +00000000d10200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b010000270000000000 +00000000000000009b0900007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000960a00000000} +{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d +617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169 +6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363 +656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e} 
+{\*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef1\lsdunhideuseddef1\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4; +\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 5;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9; +\lsdpriority39 \lsdlocked0 toc 1;\lsdpriority39 \lsdlocked0 toc 2;\lsdpriority39 \lsdlocked0 toc 3;\lsdpriority39 \lsdlocked0 toc 4;\lsdpriority39 \lsdlocked0 toc 5;\lsdpriority39 \lsdlocked0 toc 6;\lsdpriority39 \lsdlocked0 toc 7; +\lsdpriority39 \lsdlocked0 toc 8;\lsdpriority39 \lsdlocked0 toc 9;\lsdqformat1 \lsdpriority35 \lsdlocked0 caption;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdpriority1 \lsdlocked0 Default Paragraph Font; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority59 \lsdlocked0 Table Grid;\lsdunhideused0 \lsdlocked0 Placeholder Text;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdsemihidden0 
\lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdunhideused0 \lsdlocked0 Revision; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful 
List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 
3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;\lsdsemihidden0 
\lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;\lsdsemihidden0 
\lsdunhideused0 \lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdpriority37 \lsdlocked0 Bibliography;\lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;}}{\*\datastore 010500000200000018000000 +4d73786d6c322e534158584d4c5265616465722e352e30000000000000000000000e0000 +d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff0900060000000000000000000000010000000100000000000000001000000200000001000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +fffffffffffffffffdffffff05000000feffffff04000000fefffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffff01000000ec69d9888b8b3d4c859eaf6cd158be0f0000000000000000000000000076 +bb6efd66cc0103000000c0030000000000004d0073006f004400610074006100530074006f0072006500000000000000000000000000000000000000000000000000000000000000000000000000000000001a000101ffffffffffffffff0200000000000000000000000000000000000000000000000076bb6efd66cc01 +0076bb6efd66cc010000000000000000000000003500cb004c0053004a004300ca00d80044005500470056003000cd0045004500d100c3004c00c000cd0051003d003d000000000000000000000000000000000032000101ffffffffffffffff0300000000000000000000000000000000000000000000000076bb6efd66 +cc010076bb6efd66cc010000000000000000000000004900740065006d0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a000201ffffffff04000000ffffffff000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000016020000000000000100000002000000030000000400000005000000060000000700000008000000feffffff0a0000000b0000000c0000000d0000000e000000feffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3c623a536f757263657320786d6c6e733a623d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f6772617068792220786d6c6e733d +22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f677261706879222053656c65637465645374796c653d225c4150412e58534c22205374796c654e616d653d22415041223e3c623a536f757263653e3c623a546167 +3e4b72613c2f623a5461673e3c623a536f75726365547970653e426f6f6b3c2f623a536f75726365547970653e3c623a477569643e7b32313839323034362d453338412d344136382d383931312d3837313145343731453345347d3c2f623a477569643e3c623a4c4349443e303c2f623a4c4349443e3c623a417574686f 
+723e3c623a417574686f723e3c623a4e616d654c6973743e3c623a506572736f6e3e3c623a4c6173743e4b72616d65723c2f623a4c6173743e3c2f623a506572736f6e3e3c2f623a4e616d654c6973743e3c2f623a417574686f723e3c2f623a417574686f723e3c623a5469746c653e486f7720746f207573652054696b +613c2f623a5469746c653e3c623a5265664f726465723e313c2f623a5265664f726465723e3c2f623a536f757263653e3c2f623a536f75726365733e0d0a68aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e31983c3f786d6c2076657273696f6e3d22312e302220656e +636f64696e673d225554462d3822207374616e64616c6f6e653d226e6f223f3e0d0a3c64733a6461746173746f72654974656d2064733a6974656d49443d227b32344432423237452d423832412d343130442d393536412d4431303443363332453042357d2220786d6c6e733a64733d22687474703a2f2f736368656d61 +732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f637573746f6d586d6c223e3c64733a736368656d61526566733e3c64733a736368656d615265662064733a7572693d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666 +696365446f63756d656e742f323030362f6269626c696f677261706879222f3e3c2f64733a736368656d61526566733e3c2f64733a6461746173746f72654974656d3e68656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b01000027000000000000000000000000009b0900007468656d +652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d500072006f007000650072007400690065007300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000200ffffffffffffffffffff +ffff0000000000000000000000000000000000000000000000000000000000000000000000000900000055010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffff 
+ffffffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffff +ffffffffffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffff +ffffffffffffffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000105000000000000}} diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testSVG.svg b/solr/contrib/map-reduce/src/test-files/test-documents/testSVG.svg new file mode 100644 index 00000000000..8a05a4835b6 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/testSVG.svg @@ -0,0 +1,23 @@ + + + + + Test SVG image + + \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testTIFF.tif b/solr/contrib/map-reduce/src/test-files/test-documents/testTIFF.tif new file mode 100644 index 00000000000..8f6c7abba42 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testTIFF.tif differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testVISIO.vsd b/solr/contrib/map-reduce/src/test-files/test-documents/testVISIO.vsd new file mode 100644 index 00000000000..d699e11122b Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testVISIO.vsd differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testWAV.wav b/solr/contrib/map-reduce/src/test-files/test-documents/testWAV.wav new file mode 100644 index 00000000000..59a063ece01 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testWAV.wav differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testWORD_various.doc 
b/solr/contrib/map-reduce/src/test-files/test-documents/testWORD_various.doc new file mode 100644 index 00000000000..a2ad2364565 Binary files /dev/null and b/solr/contrib/map-reduce/src/test-files/test-documents/testWORD_various.doc differ diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testXML.xml b/solr/contrib/map-reduce/src/test-files/test-documents/testXML.xml new file mode 100644 index 00000000000..a01a402977b --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/testXML.xml @@ -0,0 +1,48 @@ + + + + + Tika test document + + Rida Benjelloun + + Java + + XML + + XSLT + + JDOM + + Indexation + + Framework d'indexation des documents XML, HTML, PDF etc.. + + http://www.apache.org + + 2000-12-01T00:00:00.000Z + + test + + application/msword + + Fr + + Archimède et Lius à Châteauneuf testing chars en été + + \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/test-files/test-documents/testXML2.xml b/solr/contrib/map-reduce/src/test-files/test-documents/testXML2.xml new file mode 100644 index 00000000000..6611ee14957 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-documents/testXML2.xml @@ -0,0 +1,22 @@ + + + + 123 + Hello World + Solr rocks + diff --git a/solr/contrib/map-reduce/src/test-files/test-morphlines/loadSolrBasic.conf b/solr/contrib/map-reduce/src/test-files/test-morphlines/loadSolrBasic.conf new file mode 100644 index 00000000000..b033320b776 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-morphlines/loadSolrBasic.conf @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# morphline.conf example file +# this is a comment +// this is yet another comment + +# for details see https://github.com/typesafehub/config#optional-system-or-env-variable-overrides +SOLR_COLLECTION : "collection1" +SOLR_COLLECTION : ${?ENV_SOLR_COLLECTION} + +ZK_HOST : "127.0.0.1:2181/solr" +ZK_HOST : ${?ENV_ZK_HOST} + +SOLR_HOME_DIR : "example/solr/collection1" +SOLR_HOME_DIR : ${?ENV_SOLR_HOME_DIR} + +SOLR_LOCATOR : { + collection : ${SOLR_COLLECTION} + zkHost : ${ZK_HOST} + solrHomeDir : ${SOLR_HOME_DIR} + # batchSize : 1000 +} +SOLR_LOCATOR : ${?ENV_SOLR_LOCATOR} + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + boosts : { + id : 1.0 + } + } + } + + { logDebug { format : "output record: {}", args : ["@{}"] } } + ] + } +] diff --git a/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf b/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf new file mode 100644 index 00000000000..dd769a71ba1 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf @@ -0,0 +1,255 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). +# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). +# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment +// this is yet another comment + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { separateAttachments {} } + + # java command that doesn't do anything except for test compilation + { + java { + imports : "import java.util.*;" + code: """ + List tags = record.get("javaWithImports"); + return child.process(record); + """ + } + } + + # java command that doesn't do anything except for test compilation + { + java { + code: """ + List tags = record.get("javaWithoutImports"); + return child.process(record); + """ + } + } + + { + # used for auto-detection if MIME type isn't explicitly supplied + detectMimeType { + includeDefaultMimeTypes : true + mimeTypesFiles : ["RESOURCES_DIR/custom-mimetypes.xml"] + } + } + + { + tryRules { + throwExceptionIfAllRulesFailed : true + rules : [ + # next top-level rule: + { + commands : [ + { logDebug { format : "hello unpack" } } + { unpack {} } + { generateUUID {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { logDebug { format : "hello decompress" } } + { decompress {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { + 
readAvroContainer { + supportedMimeTypes : [avro/binary] + # readerSchemaString : "" # optional, avro json schema blurb for getSchema() + # readerSchemaFile : /path/to/syslog.avsc + } + } + + { extractAvroTree {} } + + { + setValues { + id : "@{/id}" + user_screen_name : "@{/user_screen_name}" + text : "@{/text}" + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } + + { + commands : [ + { + readJsonTestTweets { + supportedMimeTypes : ["mytwittertest/json+delimited+length"] + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } + + # next top-level rule: + { + commands : [ + { logDebug { format : "hello solrcell" } } + { + # wrap SolrCell around an HTML Tika parser + solrCell { + solrLocator : ${SOLR_LOCATOR} + # captureAttr : true # default is false + capture : [ + + # twitter feed schema + user_friends_count + user_location + user_description + user_statuses_count + user_followers_count + user_name + user_screen_name + created_at + text + retweet_count + retweeted + in_reply_to_user_id + source + in_reply_to_status_id + media_url_https + expanded_url + + # file metadata + file_download_url + file_upload_url + file_scheme + file_host + file_port + file_path + file_name + file_length + file_last_modified + file_owner + file_group + file_permissions_user + file_permissions_group + file_permissions_other + file_permissions_stickybit + ] + + fmap : { content : text, content-type : content_type } # rename "content" field to "text" fields + dateFormats : [ "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"] # various java.text.SimpleDateFormat + # xpath : "/xhtml:html/xhtml:body/xhtml:div/descendant:node()" + uprefix : "ignored_" + lowernames : true + # solrContentHandlerFactory : org.apache.solr.tika.TrimSolrContentHandlerFactory + + # Tika parsers to be registered. 
If multiple parsers support the same MIME type, + # the parser is chosen that is closest to the bottom in this list: + parsers : [ + { parser : org.apache.tika.parser.asm.ClassParser } + # { parser : org.gagravarr.tika.OggParser, additionalSupportedMimeTypes : [audio/ogg] } + { parser : org.gagravarr.tika.FlacParser } + { parser : org.apache.tika.parser.audio.AudioParser } + { parser : org.apache.tika.parser.audio.MidiParser } + { parser : org.apache.tika.parser.crypto.Pkcs7Parser } + { parser : org.apache.tika.parser.dwg.DWGParser } + { parser : org.apache.tika.parser.epub.EpubParser } + { parser : org.apache.tika.parser.executable.ExecutableParser } + { parser : org.apache.tika.parser.feed.FeedParser } + { parser : org.apache.tika.parser.font.AdobeFontMetricParser } + { parser : org.apache.tika.parser.font.TrueTypeParser } + { parser : org.apache.tika.parser.xml.XMLParser } + { parser : org.apache.tika.parser.html.HtmlParser } + { parser : org.apache.tika.parser.image.ImageParser } + { parser : org.apache.tika.parser.image.PSDParser } + { parser : org.apache.tika.parser.image.TiffParser } + { parser : org.apache.tika.parser.iptc.IptcAnpaParser } + { parser : org.apache.tika.parser.iwork.IWorkPackageParser } + { parser : org.apache.tika.parser.jpeg.JpegParser } + { parser : org.apache.tika.parser.mail.RFC822Parser } + { parser : org.apache.tika.parser.mbox.MboxParser, additionalSupportedMimeTypes : [message/x-emlx] } + { parser : org.apache.tika.parser.microsoft.OfficeParser } + { parser : org.apache.tika.parser.microsoft.TNEFParser } + { parser : org.apache.tika.parser.microsoft.ooxml.OOXMLParser } + { parser : org.apache.tika.parser.mp3.Mp3Parser } + { parser : org.apache.tika.parser.mp4.MP4Parser } + { parser : org.apache.tika.parser.hdf.HDFParser } + { parser : org.apache.tika.parser.netcdf.NetCDFParser } + { parser : org.apache.tika.parser.odf.OpenDocumentParser } + { parser : org.apache.tika.parser.pdf.PDFParser } + { parser : 
org.apache.tika.parser.pkg.CompressorParser } + { parser : org.apache.tika.parser.pkg.PackageParser } + { parser : org.apache.tika.parser.rtf.RTFParser } + { parser : org.apache.tika.parser.txt.TXTParser } + { parser : org.apache.tika.parser.video.FLVParser } + { parser : org.apache.tika.parser.xml.DcXMLParser } + { parser : org.apache.tika.parser.xml.FictionBookParser } + { parser : org.apache.tika.parser.chm.ChmParser } + ] + } + } + + { generateUUID { field : ignored_base_id } } + + { + generateSolrSequenceKey { + baseIdField: ignored_base_id + solrLocator : ${SOLR_LOCATOR} + } + } + + ] + } + ] + } + } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + + { + logDebug { + format : "My output record: {}" + args : ["@{}"] + } + } + + ] + } +] diff --git a/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellJPGCompressed.conf b/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellJPGCompressed.conf new file mode 100644 index 00000000000..e1a9679678e --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellJPGCompressed.conf @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). 
+# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). +# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment +// this is yet another comment + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { separateAttachments {} } + + # java command that doesn't do anything except for test compilation + { + java { + imports : "import java.util.*;" + code: """ + List tags = record.get("javaWithImports"); + return child.process(record); + """ + } + } + + # java command that doesn't do anything except for test compilation + { + java { + code: """ + List tags = record.get("javaWithoutImports"); + return child.process(record); + """ + } + } + + { + # auto-detect MIME type if it isn't explicitly supplied + detectMimeType { + includeDefaultMimeTypes : true + } + } + + { + tryRules { + throwExceptionIfAllRulesFailed : true + rules : [ + # next top-level rule: + { + commands : [ + { logDebug { format : "hello unpack" } } + { unpack {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { logDebug { format : "hello decompress" } } + { decompress {} } + { callParentPipe {} } + ] + } + + # next top-level rule: + { + commands : [ + { logDebug { format : "hello solrcell" } } + { + # wrap SolrCell around a JPG Tika parser + solrCell { + solrLocator : ${SOLR_LOCATOR} + captureAttr : true # default is false + capture : [content, a, h1, h2] # extract some fields + fmap : { exif_image_height : text, a : anchor, h1 : heading1 } # rename some fields + dateFormats : [ "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"] # various java.text.SimpleDateFormat + xpath : "/xhtml:html/xhtml:body/xhtml:div/descendant:node()" + uprefix : "ignored_" + lowernames : true + solrContentHandlerFactory : 
org.apache.solr.morphlines.cell.TrimSolrContentHandlerFactory + parsers : [ # nested Tika parsers + { parser : org.apache.tika.parser.jpeg.JpegParser } + ] + } + } + + { logDebug { format : "solrcell output: {}", args : ["@{}"] } } + ] + } + ] + } + } + + { generateUUID { field : ignored_base_id } } + + { + generateSolrSequenceKey { + baseIdField: ignored_base_id + solrLocator : ${SOLR_LOCATOR} + } + } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + + { + logDebug { + format : "My output record: {}" + args : ["@{}"] + } + } + + ] + } +] diff --git a/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellXML.conf b/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellXML.conf new file mode 100644 index 00000000000..6c19c5ee692 --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellXML.conf @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). +# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). 
+# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment +// this is yet another comment + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + addValues { _attachment_mimetype : application/xml } + # alternatively, consider using the detectMimeType command + } + + { + # wrap SolrCell around an XML Tika parser + solrCell { + solrLocator : ${SOLR_LOCATOR} + parsers : [ # nested Tika parsers + { parser : org.apache.tika.parser.xml.XMLParser } + ] + } + } + + { + generateSolrSequenceKey { + baseIdField: base_id + solrLocator : ${SOLR_LOCATOR} + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + + { logDebug { format : "solrcell output: {}", args : ["@{}"] } } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + + ] + } +] diff --git a/solr/contrib/map-reduce/src/test-files/test-morphlines/tokenizeText.conf b/solr/contrib/map-reduce/src/test-files/test-morphlines/tokenizeText.conf new file mode 100644 index 00000000000..c58d4d2236c --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-morphlines/tokenizeText.conf @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + tokenizeText { + inputField : message + outputField : tokens + solrFieldType : text_en + solrLocator : ${SOLR_LOCATOR} + } + } + + { logDebug { format : "output record {}", args : ["@{}"] } } + ] + } +] diff --git a/solr/contrib/map-reduce/src/test-files/test-morphlines/tutorialReadAvroContainer.conf b/solr/contrib/map-reduce/src/test-files/test-morphlines/tutorialReadAvroContainer.conf new file mode 100644 index 00000000000..cf34c4fac7e --- /dev/null +++ b/solr/contrib/map-reduce/src/test-files/test-morphlines/tutorialReadAvroContainer.conf @@ -0,0 +1,140 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). +# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). 
+# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment + +# Specify server locations in a SOLR_LOCATOR variable; used later in variable substitutions: +SOLR_LOCATOR : { + # Name of solr collection + collection : collection1 + + # ZooKeeper ensemble + zkHost : "127.0.0.1:2181/solr" + + # The maximum number of documents to send to Solr per network batch (throughput knob) + # batchSize : 1000 +} + +# Specify an array of one or more morphlines, each of which defines an ETL +# transformation chain. A morphline consists of one or more (potentially +# nested) commands. A morphline is a way to consume records (e.g. Flume events, +# HDFS files or blocks), turn them into a stream of records, and pipe the stream +# of records through a set of easily configurable transformations on its way to +# Solr. +morphlines : [ + { + # Name used to identify a morphline. E.g. used if there are multiple morphlines in a + # morphline config file + id : morphline1 + + # Import all morphline commands in these java packages and their subpackages. + # Other commands that may be present on the classpath are not visible to this morphline. + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + # Parse Avro container file and emit a record for each avro object + readAvroContainer { + # Optionally, require the input record to match one of these MIME types: + # supportedMimeTypes : [avro/binary] + + # Optionally, use a custom Avro schema in JSON format inline: + # readerSchemaString : """""" + + # Optionally, use a custom Avro schema file in JSON format: + # readerSchemaFile : /path/to/syslog.avsc + } + } + + { + # Consume the output record of the previous command and pipe another record downstream. + # + # extractAvroPaths is a command that uses zero or more avro path expressions to extract + # values from an Avro object. 
Each expression consists of a record output field name (on + # the left side of the colon ':') as well as zero or more path steps (on the right hand + # side), each path step separated by a '/' slash. Avro arrays are traversed with the '[]' + # notation. + # + # The result of a path expression is a list of objects, each of which is added to the + # given record output field. + # + # The path language supports all Avro concepts, including nested structures, records, + # arrays, maps, unions, etc, as well as a flatten option that collects the primitives in + # a subtree into a flat list. + extractAvroPaths { + flatten : false + paths : { + id : /id + text : /text + user_friends_count : /user_friends_count + user_location : /user_location + user_description : /user_description + user_statuses_count : /user_statuses_count + user_followers_count : /user_followers_count + user_name : /user_name + user_screen_name : /user_screen_name + created_at : /created_at + retweet_count : /retweet_count + retweeted : /retweeted + in_reply_to_user_id : /in_reply_to_user_id + source : /source + in_reply_to_status_id : /in_reply_to_status_id + media_url_https : /media_url_https + expanded_url : /expanded_url + } + } + } + + # Consume the output record of the previous command and pipe another record downstream. + # + # convert timestamp field to native Solr timestamp format + # e.g. 2012-09-06T07:14:34Z to 2012-09-06T07:14:34.000Z + { + convertTimestamp { + field : created_at + inputFormats : ["yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd"] + inputTimezone : UTC +# outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSSZ" + outputTimezone : America/Los_Angeles + } + } + + # Consume the output record of the previous command and pipe another record downstream. + # + # This command sanitizes record fields that are unknown to Solr schema.xml by deleting + # them. Recall that Solr throws an exception on any attempt to load a document that + # contains a field that isn't specified in schema.xml. 
+ { + sanitizeUnknownSolrFields { + # Location from which to fetch Solr schema + solrLocator : ${SOLR_LOCATOR} + } + } + + # log the record at DEBUG level to SLF4J + { logDebug { format : "output record: {}", args : ["@{}"] } } + + # load the record into a Solr server or MapReduce Reducer. + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } +] diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java new file mode 100644 index 00000000000..cab29e7796c --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.util.Comparator; + +import org.junit.Assert; +import org.junit.Test; + +public class AlphaNumericComparatorTest extends Assert { + + @Test + public void testBasic() { + Comparator c = new AlphaNumericComparator(); + assertTrue(c.compare("a", "b") < 0); + assertTrue(c.compare("shard1", "shard1") == 0); + //assertTrue(c.compare("shard01", "shard1") == 0); + assertTrue(c.compare("shard10", "shard10") == 0); + assertTrue(c.compare("shard1", "shard2") < 0); + assertTrue(c.compare("shard9", "shard10") < 0); + assertTrue(c.compare("shard09", "shard10") < 0); + assertTrue(c.compare("shard019", "shard10") > 0); + assertTrue(c.compare("shard10", "shard11") < 0); + assertTrue(c.compare("shard10z", "shard10z") == 0); + assertTrue(c.compare("shard10z", "shard11z") < 0); + assertTrue(c.compare("shard10a", "shard10z") < 0); + assertTrue(c.compare("shard10z", "shard10a") > 0); + assertTrue(c.compare("shard1z", "shard1z") == 0); + assertTrue(c.compare("shard2", "shard1") > 0); + } + +} diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java new file mode 100644 index 00000000000..370dee189c9 --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class IdentityMapper extends Mapper { + + private static final Logger LOGGER = LoggerFactory.getLogger(IdentityMapper.class); + + @Override + protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + LOGGER.info("map key: {}, value: {}", key, value); + context.write(value, NullWritable.get()); + } +} \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java new file mode 100644 index 00000000000..104a88225f7 --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class IdentityReducer extends Reducer { + + private static final Logger LOGGER = LoggerFactory.getLogger(IdentityReducer.class); + + @Override + protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { + LOGGER.info("reduce key: {}, value: {}", key, values); + context.write(key, NullWritable.get()); + } +} \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java new file mode 100644 index 00000000000..379e60a4dc9 --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class LineRandomizerMapperReducerTest extends Assert { + + private MapReduceDriver mapReduceDriver; + + @Before + public void setUp() { + LineRandomizerMapper mapper = new LineRandomizerMapper(); + LineRandomizerReducer reducer = new LineRandomizerReducer(); + mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer); + } + + @Test + public void testMapReduce1Item() throws IOException { + mapReduceDriver.withInput(new LongWritable(0), new Text("hello")); + mapReduceDriver.withOutput(new Text("hello"), NullWritable.get()); + mapReduceDriver.runTest(); + } + + @Test + public void testMapReduce2Items() throws IOException { + mapReduceDriver.withAll(Arrays.asList( + new Pair(new LongWritable(0), new Text("hello")), + new Pair(new LongWritable(1), new Text("world")) + )); + mapReduceDriver.withAllOutput(Arrays.asList( + new Pair(new Text("world"), NullWritable.get()), + new Pair(new Text("hello"), NullWritable.get()) + )); + mapReduceDriver.runTest(); + } + + @Test + public void testMapReduce3Items() throws IOException { + mapReduceDriver.withAll(Arrays.asList( + new Pair(new LongWritable(0), new Text("hello")), + new Pair(new LongWritable(1), 
new Text("world")), + new Pair(new LongWritable(2), new Text("nadja")) + )); + mapReduceDriver.withAllOutput(Arrays.asList( + new Pair(new Text("nadja"), NullWritable.get()), + new Pair(new Text("world"), NullWritable.get()), + new Pair(new Text("hello"), NullWritable.get()) + )); + mapReduceDriver.runTest(); + } + + @Test + public void testMapReduce4Items() throws IOException { + mapReduceDriver.withAll(Arrays.asList( + new Pair(new LongWritable(0), new Text("hello")), + new Pair(new LongWritable(1), new Text("world")), + new Pair(new LongWritable(2), new Text("nadja")), + new Pair(new LongWritable(3), new Text("basti")) + )); + mapReduceDriver.withAllOutput(Arrays.asList( + new Pair(new Text("nadja"), NullWritable.get()), + new Pair(new Text("world"), NullWritable.get()), + new Pair(new Text("basti"), NullWritable.get()), + new Pair(new Text("hello"), NullWritable.get()) + )); + mapReduceDriver.runTest(); + } + +} \ No newline at end of file diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java new file mode 100644 index 00000000000..93f620f85a5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.IOException; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.hadoop.morphline.MorphlineMapRunner; +import org.apache.solr.util.ExternalPaths; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public abstract class MRUnitBase extends SolrTestCaseJ4 { + + protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/map-reduce/src/test-files"; + protected static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents"; + protected static File solrHomeZip; + + @BeforeClass + public static void setupClass() throws Exception { + solrHomeZip = SolrOutputFormat.createSolrHomeZip(new File(RESOURCES_DIR + "/solr/mrunit")); + assertNotNull(solrHomeZip); + } + + @AfterClass + public static void teardownClass() throws Exception { + solrHomeZip.delete(); + } + + protected void setupHadoopConfig(Configuration config) throws IOException { + + String tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis(); + new File(tempDir).mkdirs(); + FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml")); + + setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes"); + + config.set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, tempDir + "/test-morphlines/solrCellDocumentTypes.conf"); + config.set(SolrOutputFormat.ZIP_NAME, solrHomeZip.getName()); + } + + public static void setupMorphline(String tempDir, String file) throws IOException { + String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8"); + morphlineText = morphlineText.replaceAll("RESOURCES_DIR", new File(tempDir).getAbsolutePath()); + morphlineText = 
morphlineText.replaceAll("\\$\\{SOLR_LOCATOR\\}", "{ collection : collection1 }"); + + FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8"); + } +} diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java new file mode 100644 index 00000000000..c559e0c9cfb --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java @@ -0,0 +1,469 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import java.util.Collections; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.hadoop.dedup.NoChangeUpdateConflictResolver; +import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MapReduceIndexerToolArgumentParserTest extends LuceneTestCase { + + private Configuration conf; + private MapReduceIndexerTool.MyArgumentParser parser; + private MapReduceIndexerTool.Options opts; + private PrintStream oldSystemOut; + private PrintStream oldSystemErr; + private ByteArrayOutputStream bout; + private ByteArrayOutputStream berr; + + private static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/map-reduce/src/test-files"; + private static final File MINIMR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr/minimr"); + + private static final String MORPHLINE_FILE = RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf"; + + private static final Logger LOG = LoggerFactory.getLogger(MapReduceIndexerToolArgumentParserTest.class); + + private static final File solrHomeDirectory = new File(TEMP_DIR, MorphlineGoLiveMiniMRTest.class.getName()); + + @BeforeClass + public static void beforeClass() { + assumeFalse("Does not work on Windows, because it uses UNIX shell commands or POSIX paths", Constants.WINDOWS); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + 
AbstractZkTestCase.SOLRHOME = solrHomeDirectory; + FileUtils.copyDirectory(MINIMR_INSTANCE_DIR, solrHomeDirectory); + + conf = new Configuration(); + parser = new MapReduceIndexerTool.MyArgumentParser(); + opts = new MapReduceIndexerTool.Options(); + oldSystemOut = System.out; + bout = new ByteArrayOutputStream(); + System.setOut(new PrintStream(bout, true, "UTF-8")); + oldSystemErr = System.err; + berr = new ByteArrayOutputStream(); + System.setErr(new PrintStream(berr, true, "UTF-8")); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + System.setOut(oldSystemOut); + System.setErr(oldSystemErr); + } + + @Test + public void testArgsParserTypicalUse() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--morphline-id", "morphline_xyz", + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--mappers", "10", + "--reducers", "9", + "--fanout", "8", + "--max-segments", "7", + "--shards", "1", + "--verbose", + "file:///home", + "file:///dev", + }; + Integer res = parser.parseArgs(args, conf, opts); + assertNull(res != null ? 
res.toString() : "", res); + assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEquals(10, opts.mappers); + assertEquals(9, opts.reducers); + assertEquals(8, opts.fanout); + assertEquals(7, opts.maxSegments); + assertEquals(new Integer(1), opts.shards); + assertEquals(null, opts.fairSchedulerPool); + assertTrue(opts.isVerbose); + assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEquals(RetainMostRecentUpdateConflictResolver.class.getName(), opts.updateConflictResolver); + assertEquals(MORPHLINE_FILE, opts.morphlineFile.getPath()); + assertEquals("morphline_xyz", opts.morphlineId); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserMultipleSpecsOfSameKind() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--input-list", "file:///", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "file:///home", + "file:///dev", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists); + assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserTypicalUseWithEqualsSign() { + String[] args = new String[] { + "--input-list=file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir=file:/tmp/foo", + "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(), + "--mappers=10", + "--shards", "1", + "--verbose", + "file:///home", + "file:///dev", + }; + 
assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEquals(10, opts.mappers); + assertEquals(new Integer(1), opts.shards); + assertEquals(null, opts.fairSchedulerPool); + assertTrue(opts.isVerbose); + assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserMultipleSpecsOfSameKindWithEqualsSign() { + String[] args = new String[] { + "--input-list=file:///tmp", + "--input-list=file:///", + "--morphline-file", MORPHLINE_FILE, + "--output-dir=file:/tmp/foo", + "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "file:///home", + "file:///dev", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists); + assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserHelp() throws UnsupportedEncodingException { + String[] args = new String[] { "--help" }; + assertEquals(new Integer(0), parser.parseArgs(args, conf, opts)); + String helpText = new String(bout.toByteArray(), "UTF-8"); + assertTrue(helpText.contains("MapReduce batch job driver that ")); + assertTrue(helpText.contains("bin/hadoop command")); + assertEquals(0, berr.toByteArray().length); + } + + @Test + public void testArgsParserOk() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", 
MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(new Integer(1), opts.shards); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserUpdateConflictResolver() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--update-conflict-resolver", NoChangeUpdateConflictResolver.class.getName(), + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(NoChangeUpdateConflictResolver.class.getName(), opts.updateConflictResolver); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserUnknownArgName() throws Exception { + String[] args = new String[] { + "--xxxxxxxxinputlist", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserFileNotFound1() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/fileNotFound/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserFileNotFound2() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", "/fileNotFound", + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserIntOutOfRange() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", 
MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--mappers", "-20" + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserIllegalFanout() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--fanout", "1" // must be >= 2 + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserSolrHomeMustContainSolrConfigFile() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--shards", "1", + "--solr-home-dir", "/", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsShardUrlOk() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1", + "--shard-url", "http://localhost:8983/solr/collection2", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Arrays.asList( + Collections.singletonList("http://localhost:8983/solr/collection1"), + Collections.singletonList("http://localhost:8983/solr/collection2")), + opts.shardUrls); + assertEquals(new Integer(2), opts.shards); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsShardUrlMustHaveAParam() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsShardUrlAndShardsSucceeds() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", 
MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--shard-url", "http://localhost:8983/solr/collection1", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsShardUrlNoGoLive() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1" + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEmptySystemErrAndEmptySystemOut(); + assertEquals(new Integer(1), opts.shards); + } + + @Test + public void testArgsShardUrlsAndZkhostAreMutuallyExclusive() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1", + "--shard-url", "http://localhost:8983/solr/collection1", + "--zk-host", "http://localhost:2185", + "--go-live" + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsGoLiveAndSolrUrl() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1", + "--shard-url", "http://localhost:8983/solr/collection1", + "--go-live" + }; + Integer result = parser.parseArgs(args, conf, opts); + assertNull(result); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsZkHostNoGoLive() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
+ "--zk-host", "http://localhost:2185", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsGoLiveZkHostNoCollection() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--zk-host", "http://localhost:2185", + "--go-live" + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsGoLiveNoZkHostOrSolrUrl() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--go-live" + }; + assertArgumentParserException(args); + } + + @Test + public void testNoSolrHomeDirOrZKHost() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void testZKHostNoSolrHomeDirOk() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--zk-host", "http://localhost:2185", + "--collection", "collection1", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEmptySystemErrAndEmptySystemOut(); + } + + private void assertEmptySystemErrAndEmptySystemOut() { + assertEquals(0, bout.toByteArray().length); + assertEquals(0, berr.toByteArray().length); + } + + private void assertArgumentParserException(String[] args) throws UnsupportedEncodingException { + assertEquals("should have returned fail code", new Integer(1), parser.parseArgs(args, conf, opts)); + assertEquals("no sys out expected:" + new String(bout.toByteArray(), "UTF-8"), 0, bout.toByteArray().length); + String usageText; + usageText = new String(berr.toByteArray(), "UTF-8"); + + 
assertTrue("should start with usage msg \"usage: hadoop \":" + usageText, usageText.startsWith("usage: hadoop ")); + } + +} diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java new file mode 100644 index 00000000000..9f53a0333c5 --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java @@ -0,0 +1,402 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.lang.reflect.Array; +import java.util.Arrays; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.util.JarFinder; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.hadoop.hack.MiniMRCluster; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; + +@ThreadLeakAction({Action.WARN}) +@ThreadLeakLingering(linger = 0) +@ThreadLeakZombies(Consequence.CONTINUE) +@ThreadLeakScope(Scope.NONE) +@Slow +public class MorphlineBasicMiniMRTest extends SolrTestCaseJ4 { + + private static final 
boolean ENABLE_LOCAL_JOB_RUNNER = false; // for debugging only + private static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/map-reduce/src/test-files"; + private static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents"; + private static final File MINIMR_CONF_DIR = new File(RESOURCES_DIR + "/solr/minimr"); + + private static final String SEARCH_ARCHIVES_JAR = JarFinder.getJar(MapReduceIndexerTool.class); + + private static MiniDFSCluster dfsCluster = null; + private static MiniMRCluster mrCluster = null; + private static int numRuns = 0; + + private final String inputAvroFile; + private final int count; + + private static String tempDir; + + private static final File solrHomeDirectory = new File(TEMP_DIR, MorphlineBasicMiniMRTest.class.getName()); + + protected MapReduceIndexerTool createTool() { + return new MapReduceIndexerTool(); + } + + public MorphlineBasicMiniMRTest() { + int data = random().nextInt(3); + switch (data) { + case 0: + this.inputAvroFile = "sample-statuses-20120906-141433.avro"; + this.count = 2; + break; + case 1: + this.inputAvroFile = "sample-statuses-20120521-100919.avro"; + this.count = 20; + break; + case 2: + this.inputAvroFile = "sample-statuses-20120906-141433-medium.avro"; + this.count = 2104; + break; + default: + throw new RuntimeException("Test setup is broken"); + } + + } + + @BeforeClass + public static void setupClass() throws Exception { + assumeTrue( + "Currently this test can only be run without the lucene test security policy in place", + System.getProperty("java.security.manager", "").equals("")); + + assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs", + Boolean.parseBoolean(System.getProperty("tests.disableHdfs", "false"))); + + assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8); + assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", 
System.getProperty("java.vm.info", "").contains("IBM J9")); + + AbstractZkTestCase.SOLRHOME = solrHomeDirectory; + FileUtils.copyDirectory(MINIMR_CONF_DIR, solrHomeDirectory); + + tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis(); + new File(tempDir).mkdirs(); + FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml")); + + MRUnitBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes"); + + System.setProperty("hadoop.log.dir", new File(solrHomeDirectory, "logs").getAbsolutePath()); + + int taskTrackers = 1; + int dataNodes = 2; +// String proxyUser = System.getProperty("user.name"); +// String proxyGroup = "g"; +// StringBuilder sb = new StringBuilder(); +// sb.append("127.0.0.1,localhost"); +// for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) { +// sb.append(",").append(i.getCanonicalHostName()); +// } + + createTempDir(); + new File(dataDir, "nm-local-dirs").mkdirs(); + + System.setProperty("solr.hdfs.blockcache.enabled", "false"); + + System.setProperty("test.build.dir", dataDir + File.separator + "hdfs" + File.separator + "test-build-dir"); + System.setProperty("test.build.data", dataDir + File.separator + "hdfs" + File.separator + "build"); + System.setProperty("test.cache.data", dataDir + File.separator + "hdfs" + File.separator + "cache"); + + JobConf conf = new JobConf(); + conf.set("dfs.block.access.token.enable", "false"); + conf.set("dfs.permissions", "true"); + conf.set("hadoop.security.authentication", "simple"); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, dataDir.getPath() + File.separator + "nm-local-dirs"); + conf.set(YarnConfiguration.DEFAULT_NM_LOG_DIRS, dataDir + File.separator + "nm-logs"); + conf.set("testWorkDir", dataDir.getPath() + File.separator + "testWorkDir"); + + dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null); + FileSystem fileSystem = dfsCluster.getFileSystem(); + fileSystem.mkdirs(new 
Path("/tmp")); + fileSystem.mkdirs(new Path("/user")); + fileSystem.mkdirs(new Path("/hadoop/mapred/system")); + fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx")); + fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx")); + fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------")); + String nnURI = fileSystem.getUri().toString(); + int numDirs = 1; + String[] racks = null; + String[] hosts = null; + + mrCluster = new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, hosts, null, conf); + ProxyUsers.refreshSuperUserGroupsConfiguration(conf); + } + + @AfterClass + public static void teardownClass() throws Exception { + System.clearProperty("solr.hdfs.blockcache.enabled"); + System.clearProperty("test.build.dir"); + System.clearProperty("test.build.data"); + System.clearProperty("test.cache.data"); + if (mrCluster != null) { + mrCluster.shutdown(); + mrCluster = null; + } + if (dfsCluster != null) { + dfsCluster.shutdown(); + dfsCluster = null; + } + } + + @After + public void tearDown() throws Exception { + System.clearProperty("hadoop.log.dir"); + System.clearProperty("solr.hdfs.blockcache.enabled"); + + super.tearDown(); + } + + private JobConf getJobConf() { + return mrCluster.createJobConf(); + } + + @Test + public void testPathParts() throws Exception { // see PathParts + FileSystem fs = dfsCluster.getFileSystem(); + int dfsClusterPort = fs.getWorkingDirectory().toUri().getPort(); + assertTrue(dfsClusterPort > 0); + JobConf jobConf = getJobConf(); + Configuration simpleConf = new Configuration(); + + for (Configuration conf : Arrays.asList(jobConf, simpleConf)) { + for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) { + for (String up : Arrays.asList("", "../")) { + String down = up.length() == 0 ? 
"foo/" : ""; + String uploadURL = "hdfs://localhost:12345/user/foo/" + up + "bar.txt" + queryAndFragment; + PathParts parts = new PathParts(uploadURL, conf); + assertEquals(uploadURL, parts.getUploadURL()); + assertEquals("/user/" + down + "bar.txt", parts.getURIPath()); + assertEquals("bar.txt", parts.getName()); + assertEquals("hdfs", parts.getScheme()); + assertEquals("localhost", parts.getHost()); + assertEquals(12345, parts.getPort()); + assertEquals("hdfs://localhost:12345/user/" + down + "bar.txt", parts.getId()); + assertEquals(parts.getId(), parts.getDownloadURL()); + assertFileNotFound(parts); + + uploadURL = "hdfs://localhost/user/foo/" + up + "bar.txt" + queryAndFragment; + parts = new PathParts(uploadURL, conf); + assertEquals(uploadURL, parts.getUploadURL()); + assertEquals("/user/" + down + "bar.txt", parts.getURIPath()); + assertEquals("bar.txt", parts.getName()); + assertEquals("hdfs", parts.getScheme()); + assertEquals("localhost", parts.getHost()); + assertEquals(8020, parts.getPort()); + assertEquals("hdfs://localhost:8020/user/" + down + "bar.txt", parts.getId()); + assertEquals(parts.getId(), parts.getDownloadURL()); + assertFileNotFound(parts); + } + } + } + + for (Configuration conf : Arrays.asList(jobConf)) { + for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) { + for (String up : Arrays.asList("", "../")) { + // verify using absolute path + String down = up.length() == 0 ? 
"foo/" : ""; + String uploadURL = "/user/foo/" + up + "bar.txt" + queryAndFragment; + PathParts parts = new PathParts(uploadURL, conf); + assertEquals(uploadURL, parts.getUploadURL()); + assertEquals("/user/" + down + "bar.txt", parts.getURIPath()); + assertEquals("bar.txt", parts.getName()); + assertEquals("hdfs", parts.getScheme()); + assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost())); + assertEquals(dfsClusterPort, parts.getPort()); + assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + "/user/" + down + "bar.txt") + || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + "/user/" + down + "bar.txt") + ); + assertFileNotFound(parts); + + // verify relative path is interpreted to be relative to user's home dir and resolved to an absolute path + uploadURL = "xuser/foo/" + up + "bar.txt" + queryAndFragment; + parts = new PathParts(uploadURL, conf); + assertEquals(uploadURL, parts.getUploadURL()); + String homeDir = "/user/" + System.getProperty("user.name"); + assertEquals(homeDir + "/xuser/" + down + "bar.txt", parts.getURIPath()); + assertEquals("bar.txt", parts.getName()); + assertEquals("hdfs", parts.getScheme()); + assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost())); + assertEquals(dfsClusterPort, parts.getPort()); + assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt") + || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt") + ); + assertFileNotFound(parts); + } + } + } + + try { + new PathParts("/user/foo/bar.txt", simpleConf); + fail("host/port resolution requires minimr conf, not a simple conf"); + } catch (IllegalArgumentException e) { + ; // expected + } + } + + private void assertFileNotFound(PathParts parts) { + try { + parts.getFileSystem().getFileStatus(parts.getUploadPath()); + fail(); + } catch 
(IOException e) { + ; // expected + } + } + + @Test + public void mrRun() throws Exception { + FileSystem fs = dfsCluster.getFileSystem(); + Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input")); + fs.delete(inDir, true); + String DATADIR = "/user/testing/testMapperReducer/data"; + Path dataDir = fs.makeQualified(new Path(DATADIR)); + fs.delete(dataDir, true); + Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output")); + fs.delete(outDir, true); + + assertTrue(fs.mkdirs(inDir)); + Path INPATH = new Path(inDir, "input.txt"); + OutputStream os = fs.create(INPATH); + Writer wr = new OutputStreamWriter(os, "UTF-8"); + wr.write(DATADIR + "/" + inputAvroFile); + wr.close(); + + assertTrue(fs.mkdirs(dataDir)); + fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir); + + JobConf jobConf = getJobConf(); + if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger and set breakpoints + jobConf.set("mapred.job.tracker", "local"); + } + jobConf.setMaxMapAttempts(1); + jobConf.setMaxReduceAttempts(1); + jobConf.setJar(SEARCH_ARCHIVES_JAR); + + int shards = 2; + int maxReducers = Integer.MAX_VALUE; + if (ENABLE_LOCAL_JOB_RUNNER) { + // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work. + // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/ + maxReducers = 1; + shards = 1; + } + + String[] args = new String[] { + "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf", + "--morphline-id=morphline1", + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--shards=" + shards, + "--verbose", + numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), + numRuns % 3 == 0 ? "--reducers=" + shards : (numRuns % 3 == 1 ? 
"--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) + }; + if (numRuns % 3 == 2) { + args = concat(args, new String[] {"--fanout=2"}); + } + if (numRuns == 0) { + // force (slow) MapReduce based randomization to get coverage for that as well + args = concat(new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"}, args); + } + MapReduceIndexerTool tool = createTool(); + int res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + Job job = tool.job; + assertTrue(job.isComplete()); + assertTrue(job.isSuccessful()); + + if (numRuns % 3 != 2) { + // Only run this check if mtree merge is disabled. + // With mtree merge enabled the BatchWriter counters aren't available anymore because + // variable "job" now refers to the merge job rather than the indexing job + assertEquals("Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN, + count, job.getCounters().findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString()).getValue()); + } + + // Check the output is as expected + outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR); + Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir)); + + System.out.println("outputfiles:" + Arrays.toString(outputFiles)); + + UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards); + + // run again with --dryrun mode: + tool = createTool(); + args = concat(args, new String[] {"--dry-run"}); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + + numRuns++; + } + + protected static T[] concat(T[]... 
arrays) { + if (arrays.length <= 0) { + throw new IllegalArgumentException(); + } + Class clazz = null; + int length = 0; + for (T[] array : arrays) { + clazz = array.getClass(); + length += array.length; + } + T[] result = (T[]) Array.newInstance(clazz.getComponentType(), length); + int pos = 0; + for (T[] array : arrays) { + System.arraycopy(array, 0, result, pos, array.length); + pos += array.length; + } + return result; + } + +} diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java new file mode 100644 index 00000000000..bc6b1634f3c --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java @@ -0,0 +1,813 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.lang.reflect.Array; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.util.JarFinder; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrQuery.ORDER; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.AbstractFullDistribZkTestBase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkCoreNodeProps; +import 
org.apache.solr.common.params.CollectionParams.CollectionAction; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.hadoop.hack.MiniMRClientCluster; +import org.apache.solr.hadoop.hack.MiniMRClientClusterFactory; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; + +@ThreadLeakAction({Action.WARN}) +@ThreadLeakLingering(linger = 0) +@ThreadLeakZombies(Consequence.CONTINUE) +@ThreadLeakScope(Scope.NONE) +@SuppressCodecs({"Lucene3x", "Lucene40"}) +@Slow +public class MorphlineGoLiveMiniMRTest extends AbstractFullDistribZkTestBase { + + private static final int RECORD_COUNT = 2104; + private static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/map-reduce/src/test-files"; + private static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents"; + private static final File MINIMR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr/minimr"); + private static final File MINIMR_CONF_DIR = new File(RESOURCES_DIR + "/solr/minimr"); + + private static final String SEARCH_ARCHIVES_JAR = JarFinder.getJar(MapReduceIndexerTool.class); + + private static MiniDFSCluster dfsCluster = null; + private static MiniMRClientCluster mrCluster = null; + private static String tempDir; + + private final String inputAvroFile1; + 
private final String inputAvroFile2; + private final String inputAvroFile3; + + private static final File solrHomeDirectory = new File(TEMP_DIR, MorphlineGoLiveMiniMRTest.class.getName()); + + @Override + public String getSolrHome() { + return solrHomeDirectory.getPath(); + } + + public MorphlineGoLiveMiniMRTest() { + this.inputAvroFile1 = "sample-statuses-20120521-100919.avro"; + this.inputAvroFile2 = "sample-statuses-20120906-141433.avro"; + this.inputAvroFile3 = "sample-statuses-20120906-141433-medium.avro"; + + fixShardCount = true; + sliceCount = TEST_NIGHTLY ? 7 : 3; + shardCount = TEST_NIGHTLY ? 7 : 3; + } + + @BeforeClass + public static void setupClass() throws Exception { + assumeTrue( + "Currently this test can only be run without the lucene test security policy in place", + System.getProperty("java.security.manager", "").equals("")); + + assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs", + Boolean.parseBoolean(System.getProperty("tests.disableHdfs", "false"))); + + assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8); + assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9")); + + AbstractZkTestCase.SOLRHOME = solrHomeDirectory; + FileUtils.copyDirectory(MINIMR_INSTANCE_DIR, solrHomeDirectory); + + tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis(); + new File(tempDir).mkdirs(); + FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml")); + + MRUnitBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes"); + + + System.setProperty("hadoop.log.dir", new File(dataDir, "logs").getAbsolutePath()); + + int taskTrackers = 2; + int dataNodes = 2; + + System.setProperty("solr.hdfs.blockcache.enabled", "false"); + + JobConf conf = new JobConf(); + conf.set("dfs.block.access.token.enable", "false"); + 
conf.set("dfs.permissions", "true"); + conf.set("hadoop.security.authentication", "simple"); + + conf.set(YarnConfiguration.NM_LOCAL_DIRS, dataDir + File.separator + "nm-local-dirs"); + conf.set(YarnConfiguration.DEFAULT_NM_LOG_DIRS, dataDir + File.separator + "nm-logs"); + + + createTempDir(); + new File(dataDir + File.separator + "nm-local-dirs").mkdirs(); + + System.setProperty("test.build.dir", dataDir + File.separator + "hdfs" + File.separator + "test-build-dir"); + System.setProperty("test.build.data", dataDir + File.separator + "hdfs" + File.separator + "build"); + System.setProperty("test.cache.data", dataDir + File.separator + "hdfs" + File.separator + "cache"); + + dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null); + FileSystem fileSystem = dfsCluster.getFileSystem(); + fileSystem.mkdirs(new Path("/tmp")); + fileSystem.mkdirs(new Path("/user")); + fileSystem.mkdirs(new Path("/hadoop/mapred/system")); + fileSystem.setPermission(new Path("/tmp"), + FsPermission.valueOf("-rwxrwxrwx")); + fileSystem.setPermission(new Path("/user"), + FsPermission.valueOf("-rwxrwxrwx")); + fileSystem.setPermission(new Path("/hadoop/mapred/system"), + FsPermission.valueOf("-rwx------")); + + mrCluster = MiniMRClientClusterFactory.create(MorphlineGoLiveMiniMRTest.class, 1, conf, new File(dataDir, "mrCluster")); + + //new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, + //hosts, null, conf); + + ProxyUsers.refreshSuperUserGroupsConfiguration(conf); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + System.setProperty("host", "127.0.0.1"); + System.setProperty("numShards", Integer.toString(sliceCount)); + URI uri = dfsCluster.getFileSystem().getUri(); + System.setProperty("solr.hdfs.home", uri.toString() + "/" + this.getClass().getName()); + uploadConfFiles(); + } + + @Override + @After + public void tearDown() throws Exception { + super.tearDown(); + System.clearProperty("host"); + System.clearProperty("numShards"); + 
System.clearProperty("solr.hdfs.home"); + } + + @AfterClass + public static void teardownClass() throws Exception { + System.clearProperty("solr.hdfs.blockcache.enabled"); + System.clearProperty("hadoop.log.dir"); + System.clearProperty("test.build.dir"); + System.clearProperty("test.build.data"); + System.clearProperty("test.cache.data"); + + if (mrCluster != null) { + //mrCluster.shutdown(); + mrCluster = null; + } + if (dfsCluster != null) { + dfsCluster.shutdown(); + dfsCluster = null; + } + FileSystem.closeAll(); + } + + private JobConf getJobConf() throws IOException { + JobConf jobConf = new JobConf(mrCluster.getConfig()); + return jobConf; + } + + @Test + @Override + public void testDistribSearch() throws Exception { + super.testDistribSearch(); + } + + @Test + public void testBuildShardUrls() throws Exception { + // 2x3 + Integer numShards = 2; + List urls = new ArrayList(); + urls.add("shard1"); + urls.add("shard2"); + urls.add("shard3"); + urls.add("shard4"); + urls.add("shard5"); + urls.add("shard6"); + List> shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 2, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(3, u.size()); + } + + // 1x6 + numShards = 1; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 1, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(6, u.size()); + } + + // 6x1 + numShards = 6; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 6, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(1, u.size()); + } + + // 3x2 + numShards = 3; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 3, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(2, u.size()); + } + + // null shards, 6x1 + numShards = null; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , 
numShards); + + assertEquals(shardUrls.toString(), 6, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(1, u.size()); + } + + // null shards 3x1 + numShards = null; + + urls = new ArrayList(); + urls.add("shard1"); + urls.add("shard2"); + urls.add("shard3"); + + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 3, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(1, u.size()); + } + + // 2x(2,3) off balance + numShards = 2; + urls = new ArrayList(); + urls.add("shard1"); + urls.add("shard2"); + urls.add("shard3"); + urls.add("shard4"); + urls.add("shard5"); + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 2, shardUrls.size()); + + Set counts = new HashSet(); + counts.add(shardUrls.get(0).size()); + counts.add(shardUrls.get(1).size()); + + assertTrue(counts.contains(2)); + assertTrue(counts.contains(3)); + } + + private String[] prependInitialArgs(String[] args) { + String[] head = new String[] { + "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf", + "--morphline-id=morphline1", + }; + return concat(head, args); + } + + @Override + public void doTest() throws Exception { + + waitForRecoveriesToFinish(false); + + FileSystem fs = dfsCluster.getFileSystem(); + Path inDir = fs.makeQualified(new Path( + "/user/testing/testMapperReducer/input")); + fs.delete(inDir, true); + String DATADIR = "/user/testing/testMapperReducer/data"; + Path dataDir = fs.makeQualified(new Path(DATADIR)); + fs.delete(dataDir, true); + Path outDir = fs.makeQualified(new Path( + "/user/testing/testMapperReducer/output")); + fs.delete(outDir, true); + + assertTrue(fs.mkdirs(inDir)); + Path INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile1); + + JobConf jobConf = getJobConf(); + // enable mapred.job.tracker = local to run in debugger and set breakpoints + // jobConf.set("mapred.job.tracker", "local"); + 
jobConf.setMaxMapAttempts(1); + jobConf.setMaxReduceAttempts(1); + jobConf.setJar(SEARCH_ARCHIVES_JAR); + + MapReduceIndexerTool tool; + int res; + QueryResponse results; + HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url); + String[] args = new String[]{}; + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--log4j=" + ExternalPaths.SOURCE_HOME + "/core/src/test-files/log4j.properties", + "--mappers=3", + random().nextBoolean() ? "--input-list=" + INPATH.toString() : dataDir.toString(), + "--go-live-threads", Integer.toString(random().nextInt(15) + 1), + "--verbose", + "--go-live" + }; + args = prependInitialArgs(args); + List argList = new ArrayList(); + getShardUrlArgs(argList); + args = concat(args, argList.toArray(new String[0])); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + results = server.query(new SolrQuery("*:*")); + assertEquals(20, results.getResults().getNumFound()); + } + + fs.delete(inDir, true); + fs.delete(outDir, true); + fs.delete(dataDir, true); + assertTrue(fs.mkdirs(inDir)); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2); + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--mappers=3", + "--verbose", + "--go-live", + random().nextBoolean() ? 
"--input-list=" + INPATH.toString() : dataDir.toString(), + "--go-live-threads", Integer.toString(random().nextInt(15) + 1) + }; + args = prependInitialArgs(args); + argList = new ArrayList(); + getShardUrlArgs(argList); + args = concat(args, argList.toArray(new String[0])); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + results = server.query(new SolrQuery("*:*")); + + assertEquals(22, results.getResults().getNumFound()); + } + + // try using zookeeper + String collection = "collection1"; + if (random().nextBoolean()) { + // sometimes, use an alias + createAlias("updatealias", "collection1"); + collection = "updatealias"; + } + + fs.delete(inDir, true); + fs.delete(outDir, true); + fs.delete(dataDir, true); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3); + + cloudClient.deleteByQuery("*:*"); + cloudClient.commit(); + assertEquals(0, cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound()); + + args = new String[] { + "--output-dir=" + outDir.toString(), + "--mappers=3", + "--reducers=12", + "--fanout=2", + "--verbose", + "--go-live", + random().nextBoolean() ? 
"--input-list=" + INPATH.toString() : dataDir.toString(), + "--zk-host", zkServer.getZkAddress(), + "--collection", collection + }; + args = prependInitialArgs(args); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + + SolrDocumentList resultDocs = executeSolrQuery(cloudClient, "*:*"); + assertEquals(RECORD_COUNT, resultDocs.getNumFound()); + assertEquals(RECORD_COUNT, resultDocs.size()); + + // perform updates + for (int i = 0; i < RECORD_COUNT; i++) { + SolrDocument doc = resultDocs.get(i); + SolrInputDocument update = new SolrInputDocument(); + for (Map.Entry entry : doc.entrySet()) { + update.setField(entry.getKey(), entry.getValue()); + } + update.setField("user_screen_name", "Nadja" + i); + update.removeField("_version_"); + cloudClient.add(update); + } + cloudClient.commit(); + + // verify updates + SolrDocumentList resultDocs2 = executeSolrQuery(cloudClient, "*:*"); + assertEquals(RECORD_COUNT, resultDocs2.getNumFound()); + assertEquals(RECORD_COUNT, resultDocs2.size()); + for (int i = 0; i < RECORD_COUNT; i++) { + SolrDocument doc = resultDocs.get(i); + SolrDocument doc2 = resultDocs2.get(i); + assertEquals(doc.getFirstValue("id"), doc2.getFirstValue("id")); + assertEquals("Nadja" + i, doc2.getFirstValue("user_screen_name")); + assertEquals(doc.getFirstValue("text"), doc2.getFirstValue("text")); + + // perform delete + cloudClient.deleteById((String)doc.getFirstValue("id")); + } + cloudClient.commit(); + + // verify deletes + assertEquals(0, executeSolrQuery(cloudClient, "*:*").size()); + } + + cloudClient.deleteByQuery("*:*"); + cloudClient.commit(); + assertEquals(0, cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound()); + server.shutdown(); + + // try using zookeeper with replication + String replicatedCollection = "replicated_collection"; + if (TEST_NIGHTLY) { + 
createCollection(replicatedCollection, 11, 3, 11); + } else { + createCollection(replicatedCollection, 2, 3, 2); + } + waitForRecoveriesToFinish(false); + cloudClient.setDefaultCollection(replicatedCollection); + fs.delete(inDir, true); + fs.delete(outDir, true); + fs.delete(dataDir, true); + assertTrue(fs.mkdirs(dataDir)); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3); + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--mappers=3", + "--reducers=22", + "--fanout=2", + "--verbose", + "--go-live", + "--zk-host", zkServer.getZkAddress(), + "--collection", replicatedCollection, dataDir.toString() + }; + args = prependInitialArgs(args); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + + SolrDocumentList resultDocs = executeSolrQuery(cloudClient, "*:*"); + assertEquals(RECORD_COUNT, resultDocs.getNumFound()); + assertEquals(RECORD_COUNT, resultDocs.size()); + + checkConsistency(replicatedCollection); + + // perform updates + for (int i = 0; i < RECORD_COUNT; i++) { + SolrDocument doc = resultDocs.get(i); + SolrInputDocument update = new SolrInputDocument(); + for (Map.Entry entry : doc.entrySet()) { + update.setField(entry.getKey(), entry.getValue()); + } + update.setField("user_screen_name", "@Nadja" + i); + update.removeField("_version_"); + cloudClient.add(update); + } + cloudClient.commit(); + + // verify updates + SolrDocumentList resultDocs2 = executeSolrQuery(cloudClient, "*:*"); + assertEquals(RECORD_COUNT, resultDocs2.getNumFound()); + assertEquals(RECORD_COUNT, resultDocs2.size()); + for (int i = 0; i < RECORD_COUNT; i++) { + SolrDocument doc = resultDocs.get(i); + SolrDocument doc2 = resultDocs2.get(i); + assertEquals(doc.getFieldValues("id"), doc2.getFieldValues("id")); + assertEquals(1, 
doc.getFieldValues("id").size()); + assertEquals(Arrays.asList("@Nadja" + i), doc2.getFieldValues("user_screen_name")); + assertEquals(doc.getFieldValues("text"), doc2.getFieldValues("text")); + + // perform delete + cloudClient.deleteById((String)doc.getFirstValue("id")); + } + cloudClient.commit(); + + // verify deletes + assertEquals(0, executeSolrQuery(cloudClient, "*:*").size()); + } + + // try using solr_url with replication + cloudClient.deleteByQuery("*:*"); + cloudClient.commit(); + assertEquals(0, executeSolrQuery(cloudClient, "*:*").getNumFound()); + assertEquals(0, executeSolrQuery(cloudClient, "*:*").size()); + fs.delete(inDir, true); + fs.delete(dataDir, true); + assertTrue(fs.mkdirs(dataDir)); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3); + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--shards", "2", + "--mappers=3", + "--verbose", + "--go-live", + "--go-live-threads", Integer.toString(random().nextInt(15) + 1), dataDir.toString() + }; + args = prependInitialArgs(args); + + argList = new ArrayList(); + getShardUrlArgs(argList, replicatedCollection); + args = concat(args, argList.toArray(new String[0])); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + + checkConsistency(replicatedCollection); + + assertEquals(RECORD_COUNT, executeSolrQuery(cloudClient, "*:*").size()); + } + + } + + private void getShardUrlArgs(List args) { + for (int i = 0; i < shardCount; i++) { + args.add("--shard-url"); + args.add(cloudJettys.get(i).url); + } + } + + private SolrDocumentList executeSolrQuery(SolrServer collection, String queryString) throws SolrServerException { + SolrQuery query = new SolrQuery(queryString).setRows(2 * RECORD_COUNT).addSort("id", ORDER.asc); + QueryResponse response = collection.query(query); + 
return response.getResults(); + } + + private void checkConsistency(String replicatedCollection) + throws SolrServerException { + Collection slices = cloudClient.getZkStateReader().getClusterState() + .getSlices(replicatedCollection); + for (Slice slice : slices) { + Collection replicas = slice.getReplicas(); + long found = -1; + for (Replica replica : replicas) { + HttpSolrServer client = new HttpSolrServer( + new ZkCoreNodeProps(replica).getCoreUrl()); + SolrQuery query = new SolrQuery("*:*"); + query.set("distrib", false); + QueryResponse replicaResults = client.query(query); + long count = replicaResults.getResults().getNumFound(); + if (found != -1) { + assertEquals(slice.getName() + " is inconsistent " + + new ZkCoreNodeProps(replica).getCoreUrl(), found, count); + } + found = count; + } + } + } + + private void getShardUrlArgs(List args, String replicatedCollection) { + Collection slices = cloudClient.getZkStateReader().getClusterState().getSlices(replicatedCollection); + for (Slice slice : slices) { + Collection replicas = slice.getReplicas(); + for (Replica replica : replicas) { + args.add("--shard-url"); + args.add(new ZkCoreNodeProps(replica).getCoreUrl()); + } + } + } + + private Path upAvroFile(FileSystem fs, Path inDir, String DATADIR, + Path dataDir, String localFile) throws IOException, UnsupportedEncodingException { + Path INPATH = new Path(inDir, "input.txt"); + OutputStream os = fs.create(INPATH); + Writer wr = new OutputStreamWriter(os, "UTF-8"); + wr.write(DATADIR + File.separator + localFile); + wr.close(); + + assertTrue(fs.mkdirs(dataDir)); + fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, localFile), dataDir); + return INPATH; + } + + @Override + public JettySolrRunner createJetty(File solrHome, String dataDir, + String shardList, String solrConfigOverride, String schemaOverride) + throws Exception { + + JettySolrRunner jetty = new JettySolrRunner(solrHome.getAbsolutePath(), + context, 0, solrConfigOverride, schemaOverride); + + 
jetty.setShards(shardList); + + if (System.getProperty("collection") == null) { + System.setProperty("collection", "collection1"); + } + + jetty.start(); + + System.clearProperty("collection"); + + return jetty; + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String name) throws Exception { + putConfig(zkClient, solrhome, name, name); + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String srcName, String destName) + throws Exception { + + File file = new File(solrhome, "conf" + File.separator + srcName); + if (!file.exists()) { + // LOG.info("skipping " + file.getAbsolutePath() + + // " because it doesn't exist"); + return; + } + + String destPath = "/configs/conf1/" + destName; + // LOG.info("put " + file.getAbsolutePath() + " to " + destPath); + zkClient.makePath(destPath, file, false, true); + } + + private void uploadConfFiles() throws Exception { + // upload our own config files + SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), 10000); + putConfig(zkClient, new File(RESOURCES_DIR + "/solr/solrcloud"), + "solrconfig.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, "schema.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, "elevate.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_en.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ar.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_bg.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ca.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_cz.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_da.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_el.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_es.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_eu.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_de.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_fa.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, 
"lang/stopwords_fi.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_fr.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ga.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_gl.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_hi.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_hu.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_hy.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_id.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_it.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ja.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_lv.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_nl.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_no.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_pt.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ro.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ru.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_sv.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_th.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_tr.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_ca.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_fr.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_ga.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_it.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stemdict_nl.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/hyphenations_ga.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "stopwords.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "protwords.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "currency.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, "open-exchange-rates.json"); + putConfig(zkClient, MINIMR_CONF_DIR, "mapping-ISOLatin1Accent.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "old_synonyms.txt"); + putConfig(zkClient, 
MINIMR_CONF_DIR, "synonyms.txt"); + zkClient.close(); + } + + protected static T[] concat(T[]... arrays) { + if (arrays.length <= 0) { + throw new IllegalArgumentException(); + } + Class clazz = null; + int length = 0; + for (T[] array : arrays) { + clazz = array.getClass(); + length += array.length; + } + T[] result = (T[]) Array.newInstance(clazz.getComponentType(), length); + int pos = 0; + for (T[] array : arrays) { + System.arraycopy(array, 0, result, pos, array.length); + pos += array.length; + } + return result; + } + + private NamedList createAlias(String alias, String collections) throws SolrServerException, IOException { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("collections", collections); + params.set("name", alias); + params.set("action", CollectionAction.CREATEALIAS.toString()); + QueryRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + return cloudClient.request(request); + } + + +} diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineMapperTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineMapperTest.java new file mode 100644 index 00000000000..fed109f12df --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineMapperTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mrunit.mapreduce.MapDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.apache.lucene.util.Constants; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.hadoop.morphline.MorphlineMapper; +import org.junit.BeforeClass; +import org.junit.Test; + +public class MorphlineMapperTest extends MRUnitBase { + + @BeforeClass + public static void beforeClass() { + assumeFalse("Does not work on Windows, because it uses UNIX shell commands or POSIX paths", Constants.WINDOWS); + assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8); + assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9")); + } + + @Test + public void testMapper() throws Exception { + MorphlineMapper mapper = new MorphlineMapper(); + MapDriver mapDriver = MapDriver.newMapDriver(mapper);; + + Configuration config = mapDriver.getConfiguration(); + setupHadoopConfig(config); + + mapDriver.withInput(new LongWritable(0L), new Text("hdfs://localhost/" + DOCUMENTS_DIR + "/sample-statuses-20120906-141433.avro")); + + SolrInputDocument sid = new SolrInputDocument(); + sid.addField("id", "uniqueid1"); + sid.addField("user_name", "user1"); + sid.addField("text", "content of record one"); + 
SolrInputDocumentWritable sidw = new SolrInputDocumentWritable(sid); + + mapDriver + .withCacheArchive(solrHomeZip.getAbsolutePath()) + .withOutput(new Text("0"), sidw); + //mapDriver.runTest(); + List> result = mapDriver.run(); + for (Pair p: result) { + System.out.println(p.getFirst()); + System.out.println(p.getSecond()); + } + } +} diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineReducerTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineReducerTest.java new file mode 100644 index 00000000000..665ef04f3ea --- /dev/null +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineReducerTest.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.hadoop;
+
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.TaskID;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
+import org.apache.lucene.util.Constants;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import com.google.common.collect.Lists;
+
+public class MorphlineReducerTest extends MRUnitBase { // drives SolrReducer through an MRUnit ReduceDriver (the only test is currently @Ignore'd)
+
+  @BeforeClass
+  public static void beforeClass2() { // same platform/JVM guards as MorphlineMapperTest, plus one system property
+    assumeFalse("Does not work on Windows, because it uses UNIX shell commands or POSIX paths", Constants.WINDOWS);
+    assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8);
+    assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9"));
+
+    System.setProperty("verifyPartitionAssignment", "false"); // cleared again in afterClass2()
+  }
+
+  @AfterClass
+  public static void afterClass2() {
+    System.clearProperty("verifyPartitionAssignment");
+  }
+
+  public static class MySolrReducer extends SolrReducer { // SolrReducer that remembers its Context so the test can hand it to cleanup()
+    Context context; // assigned in setup(); null until setup() has been called
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+      this.context = context;
+
+      // handle a bug in MRUnit - should be fixed in MRUnit 1.0.0
+      when(context.getTaskAttemptID()).thenAnswer(new Answer() {
+        @Override
+        public TaskAttemptID answer(final InvocationOnMock invocation) {
+          // FIXME MRUNIT seems to pass taskid to the reduce task as mapred.TaskID rather than mapreduce.TaskID
+          return new TaskAttemptID(new TaskID("000000000000", 0, true, 0), 0);
+        }
+      });
+
+      super.setup(context); // runs only after the stub above is installed
+    }
+
+  }
+
+  public static class NullInputFormat extends InputFormat { // InputFormat stub: no splits and no record reader
+    @Override
+    public List getSplits(JobContext context) throws IOException,
+        InterruptedException {
+      return Lists.newArrayList(); // always an empty, freshly created list
+    }
+
+    @Override
+    public RecordReader createRecordReader(InputSplit split,
+        TaskAttemptContext context) throws IOException, InterruptedException {
+      return null; // no reader is ever supplied
+    }
+
+  }
+
+  @Test
+  @Ignore("This test cannot currently work because it uses a local filesystem output path for the indexes and Solr requires hdfs output paths")
+  public void testReducer() throws Exception { // reduces a single document and expects DOCUMENTS_WRITTEN == 1
+    MySolrReducer myReducer = new MySolrReducer();
+    try {
+      ReduceDriver reduceDriver = ReduceDriver
+        .newReduceDriver(myReducer);
+
+      Configuration config = reduceDriver.getConfiguration();
+      setupHadoopConfig(config);
+
+      List values = new ArrayList();
+      SolrInputDocument sid = new SolrInputDocument();
+      String id = "myid1";
+      sid.addField("id", id);
+      sid.addField("text", "some unique text");
+      SolrInputDocumentWritable sidw = new SolrInputDocumentWritable(sid);
+      values.add(sidw);
+      reduceDriver.withInput(new Text(id), values); // the document id doubles as the reduce key
+
+      reduceDriver.withCacheArchive(solrHomeZip.getAbsolutePath());
+
+      reduceDriver.withOutputFormat(SolrOutputFormat.class,
+        NullInputFormat.class);
+
+      reduceDriver.run();
+
+      assertEquals("Expected 1 counter increment", 1,
+        reduceDriver.getCounters().findCounter(SolrCounters.class.getName(),
+          SolrCounters.DOCUMENTS_WRITTEN.toString()).getValue());
+    } finally {
+      myReducer.cleanup(myReducer.context); // NOTE(review): context is still null here if setup() was never invoked
+    }
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/UtilsForTests.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/UtilsForTests.java
new
file mode 100644
index 00000000000..f31237e3cc4
--- /dev/null
+++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/UtilsForTests.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.util.ExternalPaths;
+
+
+public class UtilsForTests { // shared helpers for the map-reduce contrib tests
+  protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/map-reduce/src/test-files";
+
+  public static void validateSolrServerDocumentCount(File solrHomeDir, FileSystem fs, Path outDir, int expectedDocs, int expectedShards)
+      throws IOException, SolrServerException { // asserts the number of "part*" directories under outDir and the total "*:*" hit count across them
+
+    long actualDocs = 0;
+    int actualShards = 0;
+    for (FileStatus dir : fs.listStatus(outDir)) { // for each shard
+      if (dir.getPath().getName().startsWith("part") && dir.isDirectory()) {
+        actualShards++;
+        EmbeddedSolrServer solr = SolrRecordWriter.createEmbeddedSolrServer(
+            new Path(solrHomeDir.getAbsolutePath()), fs, dir.getPath());
+
+        try {
+          SolrQuery query = new SolrQuery();
+          query.setQuery("*:*"); // match-all query: numFound is the shard's document count
+          QueryResponse resp = solr.query(query);
+          long numDocs = resp.getResults().getNumFound();
+          actualDocs += numDocs;
+        } finally {
+          solr.shutdown(); // release the embedded server even when the query throws
+        }
+      }
+    }
+    assertEquals(expectedShards, actualShards);
+    assertEquals(expectedDocs, actualDocs);
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java
new file mode 100644
index 00000000000..be5ea01cd29
--- /dev/null
+++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * A simple interface for a client MR cluster used for testing.
This interface
+ * provides basic methods which are independent of the underlying Mini Cluster
+ * (either through MR1 or MR2).
+ */
+public interface MiniMRClientCluster {
+
+  public void start() throws IOException; // starts the underlying mini cluster
+
+  /**
+   * Stops the cluster and starts it again using the same configuration.
+   */
+  public void restart() throws IOException;
+
+  public void stop() throws IOException; // stops the underlying mini cluster
+
+  public Configuration getConfig() throws IOException; // the configuration of the underlying mini cluster
+
+}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java
new file mode 100644
index 00000000000..2bf721b7a6c
--- /dev/null
+++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.JarFinder;
+
+/**
+ * A MiniMRCluster factory. In MR2, it provides a wrapper MiniMRClientCluster
+ * interface around the MiniMRYarnCluster. While in MR1, it provides such
+ * wrapper around MiniMRCluster. This factory should be used in tests to provide
+ * an easy migration of tests across MR1 and MR2.
+ */
+public class MiniMRClientClusterFactory {
+
+  public static MiniMRClientCluster create(Class caller, int noOfNMs,
+      Configuration conf, File testWorkDir) throws IOException { // convenience overload: caller.getSimpleName() becomes the cluster identifier
+    return create(caller, caller.getSimpleName(), noOfNMs, conf, testWorkDir);
+  }
+
+  public static MiniMRClientCluster create(Class caller, String identifier,
+      int noOfNMs, Configuration conf, File testWorkDir) throws IOException { // stages the required jars, then builds, inits and starts a MiniMRYarnCluster and returns it wrapped in an adapter
+
+    if (conf == null) { // a null conf means "use a default Configuration"
+      conf = new Configuration();
+    }
+
+    FileSystem fs = FileSystem.get(conf);
+
+    Path testRootDir = new Path(testWorkDir.getPath(), identifier + "-tmpDir")
+        .makeQualified(fs);
+    Path appJar = new Path(testRootDir, "MRAppJar.jar");
+
+    // Copy MRAppJar into the test root dir and set its permissions to mode 744.
+    Path appMasterJar = new Path(MiniMRYarnCluster.APPJAR);
+
+    fs.copyFromLocalFile(appMasterJar, appJar);
+    fs.setPermission(appJar, new FsPermission("744"));
+
+    Job job = Job.getInstance(conf); // its Configuration is what the cluster is initialised with below
+
+    job.addFileToClassPath(appJar);
+
+    Path callerJar = new Path(JarFinder.getJar(caller)); // jar containing the calling class, staged the same way as the app jar
+    Path remoteCallerJar = new Path(testRootDir, callerJar.getName());
+    fs.copyFromLocalFile(callerJar, remoteCallerJar);
+    fs.setPermission(remoteCallerJar, new FsPermission("744"));
+    job.addFileToClassPath(remoteCallerJar);
+
+    MiniMRYarnCluster miniMRYarnCluster;
+    try {
+      miniMRYarnCluster = new MiniMRYarnCluster(identifier,
+          noOfNMs, testWorkDir);
+    } catch (Exception e) {
+      throw new RuntimeException(e); // construction failures surface as unchecked exceptions
+    }
+    job.getConfiguration().set("minimrclientcluster.caller.name",
+        identifier); // read back by MiniMRYarnClusterAdapter.restart()
+    job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number",
+        noOfNMs); // read back by MiniMRYarnClusterAdapter.restart()
+    miniMRYarnCluster.init(job.getConfiguration());
+    miniMRYarnCluster.start(); // the returned cluster is already running
+
+    return new MiniMRYarnClusterAdapter(miniMRYarnCluster, testWorkDir);
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java
new file mode 100644
index 00000000000..b399b7a9552
--- /dev/null
+++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobID;
+import org.apache.hadoop.mapred.JobPriority;
+import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.lucene.util.LuceneTestCase;
+
+
+/**
+ * This class is an MR2 replacement for older MR1 MiniMRCluster, that was used
+ * by tests prior to MR2. This replacement class uses the new MiniMRYarnCluster
+ * in MR2 but provides the same old MR1 interface, so tests can be migrated from
+ * MR1 to MR2 with minimal changes.
+ *
+ * Due to major differences between MR1 and MR2, a number of methods are either
+ * unimplemented/unsupported or were re-implemented to provide wrappers around
+ * MR2 functionality.
+ *
+ * @deprecated Use {@link org.apache.solr.hadoop.hack.MiniMRClientClusterFactory}
+ * instead
+ */
+@Deprecated
+public class MiniMRCluster {
+  private static final Log LOG = LogFactory.getLog(MiniMRCluster.class);
+
+  private MiniMRClientCluster mrClientCluster; // created in the 11-argument constructor; every supported call delegates to it
+
+  public String getTaskTrackerLocalDir(int taskTracker) {
+    throw new UnsupportedOperationException();
+  }
+
+  public String[] getTaskTrackerLocalDirs(int taskTracker) {
+    throw new UnsupportedOperationException();
+  }
+
+  class JobTrackerRunner {
+    // Mock class
+  }
+
+  class TaskTrackerRunner {
+    // Mock class
+  }
+
+  public JobTrackerRunner getJobTrackerRunner() {
+    throw new UnsupportedOperationException();
+  }
+
+  TaskTrackerRunner getTaskTrackerRunner(int id) {
+    throw new UnsupportedOperationException();
+  }
+
+  public int getNumTaskTrackers() {
+    throw new UnsupportedOperationException();
+  }
+
+  public void setInlineCleanupThreads() {
+    throw new UnsupportedOperationException();
+  }
+
+  public void waitUntilIdle() {
+    throw new UnsupportedOperationException();
+  }
+
+  private void waitTaskTrackers() {
+    throw new UnsupportedOperationException();
+  }
+
+  public int getJobTrackerPort() {
+    throw new UnsupportedOperationException();
+  }
+
+  public JobConf createJobConf() { // new JobConf copied from the cluster config; null if reading the config fails
+    JobConf jobConf = null;
+    try {
+      jobConf = new JobConf(mrClientCluster.getConfig());
+    } catch (IOException e) {
+      LOG.error(e); // logged only; the method then returns null
+    }
+    return jobConf;
+  }
+
+  public JobConf createJobConf(JobConf conf) { // NOTE(review): the conf argument is never read; the body is identical to createJobConf()
+    JobConf jobConf = null;
+    try {
+      jobConf = new JobConf(mrClientCluster.getConfig());
+    } catch (IOException e) {
+      LOG.error(e); // logged only; the method then returns null
+    }
+    return jobConf;
+  }
+
+  static JobConf configureJobConf(JobConf conf, String namenode,
+      int jobTrackerPort, int jobTrackerInfoPort, UserGroupInformation ugi) {
+    throw new UnsupportedOperationException();
+  }
+
+  public MiniMRCluster(int numTaskTrackers, String namenode, int numDir,
+      String[] racks, String[] hosts) throws Exception { // all shorter constructors chain, directly or indirectly, to the 11-argument one below
+    this(0, 0, numTaskTrackers, namenode, numDir, racks, hosts);
+  }
+
+  public MiniMRCluster(int numTaskTrackers, String namenode, int numDir,
+      String[] racks, String[] hosts, JobConf conf) throws Exception {
+    this(0, 0, numTaskTrackers, namenode, numDir, racks, hosts, null, conf);
+  }
+
+  public MiniMRCluster(int numTaskTrackers, String namenode, int numDir)
+      throws Exception {
+    this(0, 0, numTaskTrackers, namenode, numDir);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks)
+      throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, ugi, null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi, JobConf conf)
+      throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, ugi, conf, 0);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi, JobConf conf,
+      int numTrackerToExclude) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, ugi, conf, numTrackerToExclude, new Clock());
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi, JobConf conf,
+      int numTrackerToExclude, Clock clock) throws Exception { // only numTaskTrackers, namenode and conf are used; the other arguments are accepted but ignored
+    if (conf == null) conf = new JobConf();
+    FileSystem.setDefaultUri(conf, namenode);
+    String identifier = this.getClass().getSimpleName() + "_"
+        + Integer.toString(LuceneTestCase.random().nextInt(Integer.MAX_VALUE)); // random suffix drawn from the Lucene test framework's Random
+    mrClientCluster = MiniMRClientClusterFactory.create(this.getClass(),
+        identifier, numTaskTrackers, conf, new File(conf.get("testWorkDir"))); // the work dir is read from the "testWorkDir" property of conf
+  }
+
+  public UserGroupInformation getUgi() {
+    throw new UnsupportedOperationException();
+  }
+
+  public TaskCompletionEvent[] getTaskCompletionEvents(JobID id, int from,
+      int max) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public void setJobPriority(JobID jobId, JobPriority priority)
+      throws AccessControlException, IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public JobPriority getJobPriority(JobID jobId) {
+    throw new UnsupportedOperationException();
+  }
+
+  public long getJobFinishTime(JobID jobId) {
+    throw new UnsupportedOperationException();
+  }
+
+  public void initializeJob(JobID jobId) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public MapTaskCompletionEventsUpdate getMapTaskCompletionEventsUpdates(
+      int index, JobID jobId, int max) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public JobConf getJobTrackerConf() { // same behaviour as createJobConf(): a JobConf copied from the cluster config, or null on IOException
+    JobConf jobConf = null;
+    try {
+      jobConf = new JobConf(mrClientCluster.getConfig());
+    } catch (IOException e) {
+      LOG.error(e);
+    }
+    return jobConf;
+  }
+
+  public int getFaultCount(String hostName) {
+    throw new UnsupportedOperationException();
+  }
+
+  public void startJobTracker() {
+    // Do nothing
+  }
+
+  public void startJobTracker(boolean wait) {
+    // Do nothing
+  }
+
+  public void stopJobTracker() {
+    // Do nothing
+  }
+
+  public void stopTaskTracker(int id) {
+    // Do nothing
+  }
+
+  public void startTaskTracker(String host, String rack, int idx, int numDir)
+      throws IOException {
+    // Do nothing
+  }
+
+  void addTaskTracker(TaskTrackerRunner taskTracker) {
+    throw new UnsupportedOperationException();
+  }
+
+  int getTaskTrackerID(String trackerName) {
+    throw new UnsupportedOperationException();
+  }
+
+  public void shutdown() { // stops the wrapped cluster; an IOException is logged, not rethrown
+    try {
+      mrClientCluster.stop();
+    } catch (IOException e) {
+      LOG.error(e);
+    }
+  }
+
+  static class Clock { // wall-clock time source; an instance is passed to the last constructor, which does not use it
+    long getTime() {
+      return System.currentTimeMillis();
+    }
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java
new file mode 100644
index 00000000000..8fa1b3132bc
--- /dev/null
+++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Locale;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.LocalContainerLauncher;
+import org.apache.hadoop.mapred.ShuffleHandler;
+import org.apache.hadoop.mapreduce.MRConfig;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
+import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
+import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.util.JarFinder;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
+
+/**
+ * Configures and starts the MR-specific components in the YARN cluster.
+ *
+ */
+public class MiniMRYarnCluster extends MiniYARNCluster {
+
+  public static final String APPJAR = JarFinder.getJar(LocalContainerLauncher.class); // path of the jar that contains LocalContainerLauncher
+
+  private static final Log LOG = LogFactory.getLog(MiniMRYarnCluster.class);
+  private JobHistoryServer historyServer; // created in JobHistoryServerWrapper.serviceStart(); null before that
+  private JobHistoryServerWrapper historyServerWrapper;
+
+  public MiniMRYarnCluster(String testName, File testWorkDir) { // single-NodeManager convenience constructor
+    this(testName, 1, testWorkDir);
+  }
+
+  public MiniMRYarnCluster(String testName, int noOfNMs, File testWorkDir) {
+    super(testName, noOfNMs, 4, 4, testWorkDir);
+    //TODO: add the history server
+    historyServerWrapper = new JobHistoryServerWrapper();
+    addService(historyServerWrapper);
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) throws Exception { // fills in MR-specific settings on conf, prepares the staging/done directories, then delegates to the superclass
+    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
+    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) { // default the staging dir to <testWorkDir>/apps_staging_dir
+      conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(),
+          "apps_staging_dir/").getAbsolutePath());
+    }
+
+    // By default, VMEM monitoring disabled, PMEM monitoring enabled.
+    if (!conf.getBoolean(
+        MRConfig.MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
+        MRConfig.DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
+      conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
+      conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
+    }
+
+    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");
+
+    try {
+      Path stagingPath = FileContext.getFileContext(conf).makeQualified(
+          new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
+      /*
+       * Re-configure the staging path on Windows if the file system is localFs.
+       * We need to use an absolute path that contains the drive letter. The unit
+       * test could run on a different drive than the AM. We can run into the
+       * issue that job files are localized to the drive where the test runs on,
+       * while the AM starts on a different drive and fails to find the job
+       * metafiles. Using absolute path can avoid this ambiguity.
+       */
+      if (Path.WINDOWS) {
+        if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
+          conf.set(MRJobConfig.MR_AM_STAGING_DIR,
+              new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR))
+                  .getAbsolutePath());
+        }
+      }
+      FileContext fc=FileContext.getFileContext(stagingPath.toUri(), conf);
+      if (fc.util().exists(stagingPath)) { // start from an empty staging directory
+        LOG.info(stagingPath + " exists! deleting...");
+        fc.delete(stagingPath, true);
+      }
+      LOG.info("mkdir: " + stagingPath);
+      //mkdir the staging directory so that right permissions are set while running as proxy user
+      fc.mkdir(stagingPath, null, true);
+      //mkdir done directory as well
+      String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
+      Path doneDirPath = fc.makeQualified(new Path(doneDir));
+      fc.mkdir(doneDirPath, null, true);
+    } catch (IOException e) {
+      throw new YarnRuntimeException("Could not create staging directory. ", e);
+    }
+    conf.set(MRConfig.MASTER_ADDRESS, "test"); // The default is local because of
+                                               // which shuffle doesn't happen
+    //configure the shuffle service in NM
+    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
+        new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
+    conf.setClass(String.format(Locale.ENGLISH, YarnConfiguration.NM_AUX_SERVICE_FMT,
+        ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class,
+        Service.class);
+
+    // Non-standard shuffle port
+    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
+
+    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
+        DefaultContainerExecutor.class, ContainerExecutor.class);
+
+    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
+    // for corresponding uberized tests.
+    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
+
+    super.serviceInit(conf);
+  }
+
+  private class JobHistoryServerWrapper extends AbstractService { // child service that owns the JobHistoryServer's start/stop
+    public JobHistoryServerWrapper() {
+      super(JobHistoryServerWrapper.class.getName());
+    }
+
+    @Override
+    public synchronized void serviceStart() throws Exception { // starts the history server on a separate thread and blocks until it has left the INITED state
+      try {
+        if (!getConfig().getBoolean(
+            JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS,
+            JHAdminConfig.DEFAULT_MR_HISTORY_MINICLUSTER_FIXED_PORTS)) {
+          // pick free random ports.
+          getConfig().set(JHAdminConfig.MR_HISTORY_ADDRESS,
+              MiniYARNCluster.getHostname() + ":0");
+          getConfig().set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS,
+              MiniYARNCluster.getHostname() + ":0");
+        }
+        historyServer = new JobHistoryServer();
+        historyServer.init(getConfig());
+        new Thread() {
+          public void run() {
+            historyServer.start();
+          };
+        }.start();
+        while (historyServer.getServiceState() == STATE.INITED) { // polls every 1.5 s; there is no upper bound on the wait
+          LOG.info("Waiting for HistoryServer to start...");
+          Thread.sleep(1500);
+        }
+        //TODO Add a timeout. State.STOPPED check ?
+        if (historyServer.getServiceState() != STATE.STARTED) {
+          throw new IOException("HistoryServer failed to start");
+        }
+        super.serviceStart();
+      } catch (Throwable t) {
+        throw new YarnRuntimeException(t); // every failure, including the IOException above, is rethrown wrapped
+      }
+      //need to do this because historyServer.init creates a new Configuration
+      getConfig().set(JHAdminConfig.MR_HISTORY_ADDRESS,
+          historyServer.getConfig().get(JHAdminConfig.MR_HISTORY_ADDRESS));
+      getConfig().set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS,
+          historyServer.getConfig().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS));
+
+      LOG.info("MiniMRYARN ResourceManager address: "
+          + getConfig().get(YarnConfiguration.RM_ADDRESS));
+      LOG.info("MiniMRYARN ResourceManager web address: "
+          + getConfig().get(YarnConfiguration.RM_WEBAPP_ADDRESS));
+      LOG.info("MiniMRYARN HistoryServer address: "
+          + getConfig().get(JHAdminConfig.MR_HISTORY_ADDRESS));
+      LOG.info("MiniMRYARN HistoryServer web address: "
+          + getConfig().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS));
+    }
+
+    @Override
+    public synchronized void serviceStop() throws Exception {
+      if (historyServer != null) { // null when serviceStart() never created the server
+        historyServer.stop();
+      }
+      super.serviceStop();
+    }
+  }
+
+  public JobHistoryServer getHistoryServer() { // null until the wrapper service has been started
+    return this.historyServer;
+  }
+}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java
new file mode 100644
index 00000000000..08ab881005b
--- /dev/null
+++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.File;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
+import org.apache.hadoop.service.Service.STATE;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+
+/**
+ * An adapter for MiniMRYarnCluster providing a MiniMRClientCluster interface.
+ * This interface could be used by tests across both MR1 and MR2.
+ */
+public class MiniMRYarnClusterAdapter implements MiniMRClientCluster {
+
+  private MiniMRYarnCluster miniMRYarnCluster; // replaced with a new instance by restart()
+
+  private File testWorkDir; // reused when restart() builds the replacement cluster
+
+  private static final Log LOG = LogFactory.getLog(MiniMRYarnClusterAdapter.class);
+
+  public MiniMRYarnClusterAdapter(MiniMRYarnCluster miniMRYarnCluster, File testWorkDir) {
+    this.miniMRYarnCluster = miniMRYarnCluster;
+    this.testWorkDir = testWorkDir;
+  }
+
+  @Override
+  public Configuration getConfig() { // the wrapped cluster's own Configuration object, not a copy
+    return miniMRYarnCluster.getConfig();
+  }
+
+  @Override
+  public void start() {
+    miniMRYarnCluster.start();
+  }
+
+  @Override
+  public void stop() {
+    miniMRYarnCluster.stop();
+  }
+
+  @Override
+  public void restart() { // stops the running cluster and starts a new one from a copy of its configuration
+    if (!miniMRYarnCluster.getServiceState().equals(STATE.STARTED)){ // only a started cluster can be restarted; otherwise warn and do nothing
+      LOG.warn("Cannot restart the mini cluster, start it first");
+      return;
+    }
+    Configuration oldConf = new Configuration(getConfig()); // copy taken before stop()
+    String callerName = oldConf.get("minimrclientcluster.caller.name",
+        this.getClass().getName()); // key written by MiniMRClientClusterFactory.create(); falls back to this class's name
+    int noOfNMs = oldConf.getInt("minimrclientcluster.nodemanagers.number", 1); // key written by MiniMRClientClusterFactory.create(); defaults to 1
+    oldConf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true); // both fixed-port flags are switched on for the replacement cluster
+    oldConf.setBoolean(JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS, true);
+    stop();
+    miniMRYarnCluster = new MiniMRYarnCluster(callerName, noOfNMs, testWorkDir);
+    miniMRYarnCluster.init(oldConf);
+    miniMRYarnCluster.start();
+  }
+
+}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
new file mode 100644
index 00000000000..d02726657e4
--- /dev/null
+++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.hadoop.hack; + +import java.io.File; +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Locale; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.ipc.RPCUtil; +import org.apache.hadoop.yarn.server.api.ResourceTracker; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager; +import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; +import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService; + +public class MiniYARNCluster extends CompositeService 
{ + + private static final Log LOG = LogFactory.getLog(MiniYARNCluster.class); + + // temp fix until metrics system can auto-detect itself running in unit test: + static { + DefaultMetricsSystem.setMiniClusterMode(true); + } + + private NodeManager[] nodeManagers; + private ResourceManager resourceManager; + + private ResourceManagerWrapper resourceManagerWrapper; + + private File testWorkDir; + + // Number of nm-local-dirs per nodemanager + private int numLocalDirs; + // Number of nm-log-dirs per nodemanager + private int numLogDirs; + + /** + * @param testName name of the test + * @param noOfNodeManagers the number of node managers in the cluster + * @param numLocalDirs the number of nm-local-dirs per nodemanager + * @param numLogDirs the number of nm-log-dirs per nodemanager + */ + public MiniYARNCluster(String testName, int noOfNodeManagers, + int numLocalDirs, int numLogDirs, File testWorkDir) { + super(testName.replace("$", "")); + this.numLocalDirs = numLocalDirs; + this.numLogDirs = numLogDirs; + String testSubDir = testName.replace("$", ""); + File targetWorkDir = new File(testWorkDir, testSubDir); + try { + FileContext.getLocalFSFileContext().delete( + new Path(targetWorkDir.getAbsolutePath()), true); + } catch (Exception e) { + LOG.warn("COULD NOT CLEANUP", e); + throw new YarnRuntimeException("could not cleanup test dir: "+ e, e); + } + + if (Shell.WINDOWS) { + // The test working directory can exceed the maximum path length supported + // by some Windows APIs and cmd.exe (260 characters). To work around this, + // create a symlink in temporary storage with a much shorter path, + // targeting the full path to the test working directory. Then, use the + // symlink as the test working directory. 
+ String targetPath = targetWorkDir.getAbsolutePath(); + File link = new File(System.getProperty("java.io.tmpdir"), + String.valueOf(System.currentTimeMillis())); + String linkPath = link.getAbsolutePath(); + + try { + FileContext.getLocalFSFileContext().delete(new Path(linkPath), true); + } catch (IOException e) { + throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e); + } + + // Guarantee target exists before creating symlink. + targetWorkDir.mkdirs(); + + ShellCommandExecutor shexec = new ShellCommandExecutor( + Shell.getSymlinkCommand(targetPath, linkPath)); + try { + shexec.execute(); + } catch (IOException e) { + throw new YarnRuntimeException(String.format(Locale.ENGLISH, + "failed to create symlink from %s to %s, shell output: %s", linkPath, + targetPath, shexec.getOutput()), e); + } + + this.testWorkDir = link; + } else { + this.testWorkDir = targetWorkDir; + } + + resourceManagerWrapper = new ResourceManagerWrapper(); + addService(resourceManagerWrapper); + nodeManagers = new CustomNodeManager[noOfNodeManagers]; + for(int index = 0; index < noOfNodeManagers; index++) { + addService(new NodeManagerWrapper(index)); + nodeManagers[index] = new CustomNodeManager(); + } + } + + @Override + public void serviceInit(Configuration conf) throws Exception { + super.serviceInit(conf instanceof YarnConfiguration ? 
conf + : new YarnConfiguration( + conf)); + } + + public File getTestWorkDir() { + return testWorkDir; + } + + public ResourceManager getResourceManager() { + return this.resourceManager; + } + + public NodeManager getNodeManager(int i) { + return this.nodeManagers[i]; + } + + public static String getHostname() { + try { + return InetAddress.getLocalHost().getHostName(); + } + catch (UnknownHostException ex) { + throw new RuntimeException(ex); + } + } + + private class ResourceManagerWrapper extends AbstractService { + public ResourceManagerWrapper() { + super(ResourceManagerWrapper.class.getName()); + } + + @Override + public synchronized void serviceStart() throws Exception { + try { + getConfig().setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true); + if (!getConfig().getBoolean( + YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, + YarnConfiguration.DEFAULT_YARN_MINICLUSTER_FIXED_PORTS)) { + // pick free random ports. + String hostname = MiniYARNCluster.getHostname(); + getConfig().set(YarnConfiguration.RM_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_ADMIN_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_SCHEDULER_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_WEBAPP_ADDRESS, + hostname + ":0"); + } + resourceManager = new ResourceManager() { + @Override + protected void doSecureLogin() throws IOException { + // Don't try to login using keytab in the testcase. + }; + }; + resourceManager.init(getConfig()); + new Thread() { + public void run() { + resourceManager.start(); + }; + }.start(); + int waitCount = 0; + while (resourceManager.getServiceState() == STATE.INITED + && waitCount++ < 60) { + LOG.info("Waiting for RM to start..."); + Thread.sleep(1500); + } + if (resourceManager.getServiceState() != STATE.STARTED) { + // RM could have failed. + throw new IOException( + "ResourceManager failed to start. 
Final state is " + + resourceManager.getServiceState()); + } + super.serviceStart(); + } catch (Throwable t) { + throw new YarnRuntimeException(t); + } + LOG.info("MiniYARN ResourceManager address: " + + getConfig().get(YarnConfiguration.RM_ADDRESS)); + LOG.info("MiniYARN ResourceManager web address: " + + getConfig().get(YarnConfiguration.RM_WEBAPP_ADDRESS)); + } + + @Override + public synchronized void serviceStop() throws Exception { + if (resourceManager != null) { + resourceManager.stop(); + } + super.serviceStop(); + + if (Shell.WINDOWS) { + // On Windows, clean up the short temporary symlink that was created to + // work around path length limitation. + String testWorkDirPath = testWorkDir.getAbsolutePath(); + try { + FileContext.getLocalFSFileContext().delete(new Path(testWorkDirPath), + true); + } catch (IOException e) { + LOG.warn("could not cleanup symlink: " + + testWorkDir.getAbsolutePath()); + } + } + } + } + + private class NodeManagerWrapper extends AbstractService { + int index = 0; + + public NodeManagerWrapper(int i) { + super(NodeManagerWrapper.class.getName() + "_" + i); + index = i; + } + + public synchronized void serviceInit(Configuration conf) throws Exception { + Configuration config = new YarnConfiguration(conf); + super.serviceInit(config); + } + + /** + * Create local/log directories + * @param dirType type of directories i.e. local dirs or log dirs + * @param numDirs number of directories + * @return the created directories as a comma delimited String + */ + private String prepareDirs(String dirType, int numDirs) { + File []dirs = new File[numDirs]; + String dirsString = ""; + for (int i = 0; i < numDirs; i++) { + dirs[i]= new File(testWorkDir, MiniYARNCluster.this.getName() + + "-" + dirType + "Dir-nm-" + index + "_" + i); + dirs[i].mkdirs(); + LOG.info("Created " + dirType + "Dir in " + dirs[i].getAbsolutePath()); + String delimiter = (i > 0) ? 
"," : ""; + dirsString = dirsString.concat(delimiter + dirs[i].getAbsolutePath()); + } + return dirsString; + } + + public synchronized void serviceStart() throws Exception { + try { + // create nm-local-dirs and configure them for the nodemanager + String localDirsString = prepareDirs("local", numLocalDirs); + getConfig().set(YarnConfiguration.NM_LOCAL_DIRS, localDirsString); + // create nm-log-dirs and configure them for the nodemanager + String logDirsString = prepareDirs("log", numLogDirs); + getConfig().set(YarnConfiguration.NM_LOG_DIRS, logDirsString); + + File remoteLogDir = + new File(testWorkDir, MiniYARNCluster.this.getName() + + "-remoteLogDir-nm-" + index); + remoteLogDir.mkdir(); + getConfig().set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + remoteLogDir.getAbsolutePath()); + // By default AM + 2 containers + getConfig().setInt(YarnConfiguration.NM_PMEM_MB, 4*1024); + getConfig().set(YarnConfiguration.NM_ADDRESS, + MiniYARNCluster.getHostname() + ":0"); + getConfig().set(YarnConfiguration.NM_LOCALIZER_ADDRESS, + MiniYARNCluster.getHostname() + ":0"); + getConfig().set(YarnConfiguration.NM_WEBAPP_ADDRESS, + MiniYARNCluster.getHostname() + ":0"); + + // Disable resource checks by default + if (!getConfig().getBoolean( + YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING, + YarnConfiguration. + DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) { + getConfig().setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); + getConfig().setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); + } + + LOG.info("Starting NM: " + index); + nodeManagers[index].init(getConfig()); + new Thread() { + public void run() { + nodeManagers[index].start(); + }; + }.start(); + int waitCount = 0; + while (nodeManagers[index].getServiceState() == STATE.INITED + && waitCount++ < 60) { + LOG.info("Waiting for NM " + index + " to start..."); + Thread.sleep(1000); + } + if (nodeManagers[index].getServiceState() != STATE.STARTED) { + // RM could have failed. 
+ throw new IOException("NodeManager " + index + " failed to start"); + } + super.serviceStart(); + } catch (Throwable t) { + throw new YarnRuntimeException(t); + } + } + + @Override + public synchronized void serviceStop() throws Exception { + if (nodeManagers[index] != null) { + nodeManagers[index].stop(); + } + super.serviceStop(); + } + } + + private class CustomNodeManager extends NodeManager { + @Override + protected void doSecureLogin() throws IOException { + // Don't try to login using keytab in the testcase. + }; + + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + return new NodeStatusUpdaterImpl(context, dispatcher, + healthChecker, metrics) { + @Override + protected ResourceTracker getRMClient() { + final ResourceTrackerService rt = resourceManager + .getResourceTrackerService(); + final RecordFactory recordFactory = + RecordFactoryProvider.getRecordFactory(null); + + // For in-process communication without RPC + return new ResourceTracker() { + + @Override + public NodeHeartbeatResponse nodeHeartbeat( + NodeHeartbeatRequest request) throws YarnException, + IOException { + NodeHeartbeatResponse response = recordFactory.newRecordInstance( + NodeHeartbeatResponse.class); + try { + response = rt.nodeHeartbeat(request); + } catch (YarnException e) { + LOG.info("Exception in heartbeat from node " + + request.getNodeStatus().getNodeId(), e); + throw e; + } + return response; + } + + @Override + public RegisterNodeManagerResponse registerNodeManager( + RegisterNodeManagerRequest request) + throws YarnException, IOException { + RegisterNodeManagerResponse response = recordFactory. 
+ newRecordInstance(RegisterNodeManagerResponse.class); + try { + response = rt.registerNodeManager(request); + } catch (YarnException e) { + LOG.info("Exception in node registration from " + + request.getNodeId().toString(), e); + throw e; + } + return response; + } + }; + }; + + @Override + protected void stopRMProxy() { + return; + } + }; + }; + } +} diff --git a/solr/contrib/morphlines-cell/build.xml b/solr/contrib/morphlines-cell/build.xml new file mode 100644 index 00000000000..352e7cf106a --- /dev/null +++ b/solr/contrib/morphlines-cell/build.xml @@ -0,0 +1,143 @@ + + + + + + + + Solr Cell Morphline commands. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/ivy.xml b/solr/contrib/morphlines-cell/ivy.xml new file mode 100644 index 00000000000..1394c71b7b6 --- /dev/null +++ b/solr/contrib/morphlines-cell/ivy.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java new file mode 100644 index 00000000000..dc1ae418255 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java @@ -0,0 +1,345 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.cell; + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.params.MultiMapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.DateUtil; +import org.apache.solr.handler.extraction.ExtractingParams; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.handler.extraction.SolrContentHandlerFactory; +import org.apache.solr.morphlines.solr.SolrLocator; +import org.apache.solr.schema.IndexSchema; +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.TeeContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.apache.tika.sax.xpath.Matcher; +import org.apache.tika.sax.xpath.MatchingContentHandler; +import org.apache.tika.sax.xpath.XPathParser; +import org.apache.xml.serialize.OutputFormat; +import org.apache.xml.serialize.XMLSerializer; +import org.xml.sax.ContentHandler; +import 
org.xml.sax.SAXException; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Configs; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.stdio.AbstractParser; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ListMultimap; +import com.google.common.io.Closeables; +import com.typesafe.config.Config; + +/** + * Command that pipes the first attachment of a record into one of the given Tika parsers, then maps + * the Tika output back to a record using SolrCell. + *

    + * The Tika parser is chosen from the configurable list of parsers, depending on the MIME type + * specified in the input record. Typically, this requires an upstream DetectMimeTypeBuilder + * in a prior command. + */ +public final class SolrCellBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("solrCell"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new SolrCell(this, config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class SolrCell extends AbstractParser { + + private final IndexSchema schema; + private final List dateFormats; + private final String xpathExpr; + private final List parsers = new ArrayList(); + private final SolrContentHandlerFactory solrContentHandlerFactory; + + private final SolrParams solrParams; + private final Map mediaTypeToParserMap; + + private static final XPathParser PARSER = new XPathParser("xhtml", XHTMLContentHandler.XHTML); + + public static final String ADDITIONAL_SUPPORTED_MIME_TYPES = "additionalSupportedMimeTypes"; + + public SolrCell(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { + super(builder, config, parent, child, context); + + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + this.schema = locator.getIndexSchema(); + Preconditions.checkNotNull(schema); + LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values())); + + ListMultimap cellParams = ArrayListMultimap.create(); + String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, 
null); + if (uprefix != null) { + cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix); + } + for (String capture : getConfigs().getStringList(config, ExtractingParams.CAPTURE_ELEMENTS, Collections.emptyList())) { + cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture); + } + Config fmapConfig = getConfigs().getConfig(config, "fmap", null); + if (fmapConfig != null) { + for (Map.Entry entry : new Configs().getEntrySet(fmapConfig)) { + cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString()); + } + } + String captureAttributes = getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null); + if (captureAttributes != null) { + cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes); + } + String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null); + if (lowerNames != null) { + cellParams.put(ExtractingParams.LOWERNAMES, lowerNames); + } + String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null); + if (defaultField != null) { + cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField); + } + xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null); + if (xpathExpr != null) { + cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr); + } + + this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList(DateUtil.DEFAULT_DATE_FORMATS)); + + String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName()); + Class factoryClass; + try { + factoryClass = (Class)Class.forName(handlerStr); + } catch (ClassNotFoundException cnfe) { + throw new MorphlineCompilationException("Could not find class " + + handlerStr + " to use for " + "solrContentHandlerFactory", config, cnfe); + } + this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config); + + this.mediaTypeToParserMap = new HashMap(); + 
//MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize() + + List parserConfigs = getConfigs().getConfigList(config, "parsers"); + for (Config parserConfig : parserConfigs) { + String parserClassName = getConfigs().getString(parserConfig, "parser"); + + Object obj; + try { + obj = Class.forName(parserClassName).newInstance(); + } catch (Throwable e) { + throw new MorphlineCompilationException("Cannot instantiate Tika parser: " + parserClassName, config, e); + } + if (!(obj instanceof Parser)) { + throw new MorphlineCompilationException("Tika parser " + obj.getClass().getName() + + " must be an instance of class " + Parser.class.getName(), config); + } + Parser parser = (Parser) obj; + this.parsers.add(parser); + + List mediaTypes = getConfigs().getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.emptyList()); + for (String mediaTypeStr : mediaTypes) { + MediaType mediaType = parseMediaType(mediaTypeStr); + addSupportedMimeType(mediaTypeStr); + this.mediaTypeToParserMap.put(mediaType, parser); + } + + if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) { + for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) { + mediaType = mediaType.getBaseType(); + addSupportedMimeType(mediaType.toString()); + this.mediaTypeToParserMap.put(mediaType, parser); + } + List extras = getConfigs().getStringList(parserConfig, ADDITIONAL_SUPPORTED_MIME_TYPES, Collections.emptyList()); + for (String mediaTypeStr : extras) { + MediaType mediaType = parseMediaType(mediaTypeStr); + addSupportedMimeType(mediaTypeStr); + this.mediaTypeToParserMap.put(mediaType, parser); + } + } + } + //LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap); + + Map tmp = new HashMap(); + for (Map.Entry> entry : cellParams.asMap().entrySet()) { + tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()])); + } + this.solrParams = new MultiMapSolrParams(tmp); + validateArguments(); + } + + @Override + protected boolean 
doProcess(Record record, InputStream inputStream) { + Parser parser = detectParser(record); + if (parser == null) { + return false; + } + + ParseContext parseContext = new ParseContext(); + + // necessary for gzipped files or tar files, etc! copied from TikaCLI + parseContext.set(Parser.class, parser); + + Metadata metadata = new Metadata(); + for (Entry entry : record.getFields().entries()) { + metadata.add(entry.getKey(), entry.getValue().toString()); + } + + SolrContentHandler handler = solrContentHandlerFactory.createSolrContentHandler(metadata, solrParams, schema); + + try { + inputStream = TikaInputStream.get(inputStream); + + ContentHandler parsingHandler = handler; + StringWriter debugWriter = null; + if (LOG.isTraceEnabled()) { + debugWriter = new StringWriter(); + ContentHandler serializer = new XMLSerializer(debugWriter, new OutputFormat("XML", "UTF-8", true)); + parsingHandler = new TeeContentHandler(parsingHandler, serializer); + } + + // String xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()"; + if (xpathExpr != null) { + Matcher matcher = PARSER.parse(xpathExpr); + parsingHandler = new MatchingContentHandler(parsingHandler, matcher); + } + + try { + parser.parse(inputStream, parsingHandler, metadata, parseContext); + } catch (IOException e) { + throw new MorphlineRuntimeException("Cannot parse", e); + } catch (SAXException e) { + throw new MorphlineRuntimeException("Cannot parse", e); + } catch (TikaException e) { + throw new MorphlineRuntimeException("Cannot parse", e); + } + + LOG.trace("debug XML doc: {}", debugWriter); + } finally { + if (inputStream != null) { + Closeables.closeQuietly(inputStream); + } + } + + SolrInputDocument doc = handler.newDocument(); + LOG.debug("solr doc: {}", doc); + Record outputRecord = toRecord(doc); + return getChild().process(outputRecord); + } + + private Parser detectParser(Record record) { + if (!hasAtLeastOneMimeType(record)) { + return null; + } + String mediaTypeStr = (String) 
record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE); //ExtractingParams.STREAM_TYPE); + assert mediaTypeStr != null; + + MediaType mediaType = parseMediaType(mediaTypeStr).getBaseType(); + Parser parser = mediaTypeToParserMap.get(mediaType); // fast path + if (parser != null) { + return parser; + } + // wildcard matching + for (Map.Entry entry : mediaTypeToParserMap.entrySet()) { + if (isMediaTypeMatch(mediaType, entry.getKey())) { + return entry.getValue(); + } + } + if (LOG.isDebugEnabled()) { + LOG.debug("No supported MIME type parser found for " + Fields.ATTACHMENT_MIME_TYPE + "=" + mediaTypeStr); + } + return null; + } + + private boolean hasAtLeastOneMimeType(Record record) { + if (!record.getFields().containsKey(Fields.ATTACHMENT_MIME_TYPE)) { + LOG.debug("Command failed because of missing MIME type for record: {}", record); + return false; + } + return true; + } + + private MediaType parseMediaType(String mediaTypeStr) { + MediaType mediaType = MediaType.parse(mediaTypeStr.trim().toLowerCase(Locale.ROOT)); + return mediaType.getBaseType(); + }; + + /** Returns true if mediaType falls withing the given range (pattern), false otherwise */ + private boolean isMediaTypeMatch(MediaType mediaType, MediaType rangePattern) { + String WILDCARD = "*"; + String rangePatternType = rangePattern.getType(); + String rangePatternSubtype = rangePattern.getSubtype(); + return (rangePatternType.equals(WILDCARD) || rangePatternType.equals(mediaType.getType())) + && (rangePatternSubtype.equals(WILDCARD) || rangePatternSubtype.equals(mediaType.getSubtype())); + } + + private static SolrContentHandlerFactory getSolrContentHandlerFactory( + Class factoryClass, Collection dateFormats, Config config) { + try { + return factoryClass.getConstructor(Collection.class).newInstance(dateFormats); + } catch (NoSuchMethodException nsme) { + throw new MorphlineCompilationException("Unable to find valid constructor of type " + + factoryClass.getName() + " for creating SolrContentHandler", 
config, nsme); + } catch (Exception e) { + throw new MorphlineCompilationException("Unexpected exception when trying to create SolrContentHandlerFactory of type " + + factoryClass.getName(), config, e); + } + } + + private Record toRecord(SolrInputDocument doc) { + Record record = new Record(); + for (Entry entry : doc.entrySet()) { + record.getFields().putAll(entry.getKey(), entry.getValue().getValues()); + } + return record; + } + + } + +} diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java new file mode 100644 index 00000000000..81f49afd4e5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.cell; + +import java.util.Collection; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.handler.extraction.SolrContentHandlerFactory; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.tika.metadata.Metadata; + +/** + * {@link SolrContentHandler} and associated factory that trims each field value and then strips Unicode non-characters and non-printable control characters on output. + * This prevents exceptions on parsing integer fields inside Solr server. + */ +public class StripNonCharSolrContentHandlerFactory extends SolrContentHandlerFactory { + + public StripNonCharSolrContentHandlerFactory(Collection dateFormats) { + super(dateFormats); + } + + @Override + public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { + return new StripNonCharSolrContentHandler(metadata, params, schema, dateFormats); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class StripNonCharSolrContentHandler extends SolrContentHandler { + + public StripNonCharSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) { + super(metadata, params, schema, dateFormats); + } + + /** + * Strip all Unicode non-characters (U+FDD0..U+FDEF inclusive, and the last two code points of every plane) and all control characters below U+0020 except tab, line feed and carriage return; these can cause SolrReducer problems if present. + * Adapted from Apache Nutch; iterates by code point (not by UTF-16 char) so that supplementary-plane non-characters are stripped as well.
+ */ + private static String stripNonCharCodepoints(String input) { + StringBuilder stripped = new StringBuilder(input.length()); + int ch; + for (int i = 0; i < input.length(); i += Character.charCount(ch)) { + ch = input.codePointAt(i); + // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:] + // and non-printable control characters except tabulator, new line and carriage return + if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000 + ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range + (ch < 0xfdd0 || ch > 0xfdef) && // 0xfdd0 - 0xfdef, both endpoints are non-characters + (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) { + stripped.appendCodePoint(ch); + } + } + return stripped.toString(); + } + + @Override + protected String transformValue(String val, SchemaField schemaField) { + String ret = super.transformValue(val, schemaField).trim(); + ret = stripNonCharCodepoints(ret); + return ret; + } + } +} diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java new file mode 100644 index 00000000000..6e7df593ff8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.cell; + +import java.util.Collection; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.handler.extraction.SolrContentHandlerFactory; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.tika.metadata.Metadata; + +/** + * {@link SolrContentHandler} and associated factory that trims leading and trailing whitespace from each field value on output, after the superclass transformation has been applied. + * This prevents exceptions on parsing integer fields inside Solr server. + */ +public class TrimSolrContentHandlerFactory extends SolrContentHandlerFactory { + + public TrimSolrContentHandlerFactory(Collection dateFormats) { + super(dateFormats); + } + + @Override + public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { + return new TrimSolrContentHandler(metadata, params, schema, dateFormats); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class TrimSolrContentHandler extends SolrContentHandler { + + public TrimSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) { + super(metadata, params, schema, dateFormats); + } + + @Override + protected String transformValue(String val, SchemaField schemaField) { + return super.transformValue(val, schemaField).trim(); + } + } +} diff --git
a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html new file mode 100644 index 00000000000..9d5daec89bb --- /dev/null +++ b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html @@ -0,0 +1,22 @@ + + + + +Morphlines Solr Cell related code. + + diff --git a/solr/contrib/morphlines-cell/src/java/overview.html b/solr/contrib/morphlines-cell/src/java/overview.html new file mode 100644 index 00000000000..3e25367d302 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/java/overview.html @@ -0,0 +1,21 @@ + + + +Apache Solr Search Server: Solr Cell Morphline Commands + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from 
the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the Dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file is removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building your own stoptag set. +# +# The entire possible tagset is provided below for convenience.
+# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 
健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. 
In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. 
ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. 
が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. 
かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. 
+非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. 
+# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un 
+d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která 
+nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt 
b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا 
+كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez 
+fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a 
+aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. 
+अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami 
+kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null 
+++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not 
+più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 
--- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam 
+kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed 
under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak 
+arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings 
and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. 
+ +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/schema.xml new file mode 100644 index 00000000000..ae2c56d18ae --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/schema.xml @@ -0,0 +1,947 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 
sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? 
+ + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/currency.xml b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt new file mode 100644 index 
00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt 
b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. 
+# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 
健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. 
In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. 
ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. 
が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. 
かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. 
+非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat 
+doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde 
+už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt new file 
mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين 
+تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä 
näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt new file 
mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu 
+teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर 
+जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu 
+kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more 
+quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat 
+kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | she +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD 
License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana 
+bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. 
Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +# +# Notice that a single half-width space separates tokens and readings, and +# that the number of tokens and readings must match exactly. +# +# Also notice that multiple entries with the same <text> is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. 
+ +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/schema.xml b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/schema.xml new file mode 100644 index 00000000000..65192efe442 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml new file mode 100644 index 00000000000..beff1b2af0a --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml @@ -0,0 +1,1784 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + 
+ + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score 
+ + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/minimr/solr.xml b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/minimr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- 
/dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt 
b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 
山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. 
あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 
町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. 
お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. 
+# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat 
+doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde 
+už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt new file 
mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين 
+تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä 
näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt new file 
mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu 
+teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर 
+जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu 
+kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more 
+quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat 
+kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD 
License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana 
+bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. 
Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. 
+ +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml new file mode 100644 index 00000000000..b133c135f31 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml new file mode 100644 index 00000000000..f9683b27db7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml @@ -0,0 +1,1789 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + 
+ 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + 
+ + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/solr.xml b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/mrunit/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solr.xml b/solr/contrib/morphlines-cell/src/test-files/solr/solr.xml new file mode 100644 index 00000000000..4604f60476f --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un 
+m +t +s +v +d diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. 
+# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. 
インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. 
+#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. 
(日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. 
この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. 
がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. 
[..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns 
+dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první 
+vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt 
b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي 
+روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient 
+fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ 
b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. 
+अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 
@@ +# example set of Armenian stopwords. +այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah 
+jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt 
b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno 
| a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git 
a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām 
+kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why 
+varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# 
(http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for 
Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml new file mode 100644 index 00000000000..83080dfa40c --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml @@ -0,0 +1,914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 
features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + 
+ + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml b/solr/contrib/morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml new file mode 100644 index 00000000000..a37ab12ecfe --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml @@ -0,0 +1,1787 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + 
${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + 
name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java new file mode 100644 index 00000000000..912febceef6 --- /dev/null +++ b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java @@ -0,0 +1,215 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.cell; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.io.FileUtils; +import org.apache.lucene.util.Constants; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.common.util.DateUtil; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase; +import org.apache.solr.schema.IndexSchema; +import org.apache.tika.metadata.Metadata; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase { + + private Map expectedRecords = new HashMap(); + + @BeforeClass + public static void beforeClass2() { + assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8); + assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9")); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + String path = RESOURCES_DIR + "/test-documents"; + expectedRecords.put(path + "/sample-statuses-20120906-141433.avro", 2); + expectedRecords.put(path + "/sample-statuses-20120906-141433", 2); + expectedRecords.put(path + "/sample-statuses-20120906-141433.gz", 2); + expectedRecords.put(path + "/sample-statuses-20120906-141433.bz2", 2); + expectedRecords.put(path + "/cars.csv", 
5); + expectedRecords.put(path + "/cars.csv.gz", 5); + expectedRecords.put(path + "/cars.tar.gz", 4); + expectedRecords.put(path + "/cars.tsv", 5); + expectedRecords.put(path + "/cars.ssv", 5); + expectedRecords.put(path + "/test-documents.7z", 9); + expectedRecords.put(path + "/test-documents.cpio", 9); + expectedRecords.put(path + "/test-documents.tar", 9); + expectedRecords.put(path + "/test-documents.tbz2", 9); + expectedRecords.put(path + "/test-documents.tgz", 9); + expectedRecords.put(path + "/test-documents.zip", 9); + expectedRecords.put(path + "/multiline-stacktrace.log", 4); + + FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml")); + } + + @Test + public void testSolrCellJPGCompressed() throws Exception { + + morphline = createMorphline("test-morphlines/solrCellJPGCompressed"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testJPEG_EXIF.jpg", + path + "/testJPEG_EXIF.jpg.gz", + path + "/testJPEG_EXIF.jpg.tar.gz", + //path + "/jpeg2000.jp2", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + @Test + public void testSolrCellXML() throws Exception { + morphline = createMorphline("test-morphlines/solrCellXML"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testXML2.xml", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + @Test + public void testSolrCellDocumentTypes() throws Exception { + + morphline = createMorphline("test-morphlines/solrCellDocumentTypes"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testBMPfp.txt", + path + "/boilerplate.html", + path + "/NullHeader.docx", + path + "/testWORD_various.doc", + path + "/testPDF.pdf", + path + "/testJPEG_EXIF.jpg", + path + "/testJPEG_EXIF.jpg.gz", + path + "/testJPEG_EXIF.jpg.tar.gz", + path + "/testXML.xml", +// path + "/cars.csv", +// path + "/cars.tsv", +// path + 
"/cars.ssv", +// path + "/cars.csv.gz", +// path + "/cars.tar.gz", + path + "/sample-statuses-20120906-141433.avro", + path + "/sample-statuses-20120906-141433", + path + "/sample-statuses-20120906-141433.gz", + path + "/sample-statuses-20120906-141433.bz2", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + @Test + public void testSolrCellDocumentTypes2() throws Exception { + morphline = createMorphline("test-morphlines/solrCellDocumentTypes"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testPPT_various.ppt", + path + "/testPPT_various.pptx", + path + "/testEXCEL.xlsx", + path + "/testEXCEL.xls", + path + "/testPages.pages", + //path + "/testNumbers.numbers", + //path + "/testKeynote.key", + + path + "/testRTFVarious.rtf", + path + "/complex.mbox", + path + "/test-outlook.msg", + path + "/testEMLX.emlx", +// path + "/testRFC822", + path + "/rsstest.rss", +// path + "/testDITA.dita", + + path + "/testMP3i18n.mp3", + path + "/testAIFF.aif", + path + "/testFLAC.flac", +// path + "/testFLAC.oga", +// path + "/testVORBIS.ogg", + path + "/testMP4.m4a", + path + "/testWAV.wav", +// path + "/testWMA.wma", + + path + "/testFLV.flv", +// path + "/testWMV.wmv", + + path + "/testBMP.bmp", + path + "/testPNG.png", + path + "/testPSD.psd", + path + "/testSVG.svg", + path + "/testTIFF.tif", + +// path + "/test-documents.7z", +// path + "/test-documents.cpio", +// path + "/test-documents.tar", +// path + "/test-documents.tbz2", +// path + "/test-documents.tgz", +// path + "/test-documents.zip", +// path + "/test-zip-of-zip.zip", +// path + "/testJAR.jar", + +// path + "/testKML.kml", +// path + "/testRDF.rdf", + path + "/testVISIO.vsd", +// path + "/testWAR.war", +// path + "/testWindows-x86-32.exe", +// path + "/testWINMAIL.dat", +// path + "/testWMF.wmf", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + /** + * Test that the ContentHandler properly strips the illegal characters + */ + @Test + 
public void testTransformValue() { + String fieldName = "user_name"; + assertFalse("foobar".equals(getFoobarWithNonChars())); + + Metadata metadata = new Metadata(); + // load illegal char string into a metadata field and generate a new document, + // which will cause the ContentHandler to be invoked. + metadata.set(fieldName, getFoobarWithNonChars()); + StripNonCharSolrContentHandlerFactory contentHandlerFactory = + new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS); + IndexSchema schema = h.getCore().getLatestSchema(); + SolrContentHandler contentHandler = + contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema); + SolrInputDocument doc = contentHandler.newDocument(); + String foobar = doc.getFieldValue(fieldName).toString(); + assertTrue("foobar".equals(foobar)); + } + + /** + * Returns string "foobar" with illegal characters interspersed. + */ + private String getFoobarWithNonChars() { + char illegalChar = '\uffff'; + StringBuilder builder = new StringBuilder(); + builder.append(illegalChar).append(illegalChar).append("foo").append(illegalChar) + .append(illegalChar).append("bar").append(illegalChar).append(illegalChar); + return builder.toString(); + } + +} diff --git a/solr/contrib/morphlines-core/build.xml b/solr/contrib/morphlines-core/build.xml new file mode 100644 index 00000000000..859b0322e2b --- /dev/null +++ b/solr/contrib/morphlines-core/build.xml @@ -0,0 +1,107 @@ + + + + + + + + Solr Morphlines commands. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/ivy.xml b/solr/contrib/morphlines-core/ivy.xml new file mode 100644 index 00000000000..c3712b7270c --- /dev/null +++ b/solr/contrib/morphlines-core/ivy.xml @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java new file mode 100644 index 00000000000..f3030247065 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; + +/** + * A vehicle to load a list of Solr documents into some kind of destination, + * such as a SolrServer or MapReduce RecordWriter. + */ +public interface DocumentLoader { + + /** Begins a transaction */ + public void beginTransaction() throws IOException, SolrServerException; + + /** Loads the given document into the destination */ + public void load(SolrInputDocument doc) throws IOException, SolrServerException; + + /** + * Sends any outstanding documents to the destination and waits for a positive + * or negative ack (i.e. exception). Depending on the outcome the caller + * should then commit or rollback the current flume transaction + * correspondingly. + * + * @throws IOException + * If there is a low-level I/O error. + */ + public void commitTransaction() throws IOException, SolrServerException; + + /** + * Performs a rollback of all non-committed documents pending. + *

    + * Note that this is not a true rollback as in databases. Content you have + * previously added may have already been committed due to autoCommit, buffer + * full, other client performing a commit etc. So this is only a best-effort + * rollback. + * + * @throws IOException + * If there is a low-level I/O error. + */ + public UpdateResponse rollbackTransaction() throws IOException, SolrServerException; + + /** Releases allocated resources */ + public void shutdown() throws IOException, SolrServerException; + + /** + * Issues a ping request to check if the server is alive + * + * @throws IOException + * If there is a low-level I/O error. + */ + public SolrPingResponse ping() throws IOException, SolrServerException; + +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java new file mode 100644 index 00000000000..badf99ec3e7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.security.SecureRandom; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; + +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; +import com.typesafe.config.Config; + +/** + * A command that assigns a record unique key that is the concatenation of the given + * baseIdField record field, followed by a running count of the record number within + * the current session. The count is reset to zero whenever a "startSession" notification is + * received. + *

    + * For example, assume a CSV file containing multiple records but no unique ids, and the + * baseIdField field is the filesystem path of the file. Now this command can be used + * to assign the following record values to Solr's unique key field: + * $path#0, $path#1, ... $path#N. + *

    + * The name of the unique key field is fetched from Solr's schema.xml file, as directed by the + * solrLocator configuration parameter. + */ +public final class GenerateSolrSequenceKeyBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Arrays.asList( + "generateSolrSequenceKey", + "sanitizeUniqueSolrKey" // old name (retained for backwards compatibility) + ); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new GenerateSolrSequenceKey(this, config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class GenerateSolrSequenceKey extends AbstractCommand { + + private final boolean preserveExisting; + private final String baseIdFieldName; + private final String uniqueKeyName; + private long recordCounter = 0; + + private final String idPrefix; // for load testing only; enables adding same document many times with a different unique key + private final Random randomIdPrefix; // for load testing only; enables adding same document many times with a different unique key + + public GenerateSolrSequenceKey(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { + super(builder, config, parent, child, context); + this.baseIdFieldName = getConfigs().getString(config, "baseIdField", Fields.BASE_ID); + this.preserveExisting = getConfigs().getBoolean(config, "preserveExisting", true); + + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + IndexSchema schema = locator.getIndexSchema(); + SchemaField uniqueKey = schema.getUniqueKeyField(); + uniqueKeyName = uniqueKey == null ? 
null : uniqueKey.getName(); + + String tmpIdPrefix = getConfigs().getString(config, "idPrefix", null); // for load testing only + Random tmpRandomIdPrefx = null; + if ("random".equals(tmpIdPrefix)) { // for load testing only + tmpRandomIdPrefx = new Random(new SecureRandom().nextLong()); + tmpIdPrefix = null; + } + idPrefix = tmpIdPrefix; + randomIdPrefix = tmpRandomIdPrefx; + validateArguments(); + } + + @Override + protected boolean doProcess(Record doc) { + long num = recordCounter++; + // LOG.debug("record #{} id before sanitizing doc: {}", num, doc); + if (uniqueKeyName == null || (preserveExisting && doc.getFields().containsKey(uniqueKeyName))) { + ; // we must preserve the existing id + } else { + Object baseId = doc.getFirstValue(baseIdFieldName); + if (baseId == null) { + throw new MorphlineRuntimeException("Record field " + baseIdFieldName + + " must not be null as it is needed as a basis for a unique key for solr doc: " + doc); + } + doc.replaceValues(uniqueKeyName, baseId.toString() + "#" + num); + } + + // for load testing only; enables adding same document many times with a different unique key + if (idPrefix != null) { + String id = doc.getFirstValue(uniqueKeyName).toString(); + id = idPrefix + id; + doc.replaceValues(uniqueKeyName, id); + } else if (randomIdPrefix != null) { + String id = doc.getFirstValue(uniqueKeyName).toString(); + id = String.valueOf(Math.abs(randomIdPrefix.nextInt())) + "#" + id; + doc.replaceValues(uniqueKeyName, id); + } + + LOG.debug("record #{} unique key sanitized to this: {}", num, doc); + + return super.doProcess(doc); + } + + @Override + protected void doNotify(Record notification) { + if (Notifications.containsLifecycleEvent(notification, Notifications.LifecycleEvent.START_SESSION)) { + recordCounter = 0; // reset + } + super.doNotify(notification); + } + + } +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java 
b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java new file mode 100644 index 00000000000..ff27cd09e6f --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrInputDocument; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.cloudera.cdk.morphline.base.Configs; +import com.cloudera.cdk.morphline.base.Metrics; +import com.cloudera.cdk.morphline.base.Notifications; +import com.codahale.metrics.Timer; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + +/** + * A command that loads a record into a SolrServer or MapReduce SolrOutputFormat. + */ +public final class LoadSolrBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("loadSolr"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new LoadSolr(this, config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class LoadSolr extends AbstractCommand { + + private final DocumentLoader loader; + private final Map boosts = new HashMap(); + private final Timer elapsedTime; + + public LoadSolr(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { + super(builder, config, parent, child, context); + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new 
SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + this.loader = locator.getLoader(); + Config boostsConfig = getConfigs().getConfig(config, "boosts", ConfigFactory.empty()); + for (Map.Entry entry : new Configs().getEntrySet(boostsConfig)) { + String fieldName = entry.getKey(); + float boost = Float.parseFloat(entry.getValue().toString().trim()); + boosts.put(fieldName, boost); + } + validateArguments(); + this.elapsedTime = getTimer(Metrics.ELAPSED_TIME); + } + + @Override + protected void doNotify(Record notification) { + for (Object event : Notifications.getLifecycleEvents(notification)) { + if (event == Notifications.LifecycleEvent.BEGIN_TRANSACTION) { + try { + loader.beginTransaction(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } else if (event == Notifications.LifecycleEvent.COMMIT_TRANSACTION) { + try { + loader.commitTransaction(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } + else if (event == Notifications.LifecycleEvent.ROLLBACK_TRANSACTION) { + try { + loader.rollbackTransaction(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } + else if (event == Notifications.LifecycleEvent.SHUTDOWN) { + try { + loader.shutdown(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } + } + super.doNotify(notification); + } + + @Override + protected boolean doProcess(Record record) { + Timer.Context timerContext = elapsedTime.time(); + SolrInputDocument doc = convert(record); + try { + loader.load(doc); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } catch (SolrServerException e) { 
+ throw new MorphlineRuntimeException(e); + } finally { + timerContext.stop(); + } + + // pass record to next command in chain: + return super.doProcess(record); + } + + private SolrInputDocument convert(Record record) { + Map> map = record.getFields().asMap(); + SolrInputDocument doc = new SolrInputDocument(new HashMap(2 * map.size())); + for (Map.Entry> entry : map.entrySet()) { + String key = entry.getKey(); + doc.setField(key, entry.getValue(), getBoost(key)); + } + return doc; + } + + private float getBoost(String key) { + if (boosts.size() > 0) { + Float boost = boosts.get(key); + if (boost != null) { + return boost.floatValue(); + } + } + return 1.0f; + } + + } +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java new file mode 100644 index 00000000000..f98eeb25016 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import org.apache.http.client.HttpClient; +import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * ConcurrentUpdateSolrServer that propagates exceptions up to the submitter of + * requests on blockUntilFinished() + */ +final class SafeConcurrentUpdateSolrServer extends ConcurrentUpdateSolrServer { + + private Throwable currentException = null; + private final Object myLock = new Object(); + + private static final Logger LOGGER = LoggerFactory.getLogger(SafeConcurrentUpdateSolrServer.class); + + public SafeConcurrentUpdateSolrServer(String solrServerUrl, int queueSize, int threadCount) { + this(solrServerUrl, null, queueSize, threadCount); + } + + public SafeConcurrentUpdateSolrServer(String solrServerUrl, HttpClient client, int queueSize, int threadCount) { + super(solrServerUrl, client, queueSize, threadCount); + } + + @Override + public void handleError(Throwable ex) { + assert ex != null; + synchronized (myLock) { + currentException = ex; + } + LOGGER.error("handleError", ex); + } + + @Override + public void blockUntilFinished() { + super.blockUntilFinished(); + synchronized (myLock) { + if (currentException != null) { + throw new RuntimeException(currentException); + } + } + } + + public void clearException() { + synchronized (myLock) { + currentException = null; + } + } + +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java new file mode 100644 index 00000000000..79ecec34b64 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.solr.schema.IndexSchema; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.typesafe.config.Config; + +/** + * Command that sanitizes record fields that are unknown to Solr schema.xml by either deleting them + * (renameToPrefix is absent or a zero length string), or by moving them to a field prefixed with + * the given renameToPrefix (e.g. renameToPrefix = "ignored_" to use typical dynamic Solr fields). + *

    + * Recall that Solr throws an exception on any attempt to load a document that contains a field that + * isn't specified in schema.xml. + */ +public final class SanitizeUnknownSolrFieldsBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("sanitizeUnknownSolrFields"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new SanitizeUnknownSolrFields(this, config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class SanitizeUnknownSolrFields extends AbstractCommand { + + private final IndexSchema schema; + private final String renameToPrefix; + + public SanitizeUnknownSolrFields(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { + super(builder, config, parent, child, context); + + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + this.schema = locator.getIndexSchema(); + Preconditions.checkNotNull(schema); + LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values())); + + String str = getConfigs().getString(config, "renameToPrefix", "").trim(); + this.renameToPrefix = str.length() > 0 ? 
str : null; + validateArguments(); + } + + @Override + protected boolean doProcess(Record record) { + Collection entries = new ArrayList(record.getFields().asMap().entrySet()); + for (Map.Entry> entry : entries) { + String key = entry.getKey(); + if (schema.getFieldOrNull(key) == null) { + LOG.debug("Sanitizing unknown Solr field: {}", key); + Collection values = entry.getValue(); + if (renameToPrefix != null) { + record.getFields().putAll(renameToPrefix + key, values); + } + values.clear(); // implicitly removes key from record + } + } + return super.doProcess(record); + } + + } +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java new file mode 100644 index 00000000000..3254acd9326 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.base.Configs; +import com.google.common.base.Preconditions; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; +import com.typesafe.config.ConfigRenderOptions; +import com.typesafe.config.ConfigUtil; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.core.SolrConfig; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.util.SystemIdResolver; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.IOException; + +/** + * Set of configuration parameters that identify the location and schema of a Solr server or + * SolrCloud; Based on this information this class can return the schema and a corresponding + * {@link DocumentLoader}. 
+ */ +public class SolrLocator { + + private Config config; + private MorphlineContext context; + private String collectionName; + private String zkHost; + private String solrUrl; + private String solrHomeDir; + private int batchSize = 1000; + + private static final String SOLR_HOME_PROPERTY_NAME = "solr.solr.home"; + + private static final Logger LOG = LoggerFactory.getLogger(SolrLocator.class); + + protected SolrLocator(MorphlineContext context) { + Preconditions.checkNotNull(context); + this.context = context; + } + + public SolrLocator(Config config, MorphlineContext context) { + this(context); + this.config = config; + Configs configs = new Configs(); + collectionName = configs.getString(config, "collection", null); + zkHost = configs.getString(config, "zkHost", null); + solrHomeDir = configs.getString(config, "solrHomeDir", null); + solrUrl = configs.getString(config, "solrUrl", null); + batchSize = configs.getInt(config, "batchSize", batchSize); + LOG.trace("Constructed solrLocator: {}", this); + configs.validateArguments(config); + } + + public DocumentLoader getLoader() { + if (context instanceof SolrMorphlineContext) { + DocumentLoader loader = ((SolrMorphlineContext)context).getDocumentLoader(); + if (loader != null) { + return loader; + } + } + + if (zkHost != null && zkHost.length() > 0) { + if (collectionName == null || collectionName.length() == 0) { + throw new MorphlineCompilationException("Parameter 'zkHost' requires that you also pass parameter 'collection'", config); + } + CloudSolrServer cloudSolrServer = new CloudSolrServer(zkHost); + cloudSolrServer.setDefaultCollection(collectionName); + cloudSolrServer.connect(); + return new SolrServerDocumentLoader(cloudSolrServer, batchSize); + } else { + if (solrUrl == null || solrUrl.length() == 0) { + throw new MorphlineCompilationException("Missing parameter 'solrUrl'", config); + } + int solrServerNumThreads = 2; + int solrServerQueueLength = solrServerNumThreads; + SolrServer server = new 
SafeConcurrentUpdateSolrServer(solrUrl, solrServerQueueLength, solrServerNumThreads); + // SolrServer server = new HttpSolrServer(solrServerUrl); + // SolrServer server = new ConcurrentUpdateSolrServer(solrServerUrl, solrServerQueueLength, solrServerNumThreads); + // server.setParser(new XMLResponseParser()); // binary parser is used by default + return new SolrServerDocumentLoader(server, batchSize); + } + } + + public IndexSchema getIndexSchema() { + if (context instanceof SolrMorphlineContext) { + IndexSchema schema = ((SolrMorphlineContext)context).getIndexSchema(); + if (schema != null) { + validateSchema(schema); + return schema; + } + } + + // If solrHomeDir isn't defined and zkHost and collectionName are defined + // then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir + String oldSolrHomeDir = null; + String mySolrHomeDir = solrHomeDir; + if (solrHomeDir == null || solrHomeDir.length() == 0) { + if (zkHost == null || zkHost.length() == 0) { + // TODO: implement download from solrUrl if specified + throw new MorphlineCompilationException( + "Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'", + config); + } + if (collectionName == null || collectionName.length() == 0) { + throw new MorphlineCompilationException( + "Parameter 'zkHost' requires that you also pass parameter 'collection'", config); + } + ZooKeeperDownloader zki = new ZooKeeperDownloader(); + SolrZkClient zkClient = zki.getZkClient(zkHost); + try { + String configName = zki.readConfigName(zkClient, collectionName); + File downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName); + mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath(); + } catch (KeeperException e) { + throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e); + } catch (InterruptedException e) { + throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e); 
+ } catch (IOException e) { + throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e); + } finally { + zkClient.close(); + } + } + + oldSolrHomeDir = System.setProperty(SOLR_HOME_PROPERTY_NAME, mySolrHomeDir); + try { + SolrConfig solrConfig = new SolrConfig(); // TODO use SolrResourceLoader ala TikaMapper? + // SolrConfig solrConfig = new SolrConfig("solrconfig.xml"); + // SolrConfig solrConfig = new + // SolrConfig("/cloud/apache-solr-4.0.0-BETA/example/solr/collection1", + // "solrconfig.xml", null); + // SolrConfig solrConfig = new + // SolrConfig("/cloud/apache-solr-4.0.0-BETA/example/solr/collection1/conf/solrconfig.xml"); + SolrResourceLoader loader = solrConfig.getResourceLoader(); + + InputSource is = new InputSource(loader.openSchema("schema.xml")); + is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml")); + + IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is); + validateSchema(schema); + return schema; + } catch (ParserConfigurationException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } catch (SAXException e) { + throw new MorphlineRuntimeException(e); + } finally { // restore old global state + if (solrHomeDir != null) { + if (oldSolrHomeDir == null) { + System.clearProperty(SOLR_HOME_PROPERTY_NAME); + } else { + System.setProperty(SOLR_HOME_PROPERTY_NAME, oldSolrHomeDir); + } + } + } + } + + private void validateSchema(IndexSchema schema) { + if (schema.getUniqueKeyField() == null) { + throw new MorphlineCompilationException("Solr schema.xml is missing unique key field", config); + } + if (!schema.getUniqueKeyField().isRequired()) { + throw new MorphlineCompilationException("Solr schema.xml must contain a required unique key field", config); + } + } + + @Override + public String toString() { + return toConfig(null).root().render(ConfigRenderOptions.concise()); + } + + public Config toConfig(String 
key) { + String json = ""; + if (key != null) { + json = toJson(key) + " : "; + } + json += + "{" + + " collection : " + toJson(collectionName) + ", " + + " zkHost : " + toJson(zkHost) + ", " + + " solrUrl : " + toJson(solrUrl) + ", " + + " solrHomeDir : " + toJson(solrHomeDir) + ", " + + " batchSize : " + toJson(batchSize) + " " + + "}"; + return ConfigFactory.parseString(json); + } + + private String toJson(Object key) { + String str = key == null ? "" : key.toString(); + str = ConfigUtil.quoteString(str); + return str; + } + + public String getCollectionName() { + return this.collectionName; + } + + public void setCollectionName(String collectionName) { + this.collectionName = collectionName; + } + + public String getZkHost() { + return this.zkHost; + } + + public void setZkHost(String zkHost) { + this.zkHost = zkHost; + } + + public String getSolrHomeDir() { + return this.solrHomeDir; + } + + public void setSolrHomeDir(String solrHomeDir) { + this.solrHomeDir = solrHomeDir; + } + + public String getServerUrl() { + return this.solrUrl; + } + + public void setServerUrl(String solrUrl) { + this.solrUrl = solrUrl; + } + + public int getBatchSize() { + return this.batchSize; + } + + public void setBatchSize(int batchSize) { + this.batchSize = batchSize; + } + +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java new file mode 100644 index 00000000000..56d6e39227c --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import org.apache.solr.schema.IndexSchema; + +import com.cloudera.cdk.morphline.api.MorphlineContext; + +/** + * A context that is specific to Solr. + */ +public class SolrMorphlineContext extends MorphlineContext { + + private DocumentLoader loader; + private IndexSchema schema; + + /** For public access use {@link Builder#build()} instead */ + protected SolrMorphlineContext() {} + + public DocumentLoader getDocumentLoader() { + return loader; + } + + public IndexSchema getIndexSchema() { + return schema; + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + /** + * Helper to construct a {@link SolrMorphlineContext} instance. 
+ */ + public static class Builder extends MorphlineContext.Builder { + + private DocumentLoader loader; + private IndexSchema schema; + + public Builder() {} + + public Builder setDocumentLoader(DocumentLoader loader) { + this.loader = loader; + return this; + } + + public Builder setIndexSchema(IndexSchema schema) { + this.schema = schema; + return this; + } + + @Override + public SolrMorphlineContext build() { + ((SolrMorphlineContext)context).loader = loader; + ((SolrMorphlineContext)context).schema = schema; + return (SolrMorphlineContext) super.build(); + } + + @Override + protected SolrMorphlineContext create() { + return new SolrMorphlineContext(); + } + + } + +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java new file mode 100644 index 00000000000..d343230fcba --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer; +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A vehicle to load a list of Solr documents into a local or remote {@link SolrServer}. + */ +public class SolrServerDocumentLoader implements DocumentLoader { + + private final SolrServer server; // proxy to local or remote solr server + private long numLoadedDocs = 0; // number of documents loaded in the current transaction + private final int batchSize; + private final List batch = new ArrayList(); + + private static final Logger LOGGER = LoggerFactory.getLogger(SolrServerDocumentLoader.class); + + public SolrServerDocumentLoader(SolrServer server, int batchSize) { + if (server == null) { + throw new IllegalArgumentException("solr server must not be null"); + } + this.server = server; + if (batchSize <= 0) { + throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize); + } + this.batchSize = batchSize; + } + + @Override + public void beginTransaction() { + LOGGER.trace("beginTransaction"); + batch.clear(); + numLoadedDocs = 0; + if (server instanceof SafeConcurrentUpdateSolrServer) { + ((SafeConcurrentUpdateSolrServer) server).clearException(); + } + } + + @Override + public void load(SolrInputDocument doc) throws IOException, SolrServerException { + LOGGER.trace("load doc: {}", doc); + batch.add(doc); + if (batch.size() >= batchSize) { + loadBatch(); + } + } + + @Override + public void commitTransaction() throws 
SolrServerException, IOException { + LOGGER.trace("commitTransaction"); + if (batch.size() > 0) { + loadBatch(); + } + if (numLoadedDocs > 0) { + if (server instanceof ConcurrentUpdateSolrServer) { + ((ConcurrentUpdateSolrServer) server).blockUntilFinished(); + } + } + } + + private void loadBatch() throws SolrServerException, IOException { + numLoadedDocs += batch.size(); + try { + UpdateResponse rsp = server.add(batch); + } finally { + batch.clear(); + } + } + + @Override + public UpdateResponse rollbackTransaction() throws SolrServerException, IOException { + LOGGER.trace("rollback"); + if (!(server instanceof CloudSolrServer)) { + return server.rollback(); + } else { + return new UpdateResponse(); + } + } + + @Override + public void shutdown() { + LOGGER.trace("shutdown"); + server.shutdown(); + } + + @Override + public SolrPingResponse ping() throws SolrServerException, IOException { + LOGGER.trace("ping"); + return server.ping(); + } + + public SolrServer getSolrServer() { + return server; + } + +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java new file mode 100644 index 00000000000..323eedd27de --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; +import java.io.Reader; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.google.common.base.Preconditions; +import com.typesafe.config.Config; + +/** + * A command that uses the embedded Solr/Lucene Analyzer library to generate tokens from a text + * string, without sending data to a Solr server. 
+ */ +public final class TokenizeTextBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("tokenizeText"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new TokenizeText(this, config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class TokenizeText extends AbstractCommand { + + private final String inputFieldName; + private final String outputFieldName; + private final Analyzer analyzer; + private final CharTermAttribute token; // cached + private final ReusableStringReader reader = new ReusableStringReader(); // cached + + public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { + super(builder, config, parent, child, context); + this.inputFieldName = getConfigs().getString(config, "inputField"); + this.outputFieldName = getConfigs().getString(config, "outputField"); + String solrFieldType = getConfigs().getString(config, "solrFieldType"); + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + IndexSchema schema = locator.getIndexSchema(); + FieldType fieldType = schema.getFieldTypeByName(solrFieldType); + if (fieldType == null) { + throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + solrFieldType, config); + } + this.analyzer = fieldType.getAnalyzer(); + Preconditions.checkNotNull(analyzer); + try { // register CharTermAttribute for later (implicit) reuse + this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class); + } catch (IOException e) { + throw new 
MorphlineCompilationException("Cannot create token stream", config, e); + } + Preconditions.checkNotNull(token); + validateArguments(); + } + + @Override + protected boolean doProcess(Record record) { + try { + List outputValues = record.get(outputFieldName); + for (Object value : record.get(inputFieldName)) { + reader.setValue(value.toString()); + TokenStream tokenStream = analyzer.tokenStream("content", reader); + tokenStream.reset(); + while (tokenStream.incrementToken()) { + if (token.length() > 0) { // incrementToken() updates the token! + String tokenStr = new String(token.buffer(), 0, token.length()); + outputValues.add(tokenStr); + } + } + tokenStream.end(); + tokenStream.close(); + } + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + + // pass record to next command in chain: + return super.doProcess(record); + } + + } + + + // Copied from org.apache.lucene.document.Field.java from lucene-4.3.0 + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + private static final class ReusableStringReader extends Reader { + private int pos = 0, size = 0; + private String s = null; + + void setValue(String s) { + this.s = s; + this.size = s.length(); + this.pos = 0; + } + + @Override + public int read() { + if (pos < size) { + return s.charAt(pos++); + } else { + s = null; + return -1; + } + } + + @Override + public int read(char[] c, int off, int len) { + if (pos < size) { + len = Math.min(len, size-pos); + s.getChars(pos, pos+len, c, off); + pos += len; + return len; + } else { + s = null; + return -1; + } + } + + @Override + public void close() { + pos = size; // this prevents NPE when reading after close! + s = null; + } + } +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java new file mode 100644 index 00000000000..68cb6270139 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.solr.cloud.ZkController; +import org.apache.solr.common.cloud.Aliases; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.StrUtils; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Files; + +/** + * Downloads SolrCloud information from ZooKeeper. + */ +final class ZooKeeperDownloader { + + private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperDownloader.class); + + public SolrZkClient getZkClient(String zkHost) { + if (zkHost == null) { + throw new IllegalArgumentException("zkHost must not be null"); + } + + SolrZkClient zkClient; + try { + zkClient = new SolrZkClient(zkHost, 30000); + } catch (Exception e) { + throw new IllegalArgumentException("Cannot connect to ZooKeeper: " + zkHost, e); + } + return zkClient; + } + + /** + * Returns config value given collection name + * Borrowed heavily from Solr's ZKController. 
+ */ + public String readConfigName(SolrZkClient zkClient, String collection) + throws KeeperException, InterruptedException { + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + String configName = null; + + // first check for alias + byte[] aliasData = zkClient.getData(ZkStateReader.ALIASES, null, null, true); + Aliases aliases = ClusterState.load(aliasData); + String alias = aliases.getCollectionAlias(collection); + if (alias != null) { + List aliasList = StrUtils.splitSmart(alias, ",", true); + if (aliasList.size() > 1) { + throw new IllegalArgumentException("collection cannot be an alias that maps to multiple collections"); + } + collection = aliasList.get(0); + } + + String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; + if (LOG.isInfoEnabled()) { + LOG.info("Load collection config from:" + path); + } + byte[] data = zkClient.getData(path, null, null, true); + + if(data != null) { + ZkNodeProps props = ZkNodeProps.load(data); + configName = props.getStr(ZkController.CONFIGNAME_PROP); + } + + if (configName != null && !zkClient.exists(ZkController.CONFIGS_ZKNODE + "/" + configName, true)) { + LOG.error("Specified config does not exist in ZooKeeper:" + configName); + throw new IllegalArgumentException("Specified config does not exist in ZooKeeper:" + + configName); + } + + return configName; + } + + /** + * Download and return the config directory from ZK + */ + public File downloadConfigDir(SolrZkClient zkClient, String configName) + throws IOException, InterruptedException, KeeperException { + File dir = Files.createTempDir(); + dir.deleteOnExit(); + ZkController.downloadConfigDir(zkClient, configName, dir); + File confDir = new File(dir, "conf"); + if (!confDir.isDirectory()) { + // create a temporary directory with "conf" subdir and mv the config in there. This is + // necessary because of CDH-11188; solrctl does not generate nor accept directories with e.g. 
+ // conf/solrconfig.xml which is necessary for proper solr operation. This should work + // even if solrctl changes. + confDir = new File(Files.createTempDir().getAbsolutePath(), "conf"); + confDir.getParentFile().deleteOnExit(); + Files.move(dir, confDir); + dir = confDir.getParentFile(); + } + return dir; + } + +} diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html new file mode 100644 index 00000000000..ecec1bdf4d8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html @@ -0,0 +1,22 @@ + + + + +Morphlines Solr related code. + + diff --git a/solr/contrib/morphlines-core/src/java/overview.html b/solr/contrib/morphlines-core/src/java/overview.html new file mode 100644 index 00000000000..7f8ad137a34 --- /dev/null +++ b/solr/contrib/morphlines-core/src/java/overview.html @@ -0,0 +1,21 @@ + + + +Apache Solr Search Server: Solr Core Morphline Commands + + diff --git a/solr/contrib/morphlines-core/src/test-files/README b/solr/contrib/morphlines-core/src/test-files/README new file mode 100644 index 00000000000..10f878acccb --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/README @@ -0,0 +1,21 @@ + + +This directory is where any non-transient, non-java files needed +for the execution of tests should live. + +It is used as the CWD when running JUnit tests. diff --git a/solr/contrib/morphlines-core/src/test-files/books_numeric_ids.csv b/solr/contrib/morphlines-core/src/test-files/books_numeric_ids.csv new file mode 100644 index 00000000000..817e8b769cf --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/books_numeric_ids.csv @@ -0,0 +1,11 @@ +id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s +0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy +0553579908,book,A Clash of Kings,7.99,true,George R.R. 
Martin,"A Song of Ice and Fire",2,fantasy +0553573429,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy +0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi +0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy +0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi +0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy +0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy +0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy +0805080499,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy diff --git a/solr/contrib/morphlines-core/src/test-files/exampledocs/example.html b/solr/contrib/morphlines-core/src/test-files/exampledocs/example.html new file mode 100644 index 00000000000..5732f6214bc --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/exampledocs/example.html @@ -0,0 +1,49 @@ + + + Welcome to Solr + + +

    + Here is some text +

    +
    Here is some text in a div
    +
    This has a link.
    +News + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/exampledocs/example.txt b/solr/contrib/morphlines-core/src/test-files/exampledocs/example.txt new file mode 100644 index 00000000000..0c9928b9e26 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/exampledocs/example.txt @@ -0,0 +1,3 @@ +Example text document + +This is a simple example for a plain text document, indexed to Solr \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/README b/solr/contrib/morphlines-core/src/test-files/lib-dirs/README new file mode 100644 index 00000000000..b7ca5b834f4 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/README @@ -0,0 +1,18 @@ + + +Items under this directory are used by TestConfig.testLibs() diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt new file mode 100644 index 
00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt b/solr/contrib/morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/log4j.properties b/solr/contrib/morphlines-core/src/test-files/log4j.properties new file mode 100644 index 00000000000..fb0577130bb --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/log4j.properties @@ -0,0 +1,12 @@ +# Logging level +log4j.rootLogger=INFO, CONSOLE + +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.Target=System.err +log4j.appender.CONSOLE.layout=org.apache.solr.util.SolrLogLayout 
+log4j.appender.CONSOLE.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n + +log4j.logger.org.apache.zookeeper=WARN +log4j.logger.org.apache.hadoop=WARN +#log4j.logger.org.apache.solr=WARN +log4j.logger.org.apache.solr.hadoop=INFO \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/mailing_lists.pdf b/solr/contrib/morphlines-core/src/test-files/mailing_lists.pdf new file mode 100755 index 00000000000..33b819f0649 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/mailing_lists.pdf @@ -0,0 +1,382 @@ +%PDF-1.3 +% +4 0 obj +<< /Type /Info +/Producer (FOP 0.20.5) >> +endobj +5 0 obj +<< /Length 425 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +Gb!$BYuAO_'ZTnF'lQbNnGsdiUK'C#3dAWc3lI>k\P#:a@Qja<+itJa;R]7&ni\$9pOi?T._;3m?jT+q7>,P^70oB=!nr]%k%\U^KVqaF4*Z`$VJ7Gs`T5OO`(tY]Q1`-5*m;!--h%?*_0SbIU\BV=OFg<#%YcH_YI$(sDCIJts'M2*drjRrJE!OM7HP!^-&EW>B\:RYFnaY.m[$s5f"XG0>^fduHe6/++D0fY3@AWR@HYabmQ5jDQ.c0>I.uQX&(lA@VLm_s_9XnBh7%"*/%^]AO3eTI!BTo'pF?%''A*PDU*NW%d`2@p'@:D@U??4PP08m[K4N,8,(e`N+\7n+a>ac%q#,D8DRQ*3l]MS>'gn3lWNGmRAtQ7n]eDnLPrD!?DEdB/hNarb_7$B7U-H7!['nXLkV_no5AHq`>6~> +endstream +endobj +6 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 5 0 R +/Annots 7 0 R +>> +endobj +7 0 obj +[ +8 0 R +10 0 R +12 0 R +] +endobj +8 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 559.666 137.324 547.666 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 9 0 R +/H /I +>> +endobj +10 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 541.466 164.648 529.466 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 11 0 R +/H /I +>> +endobj +12 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 523.266 154.016 511.266 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 13 0 R +/H /I +>> +endobj +14 0 obj +<< /Length 2197 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream 
+Gb"/)9lo&I&A@C2n5a2!7YkueV^?,ABrC@*[F.^sK-J\u-^*\VZ9A3]?'#&sU^3,]d[;/F9HjMs^A"j:!rHNC?7rs!0)f1q`$?\lOaRt/g/f.>-Am[t'`RUrGL7Uk8K90.i-up;qeIYfjWZ2&ki:[3`TuXFj]`a&Hbo8r&P(RZ+M_>&eY.T4jXOI%UHbq1GnF>g$KgW%R24nBkc\[qA$(koU$isG(W7`PE,nMam;U4(ZC8,Ca!_P2VYf>\V0gK0g;-.E[Y(&s=+&g6ms""'Ip>0b/D!>a&PX9eo_tuueR:b=r@6Q5LM],XbK;&L$0WubNX9c"=FM$543G_>rAQ_%2/dW<)/"U1&]l:AZ&\Mif8sF`r5>b<$lqK"2t]maZ*oDb!^$Zn6OC'%XkI];&*rkLP1BMGI@$,0fK(=gC-3q7n7d4EQ4DepBc'^Q^A%e?19a(`S*FHTN*RNjP&P%2`6%jpOU\DBUN)cnMYa3PQ!sYETiGJi'q>>m*e;[,.1l\rZo3K;>$K"a1:s3pU>o+:'7fND!+6GV@2G;qf`\`=J#WkOjSke<1f>VfbcUtXM"1jGN:@Ptec8Mc-hmS5S>q/nAY%[4%7BCI![NA:We(41]ld_`pU80;+e`1DbG.RQ:'#GQJAL2![aIWY'A*Y_>mF7>2S0IWM%nLg3%%;7r5=;3!7]05r?Ft-6I]9n9C\fUUF6R\9bPEVSutd9LFTpaoaP7Iuus-S#S.3;sVu-*T/:&2Ld]&g0oHoo`TmR'b]ps6hq9s&f+6_5c(k"m96-f:YA!:)K:q+(Hl=t`:+"<lQm6B=K&/r/Ep6Y]EG.T/34(fT0=6_m5PA-7PVo:"r)W'.mX>1A8Yg9kfa?"Qp+ta7Hb$FM`*OP^>3Sg)P[?jIXd]i]"h)Tdjnm[6@=kmEBkP1/K[bg`"7U:BWk^=!+3\ANTnN75*Rh_<-UA*!&rr#KW/7EXkeJU9GF5RA,#kqJ5aC9Ra5,PsiI`uF23/B"nkPHe2Q;B@pBXGM-i;<'oOM,dc3'qL)Ne,OV2.*f^Bt;0P#roPn?h]@-63,-9lQSF!dic13Ag\_]m=7Llb\*&C+>\+o6)Y,C._?+X1Qok%j>f[#T!,CD2T4cL'.Nb_Vit&M]!j7j6LHB.g9AQre&be$gJhbAg68kDJf@XZ7'2791RD*qAP]u")(lEjX)\-#O$aK(E]jq*3XbL:3q:o&9gcZLl?:E-l'-dHf;;_hhH3m/Q3]9jJRn>Z8]1Gt6PAVJ[r2gsg=4$!6I$RQ@Y6;H(U>,LWdW>Z5iTYZ'tAcSfoN,U=/fIoA::l8X^fXIa4m3-]9$Zc\E0H^!pmfeMjW3#p1J)pbH^VZML"NZ$U,Yg;f[AVrZRhlRCC[)D*>K0IRWR98A=<>dPSd)@Ec)OXGjK01hM%!FhVR[I<5Va3V,I"YuQZb-,XEM!Gk_-r<9T0W#M!!;RX!]MtBdJ0ah'FCoNF1r"gmU>Rb4aE:Z'I)d-f_1:B0gfmnM?K9ljY>R%*Fc9oYiohHndi(!dK+]ElID:'g:PKq6fKKHdO>bmG-2]ZmVcqs+ef-EWR(1Da)F&CoL[['3)UZ^!fo+Ua2NSC7m5oIXlLoF)+cWUr/MaMP@shSN$gD*jB=:/ru]MF>3-m'j6_-'>(Uq'PN4Fl*XC8ABmg\b`kmI@<0Sh)bkNopK]E6S7,V*o!<)infW?).%mtC2S8!kqh$BpiWu=4)>.Wm+Mt.YPC"ZlO^Ge*Y5)8QlX2 +endstream +endobj +15 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 14 0 R +/Annots 16 0 R +>> +endobj +16 0 obj +[ +17 0 R +18 0 R +19 0 R +20 0 R +21 0 R +22 0 R +23 0 R +24 0 R +25 0 R +26 0 R +27 0 R +28 
0 R +29 0 R +] +endobj +17 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 232.344 608.466 372.012 596.466 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-user@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +18 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 591.266 189.336 579.266 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-user-subscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +19 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 578.066 215.988 566.066 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-user-unsubscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +20 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 564.866 197.316 552.866 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://mail-archives.apache.org/mod_mbox/lucene-solr-user/) +/S /URI >> +/H /I +>> +endobj +21 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 453.924 564.866 475.26 552.866 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SolrResources) +/S /URI >> +/H /I +>> +endobj +22 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 259.668 441.722 396.672 429.722 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-dev@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +23 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 424.522 189.336 412.522 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-dev-subscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +24 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 411.322 215.988 399.322 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-dev-unsubscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +25 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 398.122 197.316 386.122 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://mail-archives.apache.org/mod_mbox/lucene-solr-dev/) +/S /URI >> +/H /I +>> +endobj +26 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 453.924 398.122 475.26 386.122 ] +/C [ 0 0 0 ] 
+/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SolrResources) +/S /URI >> +/H /I +>> +endobj +27 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 294.624 296.178 403.284 284.178 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (version_control.html) +/S /URI >> +/H /I +>> +endobj +28 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 265.778 189.336 253.778 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-commits-subscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +29 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 252.578 215.988 240.578 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-commits-unsubscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +31 0 obj +<< + /Title (\376\377\0\61\0\40\0\125\0\163\0\145\0\162\0\163) + /Parent 30 0 R + /Next 32 0 R + /A 9 0 R +>> endobj +32 0 obj +<< + /Title (\376\377\0\62\0\40\0\104\0\145\0\166\0\145\0\154\0\157\0\160\0\145\0\162\0\163) + /Parent 30 0 R + /Prev 31 0 R + /Next 33 0 R + /A 11 0 R +>> endobj +33 0 obj +<< + /Title (\376\377\0\63\0\40\0\103\0\157\0\155\0\155\0\151\0\164\0\163) + /Parent 30 0 R + /Prev 32 0 R + /A 13 0 R +>> endobj +34 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F3 +/BaseFont /Helvetica-Bold +/Encoding /WinAnsiEncoding >> +endobj +35 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F5 +/BaseFont /Times-Roman +/Encoding /WinAnsiEncoding >> +endobj +36 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding >> +endobj +37 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica-Oblique +/Encoding /WinAnsiEncoding >> +endobj +38 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F7 +/BaseFont /Times-Bold +/Encoding /WinAnsiEncoding >> +endobj +1 0 obj +<< /Type /Pages +/Count 2 +/Kids [6 0 R 15 0 R ] >> +endobj +2 0 obj +<< /Type /Catalog +/Pages 1 0 R + /Outlines 30 0 R + /PageMode /UseOutlines + >> +endobj +3 0 obj +<< +/Font << /F3 34 0 R /F5 35 0 R /F1 36 0 R /F2 37 0 R /F7 38 0 R >> 
+/ProcSet [ /PDF /ImageC /Text ] >> +endobj +9 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 659.0 null] +>> +endobj +11 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 492.256 null] +>> +endobj +13 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 325.512 null] +>> +endobj +30 0 obj +<< + /First 31 0 R + /Last 33 0 R +>> endobj +xref +0 39 +0000000000 65535 f +0000007198 00000 n +0000007263 00000 n +0000007355 00000 n +0000000015 00000 n +0000000071 00000 n +0000000587 00000 n +0000000707 00000 n +0000000746 00000 n +0000007478 00000 n +0000000881 00000 n +0000007541 00000 n +0000001018 00000 n +0000007607 00000 n +0000001155 00000 n +0000003445 00000 n +0000003568 00000 n +0000003679 00000 n +0000003867 00000 n +0000004063 00000 n +0000004261 00000 n +0000004471 00000 n +0000004665 00000 n +0000004852 00000 n +0000005047 00000 n +0000005244 00000 n +0000005453 00000 n +0000005647 00000 n +0000005821 00000 n +0000006020 00000 n +0000007673 00000 n +0000006221 00000 n +0000006342 00000 n +0000006508 00000 n +0000006642 00000 n +0000006755 00000 n +0000006865 00000 n +0000006973 00000 n +0000007089 00000 n +trailer +<< +/Size 39 +/Root 2 0 R +/Info 4 0 R +>> +startxref +7724 +%%EOF diff --git a/solr/contrib/morphlines-core/src/test-files/old-solr-example/README.txt b/solr/contrib/morphlines-core/src/test-files/old-solr-example/README.txt new file mode 100644 index 00000000000..6242cff237b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/old-solr-example/README.txt @@ -0,0 +1 @@ +This is around for back compat testing purposes and should be able to be removed in Solr 5.0 \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/old-solr-example/solr.xml b/solr/contrib/morphlines-core/src/test-files/old-solr-example/solr.xml new file mode 100644 index 00000000000..75da88a52f1 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/old-solr-example/solr.xml @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + diff --git 
a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js new file mode 100644 index 00000000000..1b3c9fc2d6e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js @@ -0,0 +1,26 @@ +function processAdd(cmd) { + // Integer.valueOf is needed here to get a tru java object, because + // all javascript numbers are floating point (ie: java.lang.Double) + cmd.getSolrInputDocument().addField("script_added_i", + java.lang.Integer.valueOf(42)); + cmd.getSolrInputDocument().addField("script_added_d", 42.3); + +} + +// // // + +function processDelete() { + // NOOP +} +function processCommit() { + // NOOP +} +function processRollback() { + // NOOP +} +function processMergeIndexes() { + // NOOP +} +function finish() { + // NOOP +} diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt new file mode 100644 index 00000000000..6d276c33a16 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt @@ -0,0 +1,5 @@ +# simple AnalyzingInfix suggest phrase dictionary for testing +Japanese Autocomplete and Japanese Highlighter broken +Add Japanese Kanji number normalization to Kuromoji +Add decompose compound Japanese Katakana token capability to Kuromoji +This is just another entry! 
\ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml new file mode 100644 index 00000000000..d7aeeeb2331 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml new file mode 100644 index 00000000000..af5d8fbb155 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml @@ -0,0 +1,34 @@ + + + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + 8 + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml new file mode 100644 index 00000000000..16796361c66 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml new file mode 100644 index 00000000000..3f8e224ce1b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml new file mode 100644 index 00000000000..3575c438c72 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml new file mode 100644 index 00000000000..9a704fdd731 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + pulsing1text + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml new file mode 100644 index 00000000000..a71b361c956 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml new file mode 100644 index 00000000000..6339ae25eab --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml new file mode 100644 index 00000000000..1f92977760e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml new file mode 100644 index 00000000000..a1b788e628e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml new file mode 100644 index 00000000000..bd23933b270 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml new file mode 100644 index 00000000000..84bfaea141d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml new file mode 100644 index 
00000000000..460fbda8ba2 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml new file mode 100644 index 00000000000..4272362a3f4 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml new file mode 100644 index 00000000000..34ef44bcc73 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml new file mode 100644 index 00000000000..0e3595d75cb --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml new file mode 100644 index 00000000000..c372afd44a4 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml @@ -0,0 +1,34 @@ + + + 
+ + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml new file mode 100644 index 00000000000..e7874c88d25 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml @@ -0,0 +1,27 @@ + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml new file mode 100644 index 00000000000..5b32376751c --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml new file mode 100644 index 00000000000..ddc9f4dc685 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml new file mode 100644 index 00000000000..fb3ddbe5c41 --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml new file mode 100644 index 00000000000..b3ca6ae3096 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml new file mode 100644 index 00000000000..86e80a4555e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml new file mode 100644 index 00000000000..06a689a8298 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml new file mode 100644 index 00000000000..f7c4e9b2d80 
--- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml new file mode 100644 index 00000000000..774d58755f4 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml new file mode 100644 index 00000000000..d153793830a --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml new file mode 100644 index 00000000000..116f116a176 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml new file mode 100644 index 00000000000..a776d105541 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml @@ -0,0 
+1,37 @@ + + + + + + + + + + + + + + + + + + + + sim1text + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml new file mode 100644 index 00000000000..99028c18a7c --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + 6.0 + 1.5 + 3.3 + 7.7 + 5.0 + 5.0 + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml new file mode 100644 index 00000000000..cf34ec8e21b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + 6.0 + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml new file mode 100644 index 00000000000..61e18ad73c7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + 3.3 + + 5.0 + 5.0 + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml new file mode 100644 index 00000000000..ef4e8042b3c --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml @@ -0,0 +1,45 @@ + + + + + + + + + 
+ + + + 3 + + 0.5 + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml new file mode 100644 index 00000000000..bf1d53212e4 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml new file mode 100644 index 00000000000..81ce319eb86 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml new file mode 100644 index 00000000000..026b529a942 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml new file mode 100644 index 00000000000..5f4d69a31a7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + diff --git 
a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml new file mode 100644 index 00000000000..fc9e108bee3 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml @@ -0,0 +1,32 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + giberish + missleading.extension.updateprocessor.js.txt + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml new file mode 100644 index 00000000000..dbadbb5c2c0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml @@ -0,0 +1,33 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + javascript + + currency.xml + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml new file mode 100644 index 00000000000..a15c0ac1d6e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml @@ -0,0 +1,30 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + false + schema.xml + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml new file mode 100644 index 00000000000..4dee70ce08f --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml @@ -0,0 +1,31 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + a-file-name-that-does-not-exist.js + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml new file mode 100644 index 00000000000..f13acb3f6b0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml @@ -0,0 +1,30 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + true + false + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml new file mode 100644 index 00000000000..4da2a002f40 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml @@ -0,0 +1,34 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml new file mode 100644 index 00000000000..00dd08c36fe --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml @@ -0,0 +1,35 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + true + false + + + + ${useCompoundFile:false} + true + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml new file mode 100644 index 00000000000..9fe2e89e037 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml @@ -0,0 +1,32 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + false + schema.xml + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml new file mode 100644 index 00000000000..d07cb0d1c11 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml @@ -0,0 +1,32 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + false + managed-schema + bogusValue + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml new file mode 100644 index 00000000000..9c9c96402ec --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml @@ -0,0 +1,27 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + false + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml new file mode 100644 index 00000000000..ed07d9afdea --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml @@ -0,0 +1,27 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + ${unset.sys.property} + + diff --git 
a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt new file mode 100644 index 00000000000..f4977b5df72 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# A set of words for testing the DictionaryCompound factory +soft +ball +team diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js new file mode 100644 index 00000000000..5ec9487c150 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js @@ -0,0 +1,25 @@ +function processAdd(cmd) { + if (req.getParams().getBool("go-for-it",false)) { + cmd.getSolrInputDocument().addField("script_added_s", "i went for it"); + return true; + } + return false; +} + +// // // + +function processDelete() { + // NOOP +} +function processCommit() { + // NOOP +} +function processRollback() { + // NOOP +} +function processMergeIndexes() { + // NOOP +} +function finish() { + // NOOP +} diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/currency.xml new file mode 100644 index 00000000000..6a12b32b2a8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/currency.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml new file mode 100644 index 00000000000..2c8d203be68 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml @@ -0,0 +1,1208 @@ + + + + + + + + + + +aA +bB +cC +dD +eE +fF +gG +hH +iI +jJ +kK +lL +mM +nN +oO +pP +qQ +rR +sS +tT +uU +vV +wW +xX +yY +zZ +æÆ +øØ +åÅ + + + +.ae3 +.an3k +.an1s +.be5la +.be1t +.bi4tr +.der3i +.diagno5 +.her3 +.hoved3 +.ne4t5 +.om1 +.ove4 +.po1 +.til3 +.yd5r +ab5le +3abst +a3c +ade5la +5adg +a1e +5afg +5a4f1l +af3r +af4ri +5afs +a4gef +a4gi +ag5in +ag5si +3agti +a4gy +a3h +ais5t +a3j +a5ka 
+a3ke +a5kr +aku5 +a3la +a1le +a1li +al3k +4alkv +a1lo +al5si +a3lu +a1ly +am4pa +3analy +an4k5r +a3nu +3anv +a5o +a5pe +a3pi +a5po +a1ra +ar5af +1arb +a1re +5arg +a1ri +a3ro +a3sa +a3sc +a1si +a3sk +a3so +3a3sp +a3ste +a3sti +a1ta1 +a1te +a1ti +a4t5in +a1to +ato5v +a5tr +a1tu +a5va +a1ve +a5z +1ba +ba4ti +4bd +1be +be1k +be3ro +be5ru +be1s4 +be1tr +1bi +bi5sk +b1j +4b1n +1bo +bo4gr +bo3ra +bo5re +1br4 +4bs +bs5k +b3so +b1st +b5t +3bu +bu4s5tr +b5w +1by +by5s +4c1c +1ce +ce5ro +3ch +4ch. +ci4o +ck3 +5cy +3da +4d3af +d5anta +da4s +d1b +d1d4 +1de +de5d +4de4lem +der5eri +de4rig +de5sk +d1f +d1g +d3h +1di +di1e +di5l +d3j +d1k +d1l +d1m +4d1n +3do +4dop +d5ov +d1p +4drett +5d4reve +3drif +3driv +d5ros +d5ru +ds5an +ds5in +d1ski +d4sm +d4su +dsu5l +ds5vi +d3ta +d1te +dt5o +d5tr +dt5u +1du +dub5 +d1v +3dy +e5ad +e3af +e5ag +e3ak +e1al +ea4la +e3an +e5ap +e3at +e3bl +ebs3 +e1ci +ed5ar +edde4 +eddel5 +e4do +ed5ra +ed3re +ed3rin +ed4str +e3e +3eff +e3fr +3eft +e3gu +e1h +e3in +ei5s +e3je +e4j5el +e1ka +e3ke +e3kl +4e1ko +e5kr +ek5sa +3eksem +3eksp +e3ku +e1kv +e5ky +e3lad +el3ak +el3ar +e1las +e3le +e4lek +3elem +e1li +5elim +e3lo +el5sa +e5lu +e3ly +e4mad +em4p5le +em1s +en5ak +e4nan +4enn +e4no +en3so +e5nu +e5ol +e3op +e1or +e3ov +epi3 +e1pr +e3ra +er3af +e4rag +e4rak +e1re +e4ref +er5ege +5erhv +e1ri +e4rib +er1k +ero5d +er5ov +er3s +er5tr +e3rum +er5un +e5ry +e1ta +e1te +etek4s +e1ti +e3tj +e1to +e3tr +e3tu +e1ty +e3um +e3un +3eur +e1va +e3ve +e4v3erf +e1vi +e5x +1fa +fa4ce +fags3 +f1b +f1d +1fe +fej4 +fejl1 +f1f +f1g +f1h +1fi +f1k +3fl +1fo +for1en +fo4ri +f1p +f1s4 +4ft +f3ta +f1te +f1ti +f5to +f5tvi +1fu +f1v +3fy +1ga +g3art +g1b +g1d +1ge +4g5enden +ger3in +ge3s +g3f +g1g +g1h +1gi +gi4b +gi3st +5gj +g3k +g1l +g1m +3go +4g5om +g5ov +g3p +1gr +gs1a +gsde4len +g4se +gsha4 +g5sla +gs3or +gs1p +g5s4tide +g4str +gs1v +g3ta +g1te +g1ti +g5to +g3tr +gt4s +g3ud +gun5 +g3v +1gy +g5yd +4ha. 
+heds3 +he5s +4het +hi4e +hi4n5 +hi3s +ho5ko +ho5ve +4h3t +hun4 +hund3 +hvo4 +i1a +i3b +i4ble +i1c +i3dr +ids5k +i1el +i1en +i3er +i3et. +if3r +i3gu +i3h +i5i +i5j +i1ka +i1ke +ik1l +i5ko +ik3re +ik5ri +iks5t +ik4tu +i3ku +ik3v +i3lag +il3eg +il5ej +il5el +i3li +i4l5id +il3k +i1lo +il5u +i3mu +ind3t +5inf +ings1 +in3s +in4sv +inter1 +i3nu +i3od +i3og +i5ok +i3ol +ion4 +ions1 +i5o5r +i3ot +i5pi +i3pli +i5pr +i3re +i3ri +ir5t +i3sc +i3si +i4sm +is3p +i1ster +i3sti +i5sua +i1ta +i1te +i1ti +i3to +i3tr +it5re. +i1tu +i3ty +i1u +i1va +i1ve +i1vi +j3ag +jde4rer +jds1 +jek4to +4j5en. +j5k +j3le +j3li +jlmeld5 +jlmel4di +j3r +jre5 +ju3s +5kap +k5au +5kav +k5b +kel5s +ke3sk +ke5st +ke4t5a +k3h +ki3e +ki3st +k1k +k5lak +k1le +3klu +k4ny +5kod +1kon +ko3ra +3kort +ko3v +1kra +5kry +ks3an +k1si +ks3k +ks1p +k3ste +k5stu +ks5v +k1t +k4tar +k4terh +kti4e +kt5re +kt5s +3kur +1kus +3kut +k4vo +k4vu +5lab +lad3r +5lagd +la4g3r +5lam +1lat +l1b +ldiagnos5 +l3dr +ld3st +1le. +5led +4lele +le4mo +3len +1ler +1les +4leu +l1f +lfin4 +lfind5 +l1go1 +l3h +li4ga +4l5ins +4l3int +li5o +l3j +l1ke +l1ko +l3ky +l1l +l5mu +lo4du +l3op +4l5or +3lov +4l3p +l4ps +l3r +4ls +lses1 +ls5in +l5sj +l1ta +l4taf +l1te +l4t5erf +l3ti +lt3o +l3tr +l3tu +lu5l +l3ve +l3vi +1ma +m1b +m3d +1me +4m5ej +m3f +m1g +m3h +1mi +mi3k +m5ing +mi4o +mi5sty +m3k +m1l +m1m +mmen5 +m1n +3mo +mo4da +4mop +4m5ov +m1pe +m3pi +m3pl +m1po +m3pr +m1r +mse5s +ms5in +m5sk +ms3p +m3ste +ms5v +m3ta +m3te +m3ti +m3tr +m1ud +1mul +mu1li +3my +3na +4nak +1nal +n1b +n1c +4nd +n3dr +nd5si +nd5sk +nd5sp +1ne +ne5a +ne4da +nemen4 +nement5e +neo4 +n3erk +n5erl +ne5sl +ne5st +n1f +n4go +4n1h +1ni +4nim +ni5o +ni3st +n1ke +n1ko +n3kr +n3ku +n5kv +4n1l +n1m +n1n +1no +n3ord +n5p +n3r +4ns +n3si +n1sku +ns3po +n1sta +n5sti +n1ta +nta4le +n1te +n1ti +ntiali4 +n3to +n1tr +nt4s5t +nt4su +n3tu +n3ty +4n1v +3ny +n3z +o3a +o4as +ob3li +o1c +o4din +od5ri +od5s +od5un +o1e +of5r +o4gek +o4gel +o4g5o +og5re +og5sk +o5h +o5in +oi6s5e +o1j +o3ka +o1ke 
+o3ku +o3la +o3le +o1li +o1lo +o3lu +o5ly +1omr +on3k +ook5 +o3or +o5ov +o3pi +op3l +op3r +op3s +3opta +4or. +or1an +3ordn +ord5s +o3re. +o3reg +o3rek +o3rer +o3re3s +o3ret +o3ri +3orient +or5im +o4r5in +or3k +or5o +or3sl +or3st +o3si +o3so +o3t +o1te +o5un +ov4s +3pa +pa5gh +p5anl +p3d +4pec +3pen +1per +pe1ra +pe5s +pe3u +p3f +4p5h +1pla +p4lan +4ple. +4pler +4ples +p3m +p3n +5pok +4po3re +3pot +4p5p4 +p4ro +1proc +p3sk +p5so +ps4p +p3st +p1t +1pu +pu5b +p5ule +p5v +5py3 +qu4 +4raf +ra5is +4rarb +r1b +r4d5ar +r3dr +rd4s3 +4reks +1rel +re5la +r5enss +5rese +re5spo +4ress +re3st +re5s4u +5rett +r1f +r1gu +r1h +ri1e +ri5la +4rimo +r4ing +ringse4 +ringso4r +4rinp +4rint +r3ka +r1ke +r1ki +rk3so +r3ku +r1l +rmo4 +r5mu +r1n +ro1b +ro3p +r3or +r3p +r1r +rre5s +rro4n5 +r1sa +r1si +r5skr +r4sk5v +rs4n +r3sp +r5stu +r5su +r3sv +r5tal +r1te +r4teli +r1ti +r3to +r4t5or +rt5rat +rt3re +r5tri +r5tro +rt3s +r5ty +r3ud +run4da +5rut +r3va +r1ve +r3vi +ry4s +s3af +1sam +sa4ma +s3ap +s1ar +1sat +4s1b +s1d +sdy4 +1se +s4ed +5s4er +se4se +s1f +4s1g4 +4s3h +si4bl +1sig +s5int +5sis +5sit +5siu +s5ju +4sk. +1skab +1ske +s3kl +sk5s4 +5sky +s1le +s1li +slo3 +5slu +s5ly +s1m +s4my +4snin +s4nit +so5k +5sol +5som. +3somm +s5oms +5somt +3son +4s1op +sp4 +3spec +4sper +3s4pi +s1pl +3sprog. +s5r4 +s1s4 +4st. +5s4tam +1stan +st5as +3stat +1stav +1ste. +1sted +3stel +5stemo +1sten +5step +3ster. +3stes +5stet +5stj +3sto +st5om +1str +s1ud +3sul +s3un +3sur +s3ve +3s4y +1sy1s +5ta. 
+1tag +tands3 +4tanv +4tb +tede4l +teds5 +3teg +5tekn +teo1 +5term +te5ro +4t1f +6t3g +t1h +tialis5t +3tid +ti4en +ti3st +4t3k +4t1l +tli4s5 +t1m +t1n +to5ra +to1re +to1ri +tor4m +4t3p +t4ra +4tres +tro5v +1try +4ts +t3si +ts4pa +ts5pr +t3st +ts5ul +4t1t +t5uds +5tur +t5ve +1typ +u1a +5udl +ud5r +ud3s +3udv +u1e +ue4t5 +uge4ri +ugs3 +u5gu +u3i +u5kl +uk4ta +uk4tr +u1la +u1le +u5ly +u5pe +up5l +u5q +u3ra +u3re +u4r3eg +u1rer +u3ro +us5a +u3si +u5ska +u5so +us5v +u1te +u1ti +u1to +ut5r +ut5s4 +5u5v +va5d +3varm +1ved +ve4l5e +ve4reg +ve3s +5vet +v5h +vi4l3in +1vis +v5j +v5k +vl4 +v3le +v5li +vls1 +1vo +4v5om +v5p +v5re +v3st +v5su +v5t +3vu +y3a +y5dr +y3e +y3ke +y5ki +yk3li +y3ko +yk4s5 +y3kv +y5li +y5lo +y5mu +yns5 +y5o +y1pe +y3pi +y3re +yr3ek +y3ri +y3si +y3ti +y5t3r +y5ve +zi5o + +.så3 +.ær5i +.øv3r +a3tø +a5væ +brød3 +5bæ +5drøv +dstå4 +3dæ +3dø +e3læ +e3lø +e3rø +er5øn +e5tæ +e5tø +e1væ +e3æ +e5å +3fæ +3fø +fø4r5en +giø4 +g4sø +g5så +3gæ +3gø1 +3gå +i5tæ +i3ø +3kø +3kå +lingeniø4 +l3væ +5løs +m5tå +1mæ +3mø +3må +n3kæ +n5tæ +3næ +4n5æb +5nø +o5læ +or3ø +o5å +5præ +5pæd +på3 +r5kæ +r5tæ +r5tø +r3væ +r5æl +4røn +5rør +3råd +r5år +s4kå +3slå +s4næ +5stø +1stå +1sæ +4s5æn +1sø +s5øk +så4r5 +ti4ø +3træk. 
+t4sø +t5så +t3væ +u3læ +3værd +1værk +5vå +y5væ +æb3l +æ3c +æ3e +æg5a +æ4gek +æ4g5r +ægs5 +æ5i +æ5kv +ælle4 +æn1dr +æ5o +æ1re +ær4g5r +æ3ri +ær4ma +ær4mo +ær5s +æ5si +æ3so +æ3ste +æ3ve +øde5 +ø3e +ø1je +ø3ke +ø3le +øms5 +øn3st +øn4t3 +ø1re +ø3ri +ørne3 +ør5o +ø1ve +å1d +å1e +å5h +å3l +å3re +års5t +å5sk +å3t + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt new file mode 100644 index 00000000000..9a14f40c5f9 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# A set of words for testing the HyphenationCompound factory, +# in conjunction with the danish hyphenation grammar. 
+læse +hest diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml new file mode 100644 index 00000000000..1befc5443e7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt new file mode 100644 index 00000000000..914161185f7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# A set of articles for testing the French Elision filter. +# Requiring a text file is a bit weird here... 
+l +m +t +qu +n +s +j diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt new file mode 100644 index 00000000000..94e2152160a --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt @@ -0,0 +1,4 @@ +# simple fuzzy suggest phrase dictionary for testing +change 1.0 +charge 1.0 +chance 1.0 \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff new file mode 100644 index 00000000000..d035ad18001 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff @@ -0,0 +1,13 @@ +SET UTF-8 +TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ + +SFX A Y 2 +SFX A 0 e n +SFX A 0 e t + +SFX C Y 2 +SFX C 0 d/C c +SFX C 0 c b + +PFX B Y 1 +PFX B 0 s o \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic new file mode 100644 index 00000000000..92c35d2b6ab --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic @@ -0,0 +1,6 @@ +5 +lucen/A +lucene +mahout/A +olr/B +ab/C \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd new file mode 100644 index 00000000000..083c2bd8e80 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt new file mode 100644 index 00000000000..6df149de61a --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt @@ -0,0 +1,5 @@ +# simple auto-suggest phrase dictionary for testing +# note this uses tabs as separator! +北海道 1.0 +今夜 3.0 +話した 6.0 \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt new file mode 100644 index 00000000000..8dfe80902d2 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +foo +bar \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt new file mode 100644 index 00000000000..646b7ff4ddb --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +junk +more \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt new file mode 100644 index 00000000000..ede7742581b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt @@ -0,0 +1,246 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Syntax: +# "source" => "target" +# "source".length() > 0 (source cannot be empty.) +# "target".length() >= 0 (target can be empty.) 
+ +# example: +# "À" => "A" +# "\u00C0" => "A" +# "\u00C0" => "\u0041" +# "ß" => "ss" +# "\t" => " " +# "\n" => "" + +# À => A +"\u00C0" => "A" + +# Á => A +"\u00C1" => "A" + +#  => A +"\u00C2" => "A" + +# à => A +"\u00C3" => "A" + +# Ä => A +"\u00C4" => "A" + +# Å => A +"\u00C5" => "A" + +# Æ => AE +"\u00C6" => "AE" + +# Ç => C +"\u00C7" => "C" + +# È => E +"\u00C8" => "E" + +# É => E +"\u00C9" => "E" + +# Ê => E +"\u00CA" => "E" + +# Ë => E +"\u00CB" => "E" + +# Ì => I +"\u00CC" => "I" + +# Í => I +"\u00CD" => "I" + +# Î => I +"\u00CE" => "I" + +# Ï => I +"\u00CF" => "I" + +# IJ => IJ +"\u0132" => "IJ" + +# Ð => D +"\u00D0" => "D" + +# Ñ => N +"\u00D1" => "N" + +# Ò => O +"\u00D2" => "O" + +# Ó => O +"\u00D3" => "O" + +# Ô => O +"\u00D4" => "O" + +# Õ => O +"\u00D5" => "O" + +# Ö => O +"\u00D6" => "O" + +# Ø => O +"\u00D8" => "O" + +# Œ => OE +"\u0152" => "OE" + +# Þ +"\u00DE" => "TH" + +# Ù => U +"\u00D9" => "U" + +# Ú => U +"\u00DA" => "U" + +# Û => U +"\u00DB" => "U" + +# Ü => U +"\u00DC" => "U" + +# Ý => Y +"\u00DD" => "Y" + +# Ÿ => Y +"\u0178" => "Y" + +# à => a +"\u00E0" => "a" + +# á => a +"\u00E1" => "a" + +# â => a +"\u00E2" => "a" + +# ã => a +"\u00E3" => "a" + +# ä => a +"\u00E4" => "a" + +# å => a +"\u00E5" => "a" + +# æ => ae +"\u00E6" => "ae" + +# ç => c +"\u00E7" => "c" + +# è => e +"\u00E8" => "e" + +# é => e +"\u00E9" => "e" + +# ê => e +"\u00EA" => "e" + +# ë => e +"\u00EB" => "e" + +# ì => i +"\u00EC" => "i" + +# í => i +"\u00ED" => "i" + +# î => i +"\u00EE" => "i" + +# ï => i +"\u00EF" => "i" + +# ij => ij +"\u0133" => "ij" + +# ð => d +"\u00F0" => "d" + +# ñ => n +"\u00F1" => "n" + +# ò => o +"\u00F2" => "o" + +# ó => o +"\u00F3" => "o" + +# ô => o +"\u00F4" => "o" + +# õ => o +"\u00F5" => "o" + +# ö => o +"\u00F6" => "o" + +# ø => o +"\u00F8" => "o" + +# œ => oe +"\u0153" => "oe" + +# ß => ss +"\u00DF" => "ss" + +# þ => th +"\u00FE" => "th" + +# ù => u +"\u00F9" => "u" + +# ú => u +"\u00FA" => "u" + +# û => u +"\u00FB" => "u" + +# ü => u 
+"\u00FC" => "u" + +# ý => y +"\u00FD" => "y" + +# ÿ => y +"\u00FF" => "y" + +# ff => ff +"\uFB00" => "ff" + +# fi => fi +"\uFB01" => "fi" + +# fl => fl +"\uFB02" => "fl" + +# ffi => ffi +"\uFB03" => "ffi" + +# ffl => ffl +"\uFB04" => "ffl" + +# ſt => ft +"\uFB05" => "ft" + +# st => st +"\uFB06" => "st" diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js new file mode 100644 index 00000000000..6e8728a0d77 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js @@ -0,0 +1,3 @@ +function doSomeStuff() { + return "This script doesn't contain any update processor functions"; +} diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt new file mode 100644 index 00000000000..984e1d82f10 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt @@ -0,0 +1,23 @@ +function processAdd(cmd) { + // Integer.valueOf is needed here to get a tru java object, because + // all javascript numbers are floating point (ie: java.lang.Double) + cmd.getSolrInputDocument().addField("script_added_i", + java.lang.Integer.valueOf(42)); + cmd.getSolrInputDocument().addField("script_added_d", 42.3); + +} +function processDelete() { + // NOOP +} +function processCommit() { + // NOOP +} +function processRollback() { + // NOOP +} +function processMergeIndexes() { + // NOOP +} +function finish() { + // NOOP +} diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt new file mode 100644 index 
00000000000..a7624f0597d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +a => aa +b => b1 b2 +c => c1,c2 +a\=>a => b\=>b +a\,a => b\,b +foo,bar,baz + +Television,TV,Televisions diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json new file mode 100644 index 00000000000..8fbc217f6e9 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json @@ -0,0 +1,18 @@ +{ + "disclaimer": "This data is not real, it was synthetically created to match currency.xml. It is modeled after the data format available from openexchangerates.org. 
See https://openexchangerates.org/documentation for details", + "license": "http://www.apache.org/licenses/LICENSE-2.0", + "timestamp": 1332070464, + + + "IMPORTANT NOTE": "In order for tests to work, this data must be kept in sync with ./currency.xml", + + + "base": "USD", + "rates": { + "USD": 1, + "JPY": 81.29, + "EUR": 2.5, + "GBP": 0.5, + "MXN": 2.0 + } +} diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt new file mode 100644 index 00000000000..fd4984d70b8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt @@ -0,0 +1,8 @@ +# simple auto-suggest phrase dictionary for testing +# note this uses tabs as separator! +the first phrase 1.0 +the second phrase 2.0 +testing 1234 3.0 +foo 5.0 +the fifth phrase 2.0 +the final phrase 4.0 diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/protwords.txt new file mode 100644 index 00000000000..ab7e3e2470e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/protwords.txt @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +#use a protected word file to avoid stemming two +#unrelated words to the same base word. +#to test, we will use words that would normally obviously be stemmed. +cats +ridding +c# +c++ +.net diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt new file mode 100644 index 00000000000..1dc0537c72b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt @@ -0,0 +1,10 @@ +# Sample config file for RegexBoostProcessor +# This example applies boost on the "url" field to boost or deboost certain urls +# All rules are evaluated, and if several of them match, the boosts are multiplied. +# If for example one rule with boost 2.0 and one rule with boost 0.1 match, the resulting urlboost=0.2 + +https?://[^/]+/old/.* 0.1 #Comments are removed +https?://[^/]+/.*index\([0-9]\).html$ 0.5 + +# Prioritize certain sites over others +https?://www.mydomain.no/.* 1.5 \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml new file mode 100644 index 00000000000..2b59472f5f0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml new file mode 100644 index 00000000000..20b5a3533b9 --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml @@ -0,0 +1,121 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml new file mode 100644 index 00000000000..1f9312e61d0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml new file mode 100644 index 00000000000..54bdc0566aa --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + 1.2 + 0.76 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml new file mode 100644 index 00000000000..5eaab1f19e5 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + content + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml new file mode 100644 index 00000000000..46a1321260c --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml new file mode 100644 index 00000000000..7feb73a3015 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml new file mode 100644 index 00000000000..3ab7837284f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xmltext + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml new file mode 100644 index 00000000000..c4f7d8331dd --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + I(F) + B + H2 + + + + + + + + I(F) + B + H3 + 900 + + + + + + + + P + L + H2 + 7 + + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml new file mode 100644 index 00000000000..63d87997402 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml new file mode 100755 index 00000000000..0e3116d0797 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml new file mode 100644 index 00000000000..3e39c2c40ac --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml new file mode 100644 index 00000000000..6d58feda4e5 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml new file mode 100644 index 00000000000..60cab4f8601 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + id + + + + + + + + + + + + diff --git 
a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xml new file mode 100644 index 00000000000..c2a0e60f3ed --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xmlcontent + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml new file mode 100644 index 00000000000..3d55b2ac70b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + SPL + DF + H2 + + + + + + + + LL + TTF + H3 + 900 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml new file mode 100644 index 00000000000..9f5059f26c1 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml new file mode 100644 index 00000000000..f39922f7c45 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + 1000 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml new file mode 100644 index 00000000000..49b692e8d90 --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + 0.4 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml new file mode 100644 index 00000000000..3bb2b491b3b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml new file mode 100644 index 00000000000..9e2f9471026 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml @@ -0,0 +1,25 @@ + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml new file mode 100644 index 00000000000..b3869812375 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + subject + id + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml new file mode 100644 index 00000000000..d00545ed102 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff 
--git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml new file mode 100644 index 00000000000..783ae77c958 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + str + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml new file mode 100644 index 00000000000..035f975d6b2 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml new file mode 100644 index 00000000000..f5ed9155e66 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml new file mode 100644 index 00000000000..e58b2e82eaf --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml new file mode 100644 index 00000000000..fe123dfa6d0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml new file mode 100644 index 00000000000..a2409459aa7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml new file mode 100644 index 00000000000..8dea7914549 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xmltext + id + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml new file mode 100644 index 00000000000..15caf81c67d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xml new file mode 100755 index 00000000000..a735e434bc7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xmltext + id + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml new file mode 100644 index 00000000000..40fc0e8e2f5 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + one + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml new file mode 100644 index 00000000000..ca2bd788b38 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + is there an echo? + + + + + + + + + + + + + + + + + + + + + + + + sim1text + id + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml new file mode 100644 index 00000000000..9e0d29f3e20 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml @@ -0,0 +1,3 @@ + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml new file mode 100644 index 00000000000..bfbd3334204 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml @@ -0,0 +1,3 @@ + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl new file mode 
100644 index 00000000000..fe9fd6d7a7b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl @@ -0,0 +1,19 @@ + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml new file mode 100644 index 00000000000..d1ca1f701cd --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml new file mode 100644 index 00000000000..7124065626d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml new file mode 100644 index 00000000000..831539ee8be --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + one + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml new file mode 100644 index 00000000000..350e2e90851 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + 6.0 + 1.5 + + 3 + 
5 + 0.5 + + + + + + + + 3.3 + 7.7 + 2.718281828459045 + 5.0 + + 1 + 5 + 0.2 + + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml new file mode 100644 index 00000000000..0906a13bfb5 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml new file mode 100644 index 00000000000..eacea9009a8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + + text + id + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml new file mode 100644 index 00000000000..08e0aebc42f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + id + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml new file mode 100644 index 00000000000..1819bfa9020 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml @@ -0,0 +1,332 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml new file mode 100644 index 00000000000..94194df6192 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml @@ -0,0 +1,30 @@ + + +]> + + + + + + &schema_entity_include; + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema.xml new file mode 100644 index 00000000000..a22844de0c4 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema.xmltext + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + I am your default sim + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema11.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema11.xml new file mode 100755 index 00000000000..a993cbd6f61 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema11.xml @@ -0,0 +1,387 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema12.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema12.xml new file mode 100755 index 00000000000..506e08d787a 
--- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema12.xmltext + id + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema15.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema15.xml new file mode 100755 index 00000000000..b05e1a7ce9e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema15.xmltext + id + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml new file mode 100644 index 00000000000..4e49dce953e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + string_f + string_f + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xml new file mode 100644 index 00000000000..04e90e33678 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xmltext + id + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml new file mode 100644 index 00000000000..1fabd5c202f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml @@ -0,0 +1,29 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml 
new file mode 100644 index 00000000000..9a59d90820a --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml @@ -0,0 +1,155 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + true + managed-schema + + + + + text + + java.lang.Boolean + boolean + + + java.lang.Integer + tint + + + java.lang.Float + tfloat + + + java.util.Date + tdate + + + java.lang.Long + java.lang.Integer + tlong + + + + java.lang.Double + java.lang.Float + + tdouble + + + + + + + text + + java.lang.Boolean + boolean + + + java.lang.Integer + tint + + + java.lang.Float + tfloat + + + java.util.Date + tdate + + + java.lang.Long + java.lang.Integer + tlong + + + java.lang.Number + tdouble + + + + + + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd + + + + text + + java.lang.Boolean + boolean + + + java.lang.Integer + tint + + + java.lang.Float + tfloat + + + java.util.Date + tdate + + + java.lang.Long + java.lang.Integer + tlong + + + java.lang.Number + tdouble + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml new file mode 100755 index 00000000000..3105baf5157 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml @@ -0,0 +1,26 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml new file mode 100644 index 00000000000..03963023ae1 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml @@ -0,0 +1,29 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml new file mode 100644 index 00000000000..0de6f9412f7 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml @@ -0,0 +1,39 @@ + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml new file mode 100644 index 00000000000..b5501d85508 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml @@ -0,0 +1,75 @@ + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + true + + component1 + + + component2 + + + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + foo + + + bar + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml new file mode 100644 index 00000000000..fe39eef6a3e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml @@ -0,0 +1,43 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml new file mode 100644 index 00000000000..5cd0e7edf1a --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml @@ -0,0 +1,51 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + ${useCompoundFile:false} + + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + + + single + + + + true + 3 + 100MILLISECONDS + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml new file mode 100644 index 00000000000..9925a1e1b69 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml @@ -0,0 +1,48 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + ${useCompoundFile:false} + + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + + + single + + + value1 + value2 + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml new file mode 100644 index 00000000000..b7dc855a0c5 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml @@ -0,0 +1,178 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + true + + 10 + + + + + + + + + + + + + + string + ${elevate.file:elevate.xml} + + + + + + string + ${elevate.data.file:elevate-data.xml} + + + + + explicit + + + elevate + + + + + + explicit + + + dataElevate + + + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + 
prefix-${solr.test.sys.prop2}-suffix + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml new file mode 100755 index 00000000000..1a1a4ffca62 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml @@ -0,0 +1,43 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + 0.0 + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml new file mode 100644 index 00000000000..7d55cc2adef --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml @@ -0,0 +1,60 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml new file mode 100644 index 00000000000..a54168c38cd --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml @@ -0,0 +1,79 @@ + + + + + + + LUCENE_41 + + ${solr.data.dir:} + + + + + + + + + + + + true + 20 + 20 + + true + + 1 + + + + + + + + + + + + all + text + ${solr.core.name} + ${solr.core.dataDir} + ${solr.core.config} + ${solr.core.schema} + ${solr.core.transient} + + + + + + + + + text/plain; charset=UTF-8 + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml new file mode 100644 index 
00000000000..066f8632e96 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml @@ -0,0 +1,30 @@ + + + + + ${solr.data.dir:} + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + ${useCompoundFile:false} + 123 + true + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml new file mode 100644 index 00000000000..722f5e42265 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml @@ -0,0 +1,27 @@ + + + + + ${solr.data.dir:} + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + true + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml new file mode 100644 index 00000000000..0636a1dcfac --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml @@ -0,0 +1,28 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml new file mode 100644 index 00000000000..371bfb5638d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml @@ -0,0 +1,37 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + -1 + -1 + -1 + + 11 + 456 + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml new file mode 100644 index 00000000000..fc49a7b1c8c --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml @@ -0,0 +1,51 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + ${managed.schema.mutable} + managed-schema + + + + + + + ${solr.ulog.dir:} + + + + + true + + + + + true + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml new file mode 100644 index 00000000000..9118bef45f0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml @@ -0,0 +1,72 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + true + + + + + commit + + schema.xml,xslt/dummy.xsl + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml new file mode 100644 index 00000000000..30b4e3b7cb6 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml @@ -0,0 +1,49 @@ + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + + + + commit + schema-replication2.xml:schema.xml + + 1 + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml new file mode 100644 index 00000000000..2e9885f4478 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml @@ -0,0 +1,69 @@ + + + + + + 
${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + commit + schema-replication2.xml:schema.xml + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml new file mode 100644 index 00000000000..21d38a3af94 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml @@ -0,0 +1,69 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + startup + schema.xml + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml new file mode 100644 index 00000000000..b19073ba0ef --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml @@ -0,0 +1,70 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + commit + startup + schema.xml + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml new file mode 100644 index 00000000000..9d2a99aff4d --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml @@ -0,0 +1,32 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml new file mode 100644 index 00000000000..00c77ae5e78 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml @@ -0,0 +1,31 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + 7 + ${useCompoundFile:false} + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml new file mode 100644 index 00000000000..78a4eb711d3 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml @@ -0,0 +1,75 @@ + + + + + + + LUCENE_41 + + ${solr.data.dir:} + + + + + + + + + + + + true + 20 + 20 + + true + + 1 + + + + + + + + + + + explicit + json + true + text + + + + + + + + + text/plain; charset=UTF-8 + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml new file mode 100644 index 00000000000..ee27d0c49de --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml @@ -0,0 +1,41 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + ${solr.data.dir:} + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml new file mode 100644 index 00000000000..4537724b433 
--- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml @@ -0,0 +1,36 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml new file mode 100644 index 00000000000..3c41f507158 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml @@ -0,0 +1,230 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + false + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + solr.DateField + solr.TrieDateField + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + America/New_York + en_US + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + + + + + + + America/Los_Angeles + + MM/dd/yyyy + + + + + + + UTC + en_US + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd hh:mm a + yyyy-MM-dd hh:mma + yyyy-MM-dd + EEE MMM dd HH:mm:ss Z yyyy + EEE MMM dd HH:mm:ss yyyy Z + EEE MMM dd HH:mm:ss yyyy + EEE, dd MMM yyyy HH:mm:ss Z + EEEE, dd-MMM-yy HH:mm:ss Z + EEEE, MMMM dd, yyyy + MMMM dd, yyyy + MMM. 
dd, yyyy + + + + + + + UTC + fr + 'le' EEEE dd MMMM yyyy + + + + + + + + + + + + + + + ru_RU + + + + + + + + + + + + + + + ru_RU + + + + + + + + + + + + + + + fr_FR + + + + + + + + + + + + + + + fr_FR + + + + + + + + + + + + + + + false + + true + YES + on + + + false + no + oFF + + + + + + + yup + nope + + + + + + + + + + + + + + + + yyyy-MM-dd + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml new file mode 100644 index 00000000000..b4f560ed32f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml @@ -0,0 +1,272 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + suggest_wfst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.WFSTLookupFactory + suggest_wfst + false + + + true + + phrasesuggest.txt + + + + phrase_suggest + + + + + + suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.AnalyzingLookupFactory + suggest_analyzing + false + + + true + ja_suggest + false + + jasuggest.txt + + + + phrase_suggest + + + + + + infix_suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory + false + + + text + + analyzingInfixSuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing + false + + + true + text + false + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing_with_max_edit_2 + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing_with_max_edit_2 + false + + + true 
+ text + false + 2 + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + false + + + true + text + false + 4 + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + false + + + true + text + false + 2 + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + + + + true + suggest_wfst + false + + true + + + suggest_wfst + + + + + + + true + suggest_analyzing + false + + true + + + suggest_analyzing + + + + + + + true + infix_suggest_analyzing + false + + true + + + infix_suggest_analyzing + + + + + + true + fuzzy_suggest_analyzing + false + + true + + + fuzzy_suggest_analyzing + + + + + + + true + fuzzy_suggest_analyzing_with_max_edit_2 + false + + true + + + fuzzy_suggest_analyzing_with_max_edit_2 + + + + + + + true + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + false + + true + + + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + + + + + + + true + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + false + + true + + + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml new file mode 100644 index 00000000000..c3d9d544e1f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml @@ -0,0 +1,34 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + false + + + + + + diff --git 
a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml new file mode 100644 index 00000000000..af6cc75112d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml @@ -0,0 +1,74 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml new file mode 100644 index 00000000000..12252c06b6f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml @@ -0,0 +1,70 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + + solr 0 10 mock + rocks 0 10 mock + + + + + + + + fast_warm 0 10 + mock + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml new file mode 100644 index 00000000000..5ec8e5920b3 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml @@ -0,0 +1,63 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + true + + + + + + + + + + + + + + + + commit + schema.xml + + + http://127.0.0.1:TEST_PORT/solr/replication + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl new file mode 100644 index 00000000000..03f236fccf7 --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl @@ -0,0 +1,5 @@ + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml new file mode 100644 index 00000000000..859883d52f0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml @@ -0,0 +1,54 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + ${solr.data.dir:} + + + + + + + + + + dismax + + + responselog + + + + + + dismax + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml new file mode 100644 index 00000000000..43fbc2873da --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml @@ -0,0 +1,112 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + javascript + missleading.extension.updateprocessor.js.txt + + + + + + + + + + + + trivial.updateprocessor0.js + + true + 1 + + + + + + + + + trivial.updateprocessor0.js + trivial.updateprocessor1.js + + + true + 1 + + + + + + + + trivial.updateprocessor0.js + trivial.updateprocessor1.js + + true + 1 + + + + + + + + + conditional.updateprocessor.js + addfields.updateprocessor.js + + + + + + + conditional.updateprocessor.js + + + addfields.updateprocessor.js + + + + + + throw.error.on.add.updateprocessor.js + + + + + missing.functions.updateprocessor.js + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml new file mode 100644 index 00000000000..ac2e59ee56e --- /dev/null 
+++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml @@ -0,0 +1,61 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + true + + + + + + + + + + + + + + + + http://127.0.0.1:TEST_PORT/solr + 00:00:01 + COMPRESSION + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml new file mode 100644 index 00000000000..36d6d92e146 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml @@ -0,0 +1,57 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml new file mode 100644 index 00000000000..8c76857f32b --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml @@ -0,0 +1,6 @@ + + + field-included + x + x_x + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml new file mode 100644 index 00000000000..3a1547f1b1c --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml @@ -0,0 +1,35 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + ${foo.foo1} + ${foo.foo2} + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml new file mode 100644 index 00000000000..9092a5875a8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml @@ -0,0 +1,178 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + ${solr.data.dir:} + + + + + + + lowerpunctfilt + + + default + lowerfilt + spellchecker1 + true + + + default_teststop + default_teststop + true + teststop + + + direct + solr.DirectSolrSpellChecker + 3 + 100 + teststop + + + direct_lowerfilt + solr.DirectSolrSpellChecker + 3 + 100 + lowerfilt + + + wordbreak + solr.WordBreakSolrSpellChecker + lowerfilt + true + true + MAX_FREQ + 10 + + + threshold + lowerfilt + spellcheckerThreshold + true + .29 + + + threshold_direct + solr.DirectSolrSpellChecker + lowerfilt + spellcheckerThreshold + true + .29 + + + multipleFields + lowerfilt1and2 + spellcheckerMultipleFields + true + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + spellchecker2 + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + spellchecker3 + + + + freq + lowerfilt + spellcheckerFreq + + freq + true + + + fqcn + lowerfilt + spellcheckerFQCN + org.apache.solr.spelling.SampleComparator + true + + + perDict + org.apache.solr.handler.component.DummyCustomParamSpellChecker + lowerfilt + + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + dismax + lowerfilt1^1 + + + spellcheck + + + + + default + wordbreak + 20 + + + spellcheck + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml new file mode 100644 index 00000000000..e6744cb3944 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml @@ -0,0 +1,142 @@ + + + + + + + + + 
${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + suggest + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.jaspell.JaspellLookup + suggest + suggest + true + + + 0.0 + + + + + + + suggest_tst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.tst.TSTLookup + suggest + suggest_tst + true + + + 0.0 + + + + + + + suggest_fst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FSTLookup + suggest + suggest_fst + true + + + 5 + true + + + + + + + suggest_wfst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.WFSTLookupFactory + suggest + suggest_wfst + true + + + true + + + + + + + true + suggest + true + + + suggest_jaspell + + + + + + + true + suggest_tst + true + + + suggest_tst + + + + + + + true + suggest_fst + false + + + suggest_fst + + + + + + + true + suggest_wfst + false + + + suggest_wfst + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml new file mode 100644 index 00000000000..fdca7893d92 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml @@ -0,0 +1,52 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + + + + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml new file mode 100644 index 00000000000..86a79fbf8fc --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml @@ -0,0 +1,47 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + 7 + + 19 + 9 
+ 0.1 + + + ${useCompoundFile:false} + + + + 987 + 42 + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml new file mode 100644 index 00000000000..d55845c13d0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml @@ -0,0 +1,120 @@ + + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.blocksperbank:1024} + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + + + ${solr.data.dir:} + + + + + + + + + + + + + + + true + + + + + + + + + + ${solr.ulog.dir:} + + + + + + true + true + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + + true + non_indexed_signature_sS + false + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + + + + + regex_dup_A_s + x + x_x + + + + regex_dup_B_s + x + x_x + + + + + + + + regex_dup_A_s + x + x_x + + + regex_dup_B_s + x + x_x + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml new file mode 100644 index 00000000000..ecaaf1146d5 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml @@ -0,0 +1,84 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + x1 + x2 + + + + 100 + + + + x1 + x2 + + + + + xA + xA + + + + + + + + 88 + 99 + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml new file mode 100644 index 00000000000..1b99f61dc36 --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml @@ -0,0 +1,464 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + solr.TrieIntField + solr.TrieLongField + + + + min_foo_l + + + max_foo_l + + + ; + + primary_author_s1 + + + + primary_author_s1 + first_foo_l + + + + + + + + + + + + + foo_t + + + + + + + + foo_t + + + + + + foo.* + bar.* + + .*HOSS.* + + + + + + foo.* + bar.* + + + solr.DateField + + + .*HOSS.* + + + + + + foo.* + bar.* + + + solr.DateField + .*HOSS.* + + + + + + + name + foo_t + + + + + + name + foo_t + + + + + + + foo.* + bar.*_s + + + + + + nametext + text_sw + + + + + + solr.DateField + solr.StrField + + + + + + solr.DateField + solr.StrField + + foo.* + + + + + + + + + + + + + + + + + + foo.* + yak.* + + + + + + + + + + + foo_s + + + + + string + ; + + + + + + foo_s + bar_s + + + + + foo_s + bar_s + + + + + foo_i + foo_s + bar_s + + + + + foo_i + foo_s + bar_s + + + + + + html_s + + + + + + + trunc + 5 + + + + + + count_field + + + + + + + + + + false + + + + + + true + + + + + + foo.* + false + + + + + + foo.* + + false + + + + + + + false + + + + + + true + + + + + + .*_raw + + + + + + source1_s + dest_s + + + + + source1_s + source2_s + dest_s + + + + + + + source1_s + source2_s + + dest_s + + + + + + + source\d_.* + + source0_.* + + + dest_s + + + + + + field1 + toField + + + toField + 3 + + + + + + field1 + toField + + + field1 + + + + + + toField + + + field1 + toField + + + + + + field1 + field2 + toField + + + ; + toField + + + + + + + category + category_s + + + + authors + editors + + contributors + + + + .*_price + + list_price + + + all_prices + + + + + + category + category_count + + + category_count + + + category_count + 0 + + + + + + content + title + \s+ + X + + + + + + processor_default_s + X + + + processor_default_i + 42 + + + uuid + + + timestamp + + + + + + uniq_.* + + + + + + subject + title + teststop + nonexistent + ssto + sind + simple + 
+ + + + + + subject + title + teststop + nonexistent + ssto + sind + json + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml new file mode 100644 index 00000000000..3f187f34d9d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml @@ -0,0 +1,46 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + ${useCompoundFile} + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + 1000 + 10000 + single + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml new file mode 100644 index 00000000000..230a1ebf2f6 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml @@ -0,0 +1,35 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml new file mode 100644 index 00000000000..055f3d7faeb --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml @@ -0,0 +1,48 @@ + + + + + + + + + ${useCompoundFile:false} + + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + + ${solr.tests.nrtMode:true} + + 1000 + 10000 + + + ${solr.tests.lockType:single} + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml new file mode 
100644 index 00000000000..810aa1d312e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,562 @@ + + + + + + + + + + + + ${solr.data.dir:} + + + + 1000000 + 2000000 + 3000000 + 4000000 + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + 1024 + + + + + + + + + + + + true + + + + + + 10 + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + true + + + + + + dismax + *:* + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + lowerpunctfilt + + + default + lowerfilt + spellchecker1 + false + + + direct + DirectSolrSpellChecker + lowerfilt + 3 + + + wordbreak + solr.WordBreakSolrSpellChecker + lowerfilt + true + true + 10 + + + multipleFields + lowerfilt1and2 + spellcheckerMultipleFields + false + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + spellchecker2 + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + spellchecker3 + + + + freq + lowerfilt + spellcheckerFreq + + freq + false + + + fqcn + lowerfilt + spellcheckerFQCN + org.apache.solr.spelling.SampleComparator + false + + + perDict + org.apache.solr.handler.component.DummyCustomParamSpellChecker + lowerfilt + + + + + + + + termsComp + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + direct + false + false + 1 + + + spellcheck + + + + + default + wordbreak + 20 + + + spellcheck + + + + + direct + wordbreak + 20 + + + spellcheck + + + + + dismax + lowerfilt1^1 + + + spellcheck + + + + + + + + + + + + + + + tvComponent + + + + + + + + + + + + 100 + + + + + + 70 + + + + + + + 
]]> + ]]> + + + + + + + + + + + + + 10 + .,!? + + + + + + WORD + en + US + + + + + + + + + + max-age=30, public + + + + + + + explicit + true + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + false + true + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + false + false + id + + org.apache.solr.update.processor.Lookup3Signature + + + + + + + true + non_indexed_signature_sS + false + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + + uniq + uniq2 + uniq3 + + + + + + + + + regex_dup_A_s + x + x_x + + + + regex_dup_B_s + x + x_x + + + + + + + + regex_dup_A_s + x + x_x + + + regex_dup_B_s + x + x_x + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml new file mode 100644 index 00000000000..c5cc04cfe9d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml @@ -0,0 +1,25 @@ + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml new file mode 100755 index 00000000000..172fc953f37 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml @@ -0,0 +1,76 @@ + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + true + 20 + 200 + false + 2 + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt new file mode 100644 index 00000000000..f57a4ad490f --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# test that we can override the stemming algorithm with our own mappings +# these must be tab-separated +monkeys monkey +otters otter +# some crazy ones that a stemmer would never do +dogs cat diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt new file mode 100644 index 00000000000..8dfe80902d2 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +foo +bar \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt new file mode 100644 index 00000000000..646b7ff4ddb --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +junk +more \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt new file mode 100644 index 00000000000..1c0c6f51142 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt @@ -0,0 +1,10 @@ + | This is a file in snowball format, empty lines are ignored, '|' is a comment + | Additionally, multiple words can be on the same line, allowing stopwords to be + | arranged in tables (useful in some languages where they might inflect) + + | fictitious table below + +|third person singular +|Subject Object Possessive Reflexive +he him his himself| masculine +she her hers herself| feminine diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt new file mode 100644 index 00000000000..456348ea9dc --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt new file mode 100644 index 00000000000..d8a3810c26c --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt new file mode 100644 index 00000000000..eb5f6e1c0f8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt @@ -0,0 +1 @@ +BOMsAreEvil diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..b5824da3263 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +#Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +s +such +t +that +the +their +then +there +these +they +this +to +was +will +with + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt new file mode 100644 index 00000000000..0d305c88c59 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# stopwords in the wrong encoding (ISO-8859-1). +# tests resourceloader's ability to report wrongly encoded files. +baadores diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..b0e31cb7ec8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt @@ -0,0 +1,31 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaa => aaaa +bbb => bbbb1 bbbb2 +ccc => cccc1,cccc2 +a\=>a => b\=>b +a\,a => b\,b +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/throw.error.on.add.updateprocessor.js b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/throw.error.on.add.updateprocessor.js new file mode 100644 index 00000000000..ca56fe35cfe --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/throw.error.on.add.updateprocessor.js @@ -0,0 +1,21 @@ +function processAdd() { + throw "guess what? no-soup-fo-you !!!"; +} + +// // // + +function processDelete() { + // NOOP +} +function processCommit() { + // NOOP +} +function processRollback() { + // NOOP +} +function processMergeIndexes() { + // NOOP +} +function finish() { + // NOOP +} diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor0.js b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor0.js new file mode 100644 index 00000000000..b1856b15d85 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor0.js @@ -0,0 +1,59 @@ +var Assert = Packages.org.junit.Assert; + +function processAdd(cmd) { + functionMessages.add("processAdd0"); + Assert.assertNotNull(req); + Assert.assertNotNull(rsp); + Assert.assertNotNull(logger); + Assert.assertNotNull(cmd); + Assert.assertNotNull(params); + Assert.assertTrue(1 == params.get('intValue').intValue()); // had issues with assertTrue(1, params.get('intValue').intValue()) casting to wrong variant + Assert.assertTrue(params.get('boolValue').booleanValue()); + + // Integer.valueOf is needed here to get a tru java object, because + // all javascript numbers are floating point (ie: java.lang.Double) + cmd.getSolrInputDocument().addField("script_added_i", + java.lang.Integer.valueOf(42)); + cmd.getSolrInputDocument().addField("script_added_d", 42.3); + +} + +function processDelete(cmd) { + 
functionMessages.add("processDelete0"); + Assert.assertNotNull(req); + Assert.assertNotNull(rsp); + Assert.assertNotNull(logger); + Assert.assertNotNull(cmd); +} + +function processMergeIndexes(cmd) { + functionMessages.add("processMergeIndexes0"); + Assert.assertNotNull(req); + Assert.assertNotNull(rsp); + Assert.assertNotNull(logger); + Assert.assertNotNull(cmd); +} + +function processCommit(cmd) { + functionMessages.add("processCommit0"); + Assert.assertNotNull(req); + Assert.assertNotNull(rsp); + Assert.assertNotNull(logger); + Assert.assertNotNull(cmd); +} + +function processRollback(cmd) { + functionMessages.add("processRollback0"); + Assert.assertNotNull(req); + Assert.assertNotNull(rsp); + Assert.assertNotNull(logger); + Assert.assertNotNull(cmd); +} + +function finish() { + functionMessages.add("finish0"); + Assert.assertNotNull(req); + Assert.assertNotNull(rsp); + Assert.assertNotNull(logger); +} + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor1.js b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor1.js new file mode 100644 index 00000000000..98bdf2ab060 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor1.js @@ -0,0 +1,25 @@ +function processAdd(cmd) { + functionMessages.add("processAdd1"); + +} + +function processDelete(cmd) { + functionMessages.add("processDelete1"); +} + +function processMergeIndexes(cmd) { + functionMessages.add("processMergeIndexes1"); +} + +function processCommit(cmd) { + functionMessages.add("processCommit1"); +} + +function processRollback(cmd) { + functionMessages.add("processRollback1"); +} + +function finish() { + functionMessages.add("finish1"); +} + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/wdftypes.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/wdftypes.txt new file mode 100644 index 00000000000..7378b0802e7 --- 
/dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/wdftypes.txt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# A customized type mapping for WordDelimiterFilterFactory +# the allowable types are: LOWER, UPPER, ALPHA, DIGIT, ALPHANUM, SUBWORD_DELIM +# +# the default for any character without a mapping is always computed from +# Unicode character properties + +# Map the $, %, '.', and ',' characters to DIGIT +# This might be useful for financial data. +$ => DIGIT +% => DIGIT +. 
=> DIGIT +\u002C => DIGIT + +# in some cases you might not want to split on ZWJ +# this also tests the case where we need a bigger byte[] +# see http://en.wikipedia.org/wiki/Zero-width_joiner +\u200D => ALPHANUM diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl new file mode 100644 index 00000000000..f10cfbf9330 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl @@ -0,0 +1,31 @@ + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl new file mode 100644 index 00000000000..fbbd8f745cd --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl new file mode 100644 index 00000000000..2e7359a62b6 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/lib/README b/solr/contrib/morphlines-core/src/test-files/solr/collection1/lib/README new file mode 100644 index 00000000000..b7ca5b834f4 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/lib/README @@ -0,0 +1,18 @@ + + +Items under this directory are used by TestConfig.testLibs() diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt 
b/solr/contrib/morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/conf/core.properties b/solr/contrib/morphlines-core/src/test-files/solr/conf/core.properties new file mode 100644 index 00000000000..65df5e6114f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/conf/core.properties @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+schema=schema-tiny.xml +config=solrconfig-minimal.xml +transient=true +loadOnStartup=false + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/crazy-path-to-config.xml b/solr/contrib/morphlines-core/src/test-files/solr/crazy-path-to-config.xml new file mode 100644 index 00000000000..55801c4faf1 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/crazy-path-to-config.xml @@ -0,0 +1,59 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + 0 + + + + 1024 + true + 10 + + + + + + + + implicit + + + + + + + + + + + solr + solrconfig.xml schema.xml + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/crazy-path-to-schema.xml b/solr/contrib/morphlines-core/src/test-files/solr/crazy-path-to-schema.xml new file mode 100644 index 00000000000..a2216ddfa99 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/crazy-path-to-schema.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + subject + id + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/external_eff b/solr/contrib/morphlines-core/src/test-files/solr/external_eff new file mode 100644 index 00000000000..a23f9b554bd --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/external_eff @@ -0,0 +1,10 @@ +1=0.354 +2=0.975 +3=0.001 +4=100.35 +5=53.9 +6=70 +7=3.957 +8=1400 +9=24 +10=450 \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr-50-all.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr-50-all.xml new file mode 100644 index 00000000000..886e4434631 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/solr-50-all.xml @@ -0,0 +1,52 @@ + + + + testAdminHandler + 11 + ${coreRootDirectory:testCoreRootDirectory} + testManagementPath + testSharedLib + ${shareSchema:testShareSchema} + 66 + + + 22 + 33 + 55 + testHost + testHostContext + ${hostPort:44} + 77 + testZkHost + + + + testLoggingClass + testLoggingEnabled + + 88 + 99 + + + + + 
${socketTimeout:100} + ${connTimeout:110} + + + \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr-multicore.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr-multicore.xml new file mode 100644 index 00000000000..abb308ec997 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/solr-multicore.xml @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr-no-core.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr-no-core.xml new file mode 100644 index 00000000000..476b5bc7a10 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/solr-no-core.xml @@ -0,0 +1,39 @@ + + + + + + ${shareSchema:false} + + + 127.0.0.1 + ${hostContext:solr} + ${hostPort:8983} + ${solr.zkclienttimeout:30000} + ${genericCoreNodeNames:true} + ${distribUpdateConnTimeout:15000} + ${distribUpdateSoTimeout:120000} + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr-shardhandler-old.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr-shardhandler-old.xml new file mode 100644 index 00000000000..70aaa56faa0 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/solr-shardhandler-old.xml @@ -0,0 +1,29 @@ + + + + + + + + myMagicRequiredValue + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr-shardhandler.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr-shardhandler.xml new file mode 100644 index 00000000000..f5d24fe931d --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/solr-shardhandler.xml @@ -0,0 +1,29 @@ + + + + + + + + myMagicRequiredValue + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr-stress-new.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr-stress-new.xml new file mode 100644 index 00000000000..3f8b213eab5 --- /dev/null +++ 
b/solr/contrib/morphlines-core/src/test-files/solr/solr-stress-new.xml @@ -0,0 +1,34 @@ + + + + + + + 127.0.0.1 + 8983 + ${hostContext:solr} + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr-stress-old.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr-stress-old.xml new file mode 100644 index 00000000000..6bc1c35e888 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/solr-stress-old.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr.xml new file mode 100644 index 00000000000..4604f60476f --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/solr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/morphlines-core/src/test-files/spellings.txt b/solr/contrib/morphlines-core/src/test-files/spellings.txt new file mode 100644 index 00000000000..2d2472e340a --- /dev/null +++ b/solr/contrib/morphlines-core/src/test-files/spellings.txt @@ -0,0 +1,16 @@ +foo +bar +Solr +junk +foo +bar +Solr +junk +foo +bar +Solr +junk +foo +bar +Solr +junk \ No newline at end of file diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java new file mode 100644 index 00000000000..9ffe6f63220 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringEscapeUtils; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.impl.XMLResponseParser; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.api.Collector; +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.Record; +import 
com.cloudera.cdk.morphline.base.Compiler; +import com.cloudera.cdk.morphline.base.FaultTolerance; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; +import com.cloudera.cdk.morphline.stdlib.PipeBuilder; +import com.codahale.metrics.MetricRegistry; +import com.google.common.io.Files; +import com.typesafe.config.Config; + +public class AbstractSolrMorphlineTestBase extends SolrTestCaseJ4 { + + protected Collector collector; + protected Command morphline; + protected SolrServer solrServer; + protected DocumentLoader testServer; + + protected static final boolean TEST_WITH_EMBEDDED_SOLR_SERVER = true; + protected static final String EXTERNAL_SOLR_SERVER_URL = System.getProperty("externalSolrServer"); +// protected static final String EXTERNAL_SOLR_SERVER_URL = "http://127.0.0.1:8983/solr"; + + protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/map-reduce/src/test-files"; + protected static final String DEFAULT_BASE_DIR = "solr"; + protected static final AtomicInteger SEQ_NUM = new AtomicInteger(); + protected static final AtomicInteger SEQ_NUM2 = new AtomicInteger(); + + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractSolrMorphlineTestBase.class); + + protected String tempDir; + + @BeforeClass + public static void beforeClass() throws Exception { + myInitCore(DEFAULT_BASE_DIR); + } + + protected static void myInitCore(String baseDirName) throws Exception { + initCore( + RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/solrconfig.xml", + RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/schema.xml", + RESOURCES_DIR + "/" + baseDirName + ); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + collector = new Collector(); + + if (EXTERNAL_SOLR_SERVER_URL != null) { + //solrServer = new ConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2); + //solrServer = new SafeConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2); + 
solrServer = new HttpSolrServer(EXTERNAL_SOLR_SERVER_URL); + ((HttpSolrServer)solrServer).setParser(new XMLResponseParser()); + } else { + if (TEST_WITH_EMBEDDED_SOLR_SERVER) { + solrServer = new EmbeddedTestSolrServer(h.getCoreContainer(), ""); + } else { + throw new RuntimeException("Not yet implemented"); + //solrServer = new TestSolrServer(getSolrServer()); + } + } + + int batchSize = SEQ_NUM2.incrementAndGet() % 2 == 0 ? 100 : 1; //SolrInspector.DEFAULT_SOLR_SERVER_BATCH_SIZE : 1; + testServer = new SolrServerDocumentLoader(solrServer, batchSize); + deleteAllDocuments(); + + tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis(); + new File(tempDir).mkdirs(); + } + + @After + public void tearDown() throws Exception { + collector = null; + solrServer = null; + super.tearDown(); + } + + protected void testDocumentTypesInternal(String[] files, Map expectedRecords) throws Exception { + deleteAllDocuments(); + int numDocs = 0; + for (int i = 0; i < 1; i++) { + + for (String file : files) { + File f = new File(file); + byte[] body = Files.toByteArray(f); + Record event = new Record(); + //event.put(Fields.ID, docId++); + event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(body)); + event.getFields().put(Fields.ATTACHMENT_NAME, f.getName()); + event.getFields().put(Fields.BASE_ID, f.getName()); + load(event); + Integer count = expectedRecords.get(file); + if (count != null) { + numDocs += count; + } else { + numDocs++; + } + assertEquals("unexpected results in " + file, numDocs, queryResultSetSize("*:*")); + } + } + assertEquals(numDocs, queryResultSetSize("*:*")); + } + + private boolean load(Record record) { + Notifications.notifyStartSession(morphline); + return morphline.process(record); + } + + protected int queryResultSetSize(String query) { +// return collector.getRecords().size(); + try { + testServer.commitTransaction(); + solrServer.commit(false, true, true); + QueryResponse rsp = solrServer.query(new 
SolrQuery(query).setRows(Integer.MAX_VALUE)); + LOGGER.debug("rsp: {}", rsp); + int i = 0; + for (SolrDocument doc : rsp.getResults()) { + LOGGER.debug("rspDoc #{}: {}", i++, doc); + } + int size = rsp.getResults().size(); + return size; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void deleteAllDocuments() throws SolrServerException, IOException { + collector.reset(); + SolrServer s = solrServer; + s.deleteByQuery("*:*"); // delete everything! + s.commit(); + } + + + public static void setupMorphline(String tempDir, String file) throws IOException { + String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8"); + morphlineText = morphlineText.replace("RESOURCES_DIR", StringEscapeUtils.escapeJavaScript(new File(tempDir).getAbsolutePath())); + + FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8"); + } + + protected Command createMorphline(String file) throws IOException { + setupMorphline(tempDir, file); + + return new PipeBuilder().build(parse(file), null, collector, createMorphlineContext()); + } + + private MorphlineContext createMorphlineContext() { + return new SolrMorphlineContext.Builder() + .setDocumentLoader(testServer) +// .setDocumentLoader(new CollectingDocumentLoader(100)) + .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName())) + .setMetricRegistry(new MetricRegistry()) + .build(); + } + + private Config parse(String file) throws IOException { + SolrLocator locator = new SolrLocator(createMorphlineContext()); + locator.setSolrHomeDir(testSolrHome + "/collection1"); + Config config = new Compiler().parse(new File(tempDir + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR")); + config = config.getConfigList("morphlines").get(0); + return config; + } + + protected void startSession() { + Notifications.notifyStartSession(morphline); + } + + protected void testDocumentContent(HashMap 
expectedResultMap) + throws Exception { + QueryResponse rsp = solrServer.query(new SolrQuery("*:*").setRows(Integer.MAX_VALUE)); + // Check that every expected field/values shows up in the actual query + for (Entry current : expectedResultMap.entrySet()) { + String field = current.getKey(); + for (String expectedFieldValue : current.getValue().getFieldValues()) { + ExpectedResult.CompareType compareType = current.getValue().getCompareType(); + boolean foundField = false; + + for (SolrDocument doc : rsp.getResults()) { + Collection actualFieldValues = doc.getFieldValues(field); + if (compareType == ExpectedResult.CompareType.equals) { + if (actualFieldValues != null && actualFieldValues.contains(expectedFieldValue)) { + foundField = true; + break; + } + } + else { + for (Iterator it = actualFieldValues.iterator(); it.hasNext(); ) { + String actualValue = it.next().toString(); // test only supports string comparison + if (actualFieldValues != null && actualValue.contains(expectedFieldValue)) { + foundField = true; + break; + } + } + } + } + assert(foundField); // didn't find expected field/value in query + } + } + } + + /** + * Representation of the expected output of a SolrQuery. + */ + protected static class ExpectedResult { + private HashSet fieldValues; + public enum CompareType { + equals, // Compare with equals, i.e. actual.equals(expected) + contains; // Compare with contains, i.e. 
actual.contains(expected) + } + private CompareType compareType; + + public ExpectedResult(HashSet fieldValues, CompareType compareType) { + this.fieldValues = fieldValues; + this.compareType = compareType; + } + public HashSet getFieldValues() { return fieldValues; } + public CompareType getCompareType() { return compareType; } + } +} diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java new file mode 100644 index 00000000000..615c2ccc733 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.morphlines.solr; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.cloud.AbstractFullDistribZkTestBase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.Collector; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Compiler; +import com.cloudera.cdk.morphline.base.FaultTolerance; +import com.cloudera.cdk.morphline.base.Notifications; +import com.cloudera.cdk.morphline.stdlib.PipeBuilder; +import com.codahale.metrics.MetricRegistry; +import com.google.common.collect.ListMultimap; +import com.typesafe.config.Config; + +public abstract class AbstractSolrMorphlineZkTestBase extends AbstractFullDistribZkTestBase { + private static final File solrHomeDirectory = new File(TEMP_DIR, AbstractSolrMorphlineZkTestBase.class.getName()); + + protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/map-reduce/src/test-files"; + private static final File SOLR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr"); + private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1"); + + protected Collector collector; + protected Command morphline; + + @Override + public String getSolrHome() { + return solrHomeDirectory.getPath(); + } + + public AbstractSolrMorphlineZkTestBase() { + fixShardCount = true; + sliceCount = 3; + shardCount = 3; + } + + @BeforeClass + public 
static void setupClass() throws Exception { + AbstractZkTestCase.SOLRHOME = solrHomeDirectory; + FileUtils.copyDirectory(SOLR_INSTANCE_DIR, solrHomeDirectory); + createTempDir(); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + System.setProperty("host", "127.0.0.1"); + System.setProperty("numShards", Integer.toString(sliceCount)); + uploadConfFiles(); + collector = new Collector(); + } + + @Override + @After + public void tearDown() throws Exception { + try { + super.tearDown(); + } finally { + // always undo the system properties set in setUp(), even if the parent teardown throws + System.clearProperty("host"); + System.clearProperty("numShards"); + } + } + + @Test + @Override + public void testDistribSearch() throws Exception { + super.testDistribSearch(); + } + + @Override + protected void commit() throws Exception { + Notifications.notifyCommitTransaction(morphline); + super.commit(); + } + + protected Command parse(String file) throws IOException { + return parse(file, "collection1"); + } + + protected Command parse(String file, String collection) throws IOException { + SolrLocator locator = new SolrLocator(createMorphlineContext()); + locator.setCollectionName(collection); + locator.setZkHost(zkServer.getZkAddress()); + //locator.setServerUrl(cloudJettys.get(0).url); // TODO: download IndexSchema from solrUrl not yet implemented + //locator.setSolrHomeDir(SOLR_HOME_DIR.getPath()); + Config config = new Compiler().parse(new File(RESOURCES_DIR + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR")); + config = config.getConfigList("morphlines").get(0); + return createMorphline(config); + } + + private Command createMorphline(Config config) { + return new PipeBuilder().build(config, null, collector, createMorphlineContext()); + } + + private MorphlineContext createMorphlineContext() { + return new MorphlineContext.Builder() + .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName())) + .setMetricRegistry(new MetricRegistry()) + .build(); + } + + protected void startSession() { + 
Notifications.notifyStartSession(morphline); + } + + protected ListMultimap next(Iterator iter) { + SolrDocument doc = iter.next(); + Record record = toRecord(doc); + record.removeAll("_version_"); // the values of this field are unknown and internal to solr + return record.getFields(); + } + + private Record toRecord(SolrDocument doc) { + Record record = new Record(); + for (String key : doc.keySet()) { + record.getFields().replaceValues(key, doc.getFieldValues(key)); + } + return record; + } + + @Override + public JettySolrRunner createJetty(File solrHome, String dataDir, + String shardList, String solrConfigOverride, String schemaOverride) + throws Exception { + + JettySolrRunner jetty = new JettySolrRunner(solrHome.getAbsolutePath(), + context, 0, solrConfigOverride, schemaOverride); + + jetty.setShards(shardList); + + if (System.getProperty("collection") == null) { + System.setProperty("collection", "collection1"); + } + + jetty.start(); + + System.clearProperty("collection"); + + return jetty; + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String name) throws Exception { + putConfig(zkClient, solrhome, name, name); + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String srcName, String destName) + throws Exception { + + File file = new File(solrhome, "conf" + File.separator + srcName); + if (!file.exists()) { + // LOG.info("skipping " + file.getAbsolutePath() + + // " because it doesn't exist"); + return; + } + + String destPath = "/configs/conf1/" + destName; + // LOG.info("put " + file.getAbsolutePath() + " to " + destPath); + zkClient.makePath(destPath, file, false, true); + } + + private void uploadConfFiles() throws Exception { + // upload our own config files + SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), 10000); + try { + putConfig(zkClient, SOLR_CONF_DIR, "solrconfig.xml"); + putConfig(zkClient, SOLR_CONF_DIR, "schema.xml"); + putConfig(zkClient, SOLR_CONF_DIR, "elevate.xml"); + 
putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_en.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ar.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_bg.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ca.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_cz.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_da.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_el.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_es.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_eu.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_de.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fa.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fi.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fr.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ga.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_gl.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hi.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hu.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hy.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_id.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_it.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ja.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_lv.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_nl.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_no.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_pt.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ro.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ru.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_sv.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_th.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_tr.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_ca.txt"); + putConfig(zkClient, SOLR_CONF_DIR, 
"lang/contractions_fr.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_ga.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_it.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/stemdict_nl.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/hyphenations_ga.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "stopwords.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "protwords.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "currency.xml"); + putConfig(zkClient, SOLR_CONF_DIR, "open-exchange-rates.json"); + putConfig(zkClient, SOLR_CONF_DIR, "mapping-ISOLatin1Accent.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "old_synonyms.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "synonyms.txt"); + } finally { + // close the client even when an upload fails, so a failed test does not leave it open + zkClient.close(); + } + } + + } diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java new file mode 100644 index 00000000000..ed58cffff6e --- /dev/null +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A mockup DocumentLoader implementation for unit tests; collects all documents into a main memory list. + */ +class CollectingDocumentLoader implements DocumentLoader { + + private final int batchSize; + private final List batch = new ArrayList (); + private List results = new ArrayList (); + + private static final Logger LOGGER = LoggerFactory.getLogger(CollectingDocumentLoader.class); + + public CollectingDocumentLoader(int batchSize) { + if (batchSize <= 0) { + throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize); + } + this.batchSize = batchSize; + } + + @Override + public void beginTransaction() { + LOGGER.trace("beginTransaction"); + batch.clear(); + } + + @Override + public void load(SolrInputDocument doc) { + LOGGER.trace("load doc: {}", doc); + batch.add(doc); + if (batch.size() >= batchSize) { + loadBatch(); + } + } + + @Override + public void commitTransaction() { + LOGGER.trace("commitTransaction"); + if (batch.size() > 0) { + loadBatch(); + } + } + + private void loadBatch() { + try { + results.addAll(batch); + } finally { + batch.clear(); + } + } + + @Override + public UpdateResponse rollbackTransaction() { + LOGGER.trace("rollback"); + return new UpdateResponse(); + } + + @Override + public void shutdown() { + LOGGER.trace("shutdown"); + } + + @Override + public SolrPingResponse ping() { + LOGGER.trace("ping"); + return new SolrPingResponse(); + } + +} diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java new file mode 
100644 index 00000000000..1f747f3d2d8 --- /dev/null +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.core.CoreContainer; + +/** + * An EmbeddedSolrServer that supresses shutdown and rollback requests as + * necessary for testing + */ +public class EmbeddedTestSolrServer extends EmbeddedSolrServer { + + public EmbeddedTestSolrServer(CoreContainer coreContainer, String coreName) { + super(coreContainer, coreName); + } + + @Override + public void shutdown() { + ; // NOP + } + + @Override + public UpdateResponse rollback() throws SolrServerException, IOException { + return new UpdateResponse(); + } + +} diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java new file mode 100644 
index 00000000000..658020ca3c0
--- /dev/null
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.util.Arrays;
+
+import org.apache.lucene.util.Constants;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+
+/**
+ * Runs morphline configurations from the test-morphlines resources and checks the
+ * records seen by the collector.
+ */
+public class SolrMorphlineTest extends AbstractSolrMorphlineTestBase {
+
+  /**
+   * Skips the whole class when running on Java 8 or later, or on an IBM J9 VM
+   * (detected through the java.vm.info system property); see SOLR-1301.
+   */
+  @BeforeClass
+  public static void beforeClass2() {
+    assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8);
+    assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9"));
+  }
+
+  /**
+   * Processes one record through the loadSolrBasic morphline and expects the collector to
+   * hold a record with only the id, and a match-all query to return exactly one document.
+   */
+  @Test
+  public void testLoadSolrBasic() throws Exception {
+    //System.setProperty("ENV_SOLR_HOME", testSolrHome + "/collection1");
+    morphline = createMorphline("test-morphlines/loadSolrBasic");
+    //System.clearProperty("ENV_SOLR_HOME");
+    Record record = new Record();
+    record.put(Fields.ID, "id0");
+    record.put("first_name", "Nadja"); // will be sanitized
+    startSession();
+    Notifications.notifyBeginTransaction(morphline);
+    assertTrue(morphline.process(record));
+    assertEquals(1, collector.getNumStartEvents());
+    Notifications.notifyCommitTransaction(morphline);
+    // the expected record carries no first_name: only the id is left after processing
+    Record expected = new Record();
+    expected.put(Fields.ID, "id0");
+    assertEquals(Arrays.asList(expected), collector.getRecords());
+    assertEquals(1, queryResultSetSize("*:*"));
+    Notifications.notifyRollbackTransaction(morphline);
+    Notifications.notifyShutdown(morphline);
+  }
+
+  /**
+   * Processes a record with two message values through the tokenizeText morphline and
+   * expects the output to be the input plus a "tokens" field with the lower-cased tokens.
+   */
+  @Test
+  public void testTokenizeText() throws Exception {
+    morphline = createMorphline("test-morphlines/tokenizeText");
+    Record record = new Record();
+    // two puts on the same key; the expected token list below covers both values
+    record.put(Fields.MESSAGE, "Hello World!");
+    record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123");
+    Record expected = record.copy();
+    expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123"));
+    startSession();
+    Notifications.notifyBeginTransaction(morphline);
+    assertTrue(morphline.process(record));
+    assertEquals(1, collector.getNumStartEvents());
+    Notifications.notifyCommitTransaction(morphline);
+    assertEquals(expected, collector.getFirstRecord());
+  }
+
+}
diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java
new file mode 100644
index 00000000000..35fbdf1486d
--- /dev/null
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.params.CollectionParams.CollectionAction;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.junit.BeforeClass;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+
+/**
+ * Loads records through the loadSolrBasic morphline while addressing the target by a
+ * collection alias, then checks that an alias naming two collections is rejected.
+ */
+@ThreadLeakAction({Action.WARN})
+@ThreadLeakLingering(linger = 0)
+@ThreadLeakZombies(Consequence.CONTINUE)
+@ThreadLeakScope(Scope.NONE)
+@SuppressCodecs({"Lucene3x", "Lucene40"})
+@Slow
+public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
+
+  /**
+   * Skips the whole class when running on Java 8 or later, or on an IBM J9 VM
+   * (detected through the java.vm.info system property); see SOLR-1301.
+   */
+  @BeforeClass
+  public static void beforeClass2() {
+    assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8);
+    assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9"));
+  }
+
+  /**
+   * Processes two records via the alias "aliascollection", verifies both the collector
+   * output and the sorted query result, then expects parse() to throw
+   * IllegalArgumentException once the alias names two collections.
+   */
+  @Override
+  public void doTest() throws Exception {
+
+    waitForRecoveriesToFinish(false);
+
+    createAlias("aliascollection", "collection1");
+
+    morphline = parse("test-morphlines/loadSolrBasic", "aliascollection");
+    Record record = new Record();
+    record.put(Fields.ID, "id0-innsbruck");
+    record.put("text", "mytext");
+    record.put("user_screen_name", "foo");
+    record.put("first_name", "Nadja"); // will be sanitized
+    startSession();
+    assertEquals(1, collector.getNumStartEvents());
+    Notifications.notifyBeginTransaction(morphline);
+    assertTrue(morphline.process(record));
+
+    record = new Record();
+    record.put(Fields.ID, "id1-innsbruck");
+    record.put("text", "mytext1");
+    record.put("user_screen_name", "foo1");
+    record.put("first_name", "Nadja1"); // will be sanitized
+    assertTrue(morphline.process(record));
+
+    // the expected records carry no first_name field
+    Record expected = new Record();
+    expected.put(Fields.ID, "id0-innsbruck");
+    expected.put("text", "mytext");
+    expected.put("user_screen_name", "foo");
+    Iterator citer = collector.getRecords().iterator();
+    assertEquals(expected, citer.next());
+
+    Record expected2 = new Record();
+    expected2.put(Fields.ID, "id1-innsbruck");
+    expected2.put("text", "mytext1");
+    expected2.put("user_screen_name", "foo1");
+    assertEquals(expected2, citer.next());
+
+    assertFalse(citer.hasNext());
+
+    commit();
+
+    // results are sorted by id ascending, so they line up with expected, expected2
+    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
+    //System.out.println(rsp);
+    Iterator iter = rsp.getResults().iterator();
+    assertEquals(expected.getFields(), next(iter));
+    assertEquals(expected2.getFields(), next(iter));
+    assertFalse(iter.hasNext());
+
+    Notifications.notifyRollbackTransaction(morphline);
+    Notifications.notifyShutdown(morphline);
+
+
+    // re-point the alias at two collections; parsing the morphline must now fail
+    createAlias("aliascollection", "collection1,collection2");
+
+    try {
+      parse("test-morphlines/loadSolrBasic", "aliascollection");
+      fail("Expected IAE because update alias maps to multiple collections");
+    } catch (IllegalArgumentException e) {
+      // expected: this is the outcome the test asserts
+    }
+  }
+
+  /**
+   * Sends a CREATEALIAS request to /admin/collections through the cloud client.
+   *
+   * @param alias value of the "name" request parameter
+   * @param collections value of the "collections" request parameter
+   * @return whatever the cloud client returns for the request
+   */
+  private NamedList createAlias(String alias, String collections) throws SolrServerException, IOException {
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.set("collections", collections);
+    params.set("name", alias);
+    params.set("action", CollectionAction.CREATEALIAS.toString());
+    QueryRequest request = new QueryRequest(params);
+    request.setPath("/admin/collections");
+    return cloudClient.request(request);
+  }
+
+}
diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java
new file mode 100644
index 00000000000..41abfe54ff7
--- /dev/null
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.avro.Schema.Field;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.FileReader;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.junit.BeforeClass;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+
+/**
+ * Feeds an Avro container file through the tutorialReadAvroContainer morphline and
+ * compares both the collector output and the Solr result set with the records read
+ * directly from the file.
+ */
+@ThreadLeakAction({Action.WARN})
+@ThreadLeakLingering(linger = 0)
+@ThreadLeakZombies(Consequence.CONTINUE)
+@ThreadLeakScope(Scope.NONE)
+@SuppressCodecs({"Lucene3x", "Lucene40"})
+@Slow
+public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {
+
+  /**
+   * Skips the whole class when running on Java 8 or later, or on an IBM J9 VM
+   * (detected through the java.vm.info system property); see SOLR-1301.
+   */
+  @BeforeClass
+  public static void beforeClass2() {
+    assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8);
+    assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9"));
+  }
+
+  /**
+   * Loads the sample Avro file via the morphline, then checks record by record (sorted by
+   * id) that the morphline output and the Solr result set match the file contents.
+   */
+  @Override
+  public void doTest() throws Exception {
+    File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
+
+    waitForRecoveriesToFinish(false);
+
+    // load avro records via morphline and zk into solr
+    morphline = parse("test-morphlines/tutorialReadAvroContainer");
+    Record record = new Record();
+    byte[] body = Files.toByteArray(file);
+    record.put(Fields.ATTACHMENT_BODY, body);
+    startSession();
+    Notifications.notifyBeginTransaction(morphline);
+    assertTrue(morphline.process(record));
+    assertEquals(1, collector.getNumStartEvents());
+
+    commit();
+
+    // fetch sorted result set from solr
+    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));
+    assertEquals(2104, collector.getRecords().size());
+    assertEquals(collector.getRecords().size(), rsp.getResults().size());
+
+    Collections.sort(collector.getRecords(), new Comparator<Record>() {
+      @Override
+      public int compare(Record r1, Record r2) {
+        return r1.get("id").toString().compareTo(r2.get("id").toString());
+      }
+    });
+
+    // fetch test input data and sort like solr result set
+    List<GenericData.Record> records = new ArrayList<GenericData.Record>();
+    FileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(file, new GenericDatumReader<GenericData.Record>());
+    try {
+      while (reader.hasNext()) {
+        GenericData.Record expected = reader.next();
+        records.add(expected);
+      }
+    } finally {
+      reader.close(); // release the file handle even if reading fails
+    }
+    assertEquals(collector.getRecords().size(), records.size());
+    Collections.sort(records, new Comparator<GenericData.Record>() {
+      @Override
+      public int compare(GenericData.Record r1, GenericData.Record r2) {
+        return r1.get("id").toString().compareTo(r2.get("id").toString());
+      }
+    });
+
+    // The records are sorted by id, so duplicate ids would sit next to each other.
+    // Compare the id field itself, not the whole record.
+    Object lastId = null;
+    for (int i = 0; i < records.size(); i++) {
+      Object id = records.get(i).get("id");
+      if (id != null && id.equals(lastId)) {
+        throw new IllegalStateException("Detected duplicate id. Test input data must not contain duplicate ids!");
+      }
+      lastId = id;
+    }
+
+    Iterator<SolrDocument> rspIter = rsp.getResults().iterator();
+    for (int i = 0; i < records.size(); i++) {
+      // verify morphline spat out expected data
+      Record actual = collector.getRecords().get(i);
+      GenericData.Record expected = records.get(i);
+      Preconditions.checkNotNull(expected);
+      assertTweetEquals(expected, actual, i);
+
+      // verify Solr result set contains expected data
+      actual = new Record();
+      actual.getFields().putAll(next(rspIter));
+      assertTweetEquals(expected, actual, i);
+    }
+
+    Notifications.notifyRollbackTransaction(morphline);
+    Notifications.notifyShutdown(morphline);
+    cloudClient.shutdown();
+  }
+
+  /**
+   * Asserts that a fixed set of tweet fields has the same string value in the Avro
+   * record and in the morphline record.
+   *
+   * @param expected record read from the Avro file
+   * @param actual record to check; only the first value of each field is compared
+   * @param i index of the record, used in the assertion message
+   */
+  private void assertTweetEquals(GenericData.Record expected, Record actual, int i) {
+    Preconditions.checkNotNull(expected);
+    Preconditions.checkNotNull(actual);
+//    System.out.println("\n\nexpected: " + toString(expected));
+//    System.out.println("actual:   " + actual);
+    String[] fieldNames = new String[] {
+        "id",
+        "in_reply_to_status_id",
+        "in_reply_to_user_id",
+        "retweet_count",
+        "text",
+        };
+    for (String fieldName : fieldNames) {
+      assertEquals(
+          i + " fieldName: " + fieldName,
+          expected.get(fieldName).toString(),
+          actual.getFirstValue(fieldName).toString());
+    }
+  }
+
+  /** Debugging aid: copies every field of the Avro record into a morphline Record and prints that. */
+  private String toString(GenericData.Record avroRecord) {
+    Record record = new Record();
+    for (Field field : avroRecord.getSchema().getFields()) {
+      record.put(field.name(), avroRecord.get(field.pos()));
+    }
+    return record.toString(); // prints sorted by key for human readability
+  }
+
+}
diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java
new file mode 100644
index 00000000000..d104e7b84ee
--- /dev/null
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.util.Iterator;
+
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.junit.BeforeClass;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+
+/**
+ * Loads two records through the loadSolrBasic morphline and verifies both the collector
+ * output and the documents returned by the cloud client.
+ */
+@ThreadLeakAction({Action.WARN})
+@ThreadLeakLingering(linger = 0)
+@ThreadLeakZombies(Consequence.CONTINUE)
+@ThreadLeakScope(Scope.NONE)
+@SuppressCodecs({"Lucene3x", "Lucene40"})
+@Slow
+public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase {
+
+  /**
+   * Skips the whole class when running on Java 8 or later, or on an IBM J9 VM
+   * (detected through the java.vm.info system property); see SOLR-1301.
+   */
+  @BeforeClass
+  public static void beforeClass2() {
+    assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8);
+    assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301", System.getProperty("java.vm.info", "").contains("IBM J9"));
+  }
+
+  /**
+   * Processes two records, checks that the collector received them without the
+   * first_name field, commits, and checks that a match-all query sorted by id returns
+   * exactly those two documents.
+   */
+  @Override
+  public void doTest() throws Exception {
+
+    waitForRecoveriesToFinish(false);
+
+    morphline = parse("test-morphlines/loadSolrBasic");
+    Record record = new Record();
+    record.put(Fields.ID, "id0-innsbruck");
+    record.put("text", "mytext");
+    record.put("user_screen_name", "foo");
+    record.put("first_name", "Nadja"); // will be sanitized
+    startSession();
+    assertEquals(1, collector.getNumStartEvents());
+    Notifications.notifyBeginTransaction(morphline);
+    assertTrue(morphline.process(record));
+
+    record = new Record();
+    record.put(Fields.ID, "id1-innsbruck");
+    record.put("text", "mytext1");
+    record.put("user_screen_name", "foo1");
+    record.put("first_name", "Nadja1"); // will be sanitized
+    assertTrue(morphline.process(record));
+
+    // the expected records carry no first_name field
+    Record expected = new Record();
+    expected.put(Fields.ID, "id0-innsbruck");
+    expected.put("text", "mytext");
+    expected.put("user_screen_name", "foo");
+    Iterator citer = collector.getRecords().iterator();
+    assertEquals(expected, citer.next());
+
+    Record expected2 = new Record();
+    expected2.put(Fields.ID, "id1-innsbruck");
+    expected2.put("text", "mytext1");
+    expected2.put("user_screen_name", "foo1");
+    assertEquals(expected2, citer.next());
+
+    assertFalse(citer.hasNext());
+
+    commit();
+
+    // results are sorted by id ascending, so they line up with expected, expected2
+    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
+    //System.out.println(rsp);
+    Iterator iter = rsp.getResults().iterator();
+    assertEquals(expected.getFields(), next(iter));
+    assertEquals(expected2.getFields(), next(iter));
+    assertFalse(iter.hasNext());
+
+    Notifications.notifyRollbackTransaction(morphline);
+    Notifications.notifyShutdown(morphline);
+    cloudClient.shutdown();
+  }
+
+}
diff --git a/solr/core/ivy.xml b/solr/core/ivy.xml
index 2d165ba8819..06806ff40a6 100644
--- a/solr/core/ivy.xml
+++ b/solr/core/ivy.xml
@@ -35,6 +35,9 @@
+
+
+
diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/BasicAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/BasicAccumulator.java
new file mode 100644
index 00000000000..6a9232f9db5
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/BasicAccumulator.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the
Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.accumulator;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Set;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.solr.analytics.expression.Expression;
+import org.apache.solr.analytics.expression.ExpressionFactory;
+import org.apache.solr.analytics.request.AnalyticsRequest;
+import org.apache.solr.analytics.request.ExpressionRequest;
+import org.apache.solr.analytics.statistics.StatsCollector;
+import org.apache.solr.analytics.statistics.StatsCollectorSupplierFactory;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.schema.TrieDateField;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.SolrIndexSearcher;
+
+import com.google.common.base.Supplier;
+
+/**
+ * Accumulator for the non-faceted part of an analytics request: it owns one set of
+ * {@link StatsCollector}s and the {@link Expression}s evaluated over them.
+ */
+public class BasicAccumulator extends ValueAccumulator {
+  protected final SolrIndexSearcher searcher;
+  protected final AnalyticsRequest request;
+  protected final DocSet docs;
+  protected final Supplier<StatsCollector[]> statsCollectorArraySupplier;
+  protected final StatsCollector[] statsCollectors;
+  protected final Expression[] expressions;
+  protected final String[] expressionNames;
+  protected final String[] expressionStrings;
+  protected final Set<String> hiddenExpressions;
+  protected AtomicReaderContext context = null;
+
+  /**
+   * Creates the stats collectors for the request and builds one expression per
+   * expression request, in the sorted order of the request's expression list.
+   */
+  public BasicAccumulator(SolrIndexSearcher searcher, DocSet docs, AnalyticsRequest request) throws IOException {
+    this.searcher = searcher;
+    this.docs = docs;
+    this.request = request;
+    this.statsCollectorArraySupplier = StatsCollectorSupplierFactory.create(searcher.getSchema(), request);
+    this.statsCollectors = statsCollectorArraySupplier.get();
+
+    final int numExpressions = request.getExpressions().size();
+    this.expressionNames = new String[numExpressions];
+    this.expressionStrings = new String[numExpressions];
+    // Sorts the list handed out by the request before walking it.
+    Collections.sort(request.getExpressions());
+    int slot = 0;
+    for (ExpressionRequest expRequest : request.getExpressions()) {
+      expressionNames[slot] = expRequest.getName();
+      expressionStrings[slot] = expRequest.getExpressionString();
+      slot++;
+    }
+    this.expressions = makeExpressions(statsCollectors);
+    this.hiddenExpressions = request.getHiddenExpressions();
+  }
+
+  /** Remembers the new reader context and forwards it to every stats collector. */
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    this.context = context;
+    for (int i = 0; i < statsCollectors.length; i++) {
+      statsCollectors[i].setNextReader(context);
+    }
+  }
+
+  /** Factory method; equivalent to calling the constructor. */
+  public static BasicAccumulator create(SolrIndexSearcher searcher, DocSet docs, AnalyticsRequest request) throws IOException {
+    return new BasicAccumulator(searcher,docs,request);
+  }
+
+  /**
+   * Hands the document to each {@link StatsCollector} in turn.
+   * @param doc Document to collect from
+   */
+  @Override
+  public void collect(int doc) throws IOException {
+    for (int i = 0; i < statsCollectors.length; i++) {
+      statsCollectors[i].collect(doc);
+    }
+  }
+
+  /** Asks each stats collector to compute its results. */
+  @Override
+  public void compute() {
+    for (int i = 0; i < statsCollectors.length; i++) {
+      statsCollectors[i].compute();
+    }
+  }
+
+  /**
+   * Exports the value of every expression that is not hidden.
+   * @return name/value pairs in expression order
+   */
+  public NamedList<?> export(){
+    NamedList<Object> exported = new NamedList<Object>();
+    for (int i = 0; i < expressions.length; i++) {
+      final String name = expressionNames[i];
+      if (hiddenExpressions.contains(name)) {
+        continue; // hidden expressions are not evaluated for the export
+      }
+      exported.add(name, expressions[i].getValue());
+    }
+    return exported;
+  }
+
+  /**
+   * Creates one Expression per expression string, all backed by the given collectors.
+   * @param statsCollectors the stats collectors
+   * @return The array of Expressions
+   */
+  public Expression[] makeExpressions(StatsCollector[] statsCollectors) {
+    final int total = expressionStrings.length;
+    Expression[] built = new Expression[total];
+    for (int i = 0; i < total; i++) {
+      built[i] = ExpressionFactory.create(expressionStrings[i], statsCollectors);
+    }
+    return built;
+  }
+
+  /**
+   * Looks up an expression by name and returns its value as a string, for use in a
+   * field or query facet. Values whose class is exactly {@link Date} are formatted
+   * with {@link TrieDateField#formatExternal}.
+   * @param expressionName the name of the expression
+   * @return String representation of pivot value
+   * @throws SolrException with BAD_REQUEST if no expression has that name
+   */
+  @SuppressWarnings({ "deprecation", "rawtypes" })
+  public String getResult(String expressionName) {
+    int index = 0;
+    while (index < expressionNames.length && !expressionName.equals(expressionNames[index])) {
+      index++;
+    }
+    if (index == expressionNames.length) {
+      throw new SolrException(ErrorCode.BAD_REQUEST, "Pivot expression "+expressionName+" not found.");
+    }
+    Comparable value = expressions[index].getValue();
+    boolean isPlainDate = value.getClass().equals(Date.class);
+    return isPlainDate ? TrieDateField.formatExternal((Date)value) : value.toString();
+  }
+
+  /**
+   * Used for JMX stats collecting. Counts the number of stats requests
+   * @return number of unique stats collectors
+   */
+  public long getNumStatsCollectors() {
+    return statsCollectors.length;
+  }
+
+  /**
+   * Used for JMX stats collecting. Counts the number of queries in all query facets
+   * @return number of queries requested in all query facets; always zero in this class
+   */
+  public long getNumQueries() {
+    return 0L;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/FacetingAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/FacetingAccumulator.java
new file mode 100644
index 00000000000..f22c345e424
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/FacetingAccumulator.java
@@ -0,0 +1,722 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.solr.analytics.accumulator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Query; +import org.apache.solr.analytics.accumulator.facet.FacetValueAccumulator; +import org.apache.solr.analytics.accumulator.facet.FieldFacetAccumulator; +import org.apache.solr.analytics.accumulator.facet.QueryFacetAccumulator; +import org.apache.solr.analytics.accumulator.facet.RangeFacetAccumulator; +import org.apache.solr.analytics.expression.Expression; +import org.apache.solr.analytics.expression.ExpressionFactory; +import org.apache.solr.analytics.request.AnalyticsContentHandler; +import org.apache.solr.analytics.request.AnalyticsRequest; +import org.apache.solr.analytics.request.FieldFacetRequest; +import org.apache.solr.analytics.request.FieldFacetRequest.FacetSortSpecification; +import org.apache.solr.analytics.request.QueryFacetRequest; +import org.apache.solr.analytics.request.RangeFacetRequest; +import org.apache.solr.analytics.statistics.StatsCollector; +import org.apache.solr.analytics.util.AnalyticsParams; +import org.apache.solr.analytics.util.RangeEndpointCalculator; +import org.apache.solr.analytics.util.RangeEndpointCalculator.FacetRange; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.TrieDateField; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.QParser; +import 
org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SyntaxError; + +import com.google.common.collect.Iterables; + +/** + * A FacetingAccumulator manages the StatsCollectors and Expressions for facets. + */ +public class FacetingAccumulator extends BasicAccumulator implements FacetValueAccumulator { + public static final String MISSING_VALUE = "(MISSING)"; + protected boolean basicsAndFieldFacetsComputed; + protected int leafNum; + protected AtomicReaderContext leaf; + protected final AnalyticsRequest analyticsRequest; + protected final Map> fieldFacetExpressions; + protected final Map> rangeFacetExpressions; + protected final Map> queryFacetExpressions; + protected final Map> fieldFacetCollectors; + protected final Map> rangeFacetCollectors; + protected final Map> queryFacetCollectors; + protected final List facetAccumulators; + protected final Set hiddenFieldFacets; + /** the current value of this stat field */ + protected final SolrQueryRequest queryRequest; + + protected List rangeFacets = null; + protected List queryFacets = null; + + protected long queryCount; + + public FacetingAccumulator(SolrIndexSearcher searcher, DocSet docs, AnalyticsRequest request, SolrQueryRequest queryRequest) throws IOException { + // The parent Basic Accumulator keeps track of overall stats while + // the Faceting Accumulator only manages the facet stats + super(searcher, docs, request); + this.analyticsRequest = request; + this.queryRequest = queryRequest; + basicsAndFieldFacetsComputed = false; + List fieldFreqs = request.getFieldFacets(); + List rangeFreqs = request.getRangeFacets(); + List queryFreqs = request.getQueryFacets(); + + this.fieldFacetExpressions = new LinkedHashMap>(fieldFreqs.size()); + this.rangeFacetExpressions = new LinkedHashMap>(rangeFreqs.size()); + this.queryFacetExpressions = new LinkedHashMap>(queryFreqs.size()); + this.fieldFacetCollectors = new LinkedHashMap>(fieldFreqs.size()); + this.rangeFacetCollectors = new 
LinkedHashMap>(rangeFreqs.size()); + this.queryFacetCollectors = new LinkedHashMap>(queryFreqs.size()); + this.facetAccumulators = new ArrayList(); + this.hiddenFieldFacets = new HashSet(); + + /** + * For each field facet request add a bucket to the {@link Expression} map and {@link StatsCollector} map. + * Field facets are computed during the initial collection of documents, therefore + * the FieldFacetAccumulators are created initially. + */ + for( FieldFacetRequest freq : fieldFreqs ){ + final FieldFacetRequest fr = (FieldFacetRequest) freq; + if (fr.isHidden()) { + hiddenFieldFacets.add(fr.getName()); + } + final SchemaField ff = fr.getField(); + final FieldFacetAccumulator facc = FieldFacetAccumulator.create(searcher, this, ff); + facetAccumulators.add(facc); + fieldFacetExpressions.put(freq.getName(), new LinkedHashMap() ); + fieldFacetCollectors.put(freq.getName(), new LinkedHashMap()); + } + /** + * For each range and query facet request add a bucket to the corresponding + * {@link Expression} map and {@link StatsCollector} map. + * Range and Query Facets are computed in the post processing, so the accumulators + * are not created initially. 
+ */ + for( RangeFacetRequest freq : rangeFreqs ){ + if( rangeFacets == null ) rangeFacets = new ArrayList(); + rangeFacets.add(freq); + rangeFacetExpressions.put(freq.getName(), new LinkedHashMap() ); + rangeFacetCollectors.put(freq.getName(), new LinkedHashMap()); + } + for( QueryFacetRequest freq : queryFreqs ){ + if( queryFacets == null ) queryFacets = new ArrayList(); + queryFacets.add(freq); + queryFacetExpressions.put(freq.getName(), new LinkedHashMap() ); + queryFacetCollectors.put(freq.getName(), new LinkedHashMap()); + } + this.queryCount = 0l; + } + + public static FacetingAccumulator create(SolrIndexSearcher searcher, DocSet docs, AnalyticsRequest request, SolrQueryRequest queryRequest) throws IOException { + return new FacetingAccumulator(searcher,docs,request,queryRequest); + } + + /** + * Update the readers for the {@link BasicAccumulator}, field facets and field facet {@link StatsCollector}s. + * @param context The context to read documents from. + * @throws IOException if there is an error setting the next reader + */ + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + super.setNextReader(context); + for( Map valueList : fieldFacetCollectors.values() ){ + for (StatsCollector[] statsCollectorList : valueList.values()) { + for (StatsCollector statsCollector : statsCollectorList) { + statsCollector.setNextReader(context); + } + } + } + for (FieldFacetAccumulator fa : facetAccumulators) { + fa.setNextReader(context); + } + } + + /** + * Updates the reader for all of the range facet {@link StatsCollector}s. + * @param context The context to read documents from. 
+ * @throws IOException if there is an error setting the next reader + */ + public void setRangeStatsCollectorReaders(AtomicReaderContext context) throws IOException { + super.setNextReader(context); + for( Map rangeList : rangeFacetCollectors.values() ){ + for (StatsCollector[] statsCollectorList : rangeList.values()) { + for (StatsCollector statsCollector : statsCollectorList) { + statsCollector.setNextReader(context); + } + } + } + } + + + /** + * Updates the reader for all of the query facet {@link StatsCollector}s. + * @param context The context to read documents from. + * @throws IOException if there is an error setting the next reader + */ + public void setQueryStatsCollectorReaders(AtomicReaderContext context) throws IOException { + super.setNextReader(context); + for( Map queryList : queryFacetCollectors.values() ){ + for (StatsCollector[] statsCollectorList : queryList.values()) { + for (StatsCollector statsCollector : statsCollectorList) { + statsCollector.setNextReader(context); + } + } + } + } + + /** + * Called from Analytics stats, adds documents to the field + * facets and the super {@link BasicAccumulator}. + */ + @Override + public void collect(int doc) throws IOException { + for( FieldFacetAccumulator fa : facetAccumulators ){ + fa.collect(doc); + } + super.collect(doc); + } + + /** + * Given a document, fieldFacet field and facetValue, adds the document to the + * {@link StatsCollector}s held in the bucket corresponding to the fieldFacet field and facetValue. + * Called during initial document collection. + */ + @Override + public void collectField(int doc, String facetField, String facetValue) throws IOException { + Map map = fieldFacetCollectors.get(facetField); + StatsCollector[] statsCollectors = map.get(facetValue); + // If the facetValue has not been seen yet, a StatsCollector array is + // created and associated with that bucket. 
+ if( statsCollectors == null ){ + statsCollectors = statsCollectorArraySupplier.get(); + map.put(facetValue,statsCollectors); + fieldFacetExpressions.get(facetField).put(facetValue,makeExpressions(statsCollectors)); + for (StatsCollector statsCollector : statsCollectors) { + statsCollector.setNextReader(context); + } + } + for (StatsCollector statsCollector : statsCollectors) { + statsCollector.collect(doc); + } + } + + /** + * Given a document, rangeFacet field and range, adds the document to the + * {@link StatsCollector}s held in the bucket corresponding to the rangeFacet field and range. + * Called during post processing. + */ + @Override + public void collectRange(int doc, String facetField, String range) throws IOException { + Map map = rangeFacetCollectors.get(facetField); + StatsCollector[] statsCollectors = map.get(range); + // If the range has not been seen yet, a StatsCollector array is + // created and associated with that bucket. + if( statsCollectors == null ){ + statsCollectors = statsCollectorArraySupplier.get(); + map.put(range,statsCollectors); + rangeFacetExpressions.get(facetField).put(range,makeExpressions(statsCollectors)); + for (StatsCollector statsCollector : statsCollectors) { + statsCollector.setNextReader(context); + } + } + for (StatsCollector statsCollector : statsCollectors) { + statsCollector.collect(doc); + } + } + + /** + * Given a document, queryFacet name and query, adds the document to the + * {@link StatsCollector}s held in the bucket corresponding to the queryFacet name and query. + * Called during post processing. + */ + @Override + public void collectQuery(int doc, String facetName, String query) throws IOException { + Map map = queryFacetCollectors.get(facetName); + StatsCollector[] statsCollectors = map.get(query); + // If the query has not been seen yet, a StatsCollector array is + // created and associated with that bucket. 
+ if( statsCollectors == null ){ + statsCollectors = statsCollectorArraySupplier.get(); + map.put(query,statsCollectors); + queryFacetExpressions.get(facetName).put(query,makeExpressions(statsCollectors)); + for (StatsCollector statsCollector : statsCollectors) { + statsCollector.setNextReader(context); + } + } + for (StatsCollector statsCollector : statsCollectors) { + statsCollector.collect(doc); + } + } + + /** + * A comparator to compare expression values for field facet sorting. + */ + public static class EntryComparator implements Comparator> { + private final Comparator comp; + private final int comparatorExpressionPlace; + + public EntryComparator(Comparator comp, int comparatorExpressionPlace) { + this.comp = comp; + this.comparatorExpressionPlace = comparatorExpressionPlace; + } + + @Override + public int compare(Entry o1, Entry o2) { + return comp.compare(o1.getValue()[comparatorExpressionPlace], o2.getValue()[comparatorExpressionPlace]); + } + } + + /** + * Finalizes the statistics within the each facet bucket before exporting; + */ + @Override + public void compute() { + if (!basicsAndFieldFacetsComputed) { + super.compute(); + for( Map f : fieldFacetCollectors.values() ){ + for( StatsCollector[] arr : f.values() ){ + for( StatsCollector b : arr ){ + b.compute(); + } + } + } + basicsAndFieldFacetsComputed = true; + } + } + + /** + * Finalizes the statistics within the a specific query facet before exporting; + */ + public void computeQueryFacet(String facet) { + Map f = queryFacetCollectors.get(facet); + for( StatsCollector[] arr : f.values() ){ + for( StatsCollector b : arr ){ + b.compute(); + } + } + } + + /** + * Finalizes the statistics within the a specific range facet before exporting; + */ + public void computeRangeFacet(String facet) { + Map f = rangeFacetCollectors.get(facet); + for( StatsCollector[] arr : f.values() ){ + for( StatsCollector b : arr ){ + b.compute(); + } + } + } + + /** + * Returns the value of an expression to use in a range 
or query facet. + * @param expressionName the name of the expression + * @param fieldFacet the facet field + * @param facetValue the facet value + * @return String String representation of pivot value + */ + @SuppressWarnings({ "deprecation", "rawtypes" }) + public String getResult(String expressionName, String fieldFacet, String facetValue) { + if (facetValue.contains(AnalyticsParams.RESULT) && !facetValue.contains(AnalyticsParams.QUERY_RESULT)) { + try { + String[] pivotStr = ExpressionFactory.getArguments(facetValue.substring(facetValue.indexOf('(')+1,facetValue.lastIndexOf(')')).trim()); + if (pivotStr.length==1) { + facetValue = getResult(pivotStr[0]); + } else if (pivotStr.length==3) { + facetValue = getResult(pivotStr[0],pivotStr[1],pivotStr[2]); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST, "Result request "+facetValue+" has an invalid amount of arguments."); + } + } catch (IndexOutOfBoundsException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Result request "+facetValue+" is invalid. Lacks parentheses.",e); + } + } + if (fieldFacetExpressions.get(fieldFacet)!=null) { + Expression[] facetExpressions = fieldFacetExpressions.get(fieldFacet).get(facetValue); + for (int count = 0; count < expressionNames.length; count++) { + if (expressionName.equals(expressionNames[count])) { + Comparable value = facetExpressions[count].getValue(); + if (value.getClass().equals(Date.class)) { + return TrieDateField.formatExternal((Date)value); + } else { + return value.toString(); + } + } + } + } + throw new SolrException(ErrorCode.BAD_REQUEST,"Field Facet Pivot expression "+expressionName+" not found."); + } + + /** + * Returns the value of an expression to use in a range or query facet. 
+ * @param currentFacet the name of the current facet + * @param expressionName the name of the expression + * @param queryFacet the facet query + * @param facetValue the field value + * @return String String representation of pivot value + */ + @SuppressWarnings({ "deprecation", "rawtypes" }) + public String getQueryResult(String currentFacet, String expressionName, String queryFacet, String facetValue) { + if (facetValue.contains(AnalyticsParams.RESULT) && !facetValue.contains(AnalyticsParams.QUERY_RESULT)) { + try { + String[] pivotStr = ExpressionFactory.getArguments(facetValue.substring(facetValue.indexOf('(')+1,facetValue.lastIndexOf(')')).trim()); + if (pivotStr.length==1) { + facetValue = getResult(pivotStr[0]); + } else if (pivotStr.length==3) { + facetValue = getResult(pivotStr[0],pivotStr[1],pivotStr[2]); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST, "Result request "+facetValue+" has an invalid amount of arguments."); + } + } catch (IndexOutOfBoundsException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Result request "+facetValue+" is invalid. Lacks parentheses.",e); + } + } + if (facetValue.contains(AnalyticsParams.QUERY_RESULT)) { + try { + String[] pivotStr = ExpressionFactory.getArguments(facetValue.substring(facetValue.indexOf('(')+1,facetValue.lastIndexOf(')')).trim()); + if (pivotStr.length==1) { + facetValue = getResult(pivotStr[0]); + } else if (pivotStr.length==3) { + facetValue = getQueryResult(currentFacet,pivotStr[0],pivotStr[1],pivotStr[2]); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST,"Result request "+facetValue+" has an invalid amount of arguments."); + } + } catch (IndexOutOfBoundsException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Result request "+facetValue+" is invalid. 
Lacks parentheses.",e); + } + } + if (queryFacetExpressions.get(queryFacet)!=null) { + Expression[] facetExpressions = queryFacetExpressions.get(queryFacet).get(facetValue); + for (int count = 0; count < expressionNames.length; count++) { + if (expressionName.equals(expressionNames[count])) { + Comparable value = facetExpressions[count].getValue(); + if (value.getClass().equals(Date.class)) { + return TrieDateField.formatExternal((Date)value); + } else { + return value.toString(); + } + } + } + } + throw new SolrException(ErrorCode.BAD_REQUEST,"Field Facet Pivot expression "+expressionName+" not found."); + } + + @Override + @SuppressWarnings("unchecked") + public NamedList export() { + final NamedList base = (NamedList)super.export(); + NamedList> facetList = new NamedList>(); + + // Add the field facet buckets to the output + base.add("fieldFacets",facetList); + for( FieldFacetRequest freq : request.getFieldFacets() ){ + final String name = freq.getName(); + if (hiddenFieldFacets.contains(name)) { + continue; + } + final Map buckets = fieldFacetExpressions.get(name); + final NamedList bucketBase = new NamedList(); + + Iterable> iter = buckets.entrySet(); + + final FieldFacetRequest fr = (FieldFacetRequest) freq; + + final FacetSortSpecification sort = fr.getSort(); + final int limit = fr.getLimit(); + final int offset = fr.getOffset(); + final boolean showMissing = fr.showsMissing(); + if (!showMissing) { + buckets.remove(MISSING_VALUE); + } + // Sorting the buckets if a sort specification is provided + if( sort != null && buckets.values().iterator().hasNext()){ + int sortPlace = Arrays.binarySearch(expressionNames, sort.getStatistic()); + final Expression first = buckets.values().iterator().next()[sortPlace]; + final Comparator comp = (Comparator) first.comparator(sort.getDirection()); + + final List> sorted = new ArrayList>(buckets.size()); + Iterables.addAll(sorted, iter); + Collections.sort(sorted, new EntryComparator(comp,sortPlace)); + iter = sorted; + } + 
// apply the limit + if( limit > AnalyticsContentHandler.DEFAULT_FACET_LIMIT ){ + if( offset > 0 ){ + iter = Iterables.skip(iter, offset); + } + iter = Iterables.limit(iter, limit); + } + + // Export each expression in the bucket. + for( Entry bucket : iter ){ + bucketBase.add(bucket.getKey(),export(bucket.getValue())); + } + + facetList.add(name, bucketBase); + } + + // Add the range facet buckets to the output + facetList = new NamedList>(); + base.add("rangeFacets",facetList); + for( RangeFacetRequest freq : request.getRangeFacets() ){ + final String name = freq.getName(); + final Map buckets = rangeFacetExpressions.get(name); + final NamedList bucketBase = new NamedList(); + + Iterable> iter = buckets.entrySet(); + + for( Entry bucket : iter ){ + bucketBase.add(bucket.getKey(),export(bucket.getValue())); + } + + facetList.add(name, bucketBase); + } + + // Add the query facet buckets to the output + facetList = new NamedList>(); + base.add("queryFacets",facetList); + for( QueryFacetRequest freq : request.getQueryFacets() ){ + final String name = freq.getName(); + final Map buckets = queryFacetExpressions.get(name); + final NamedList bucketBase = new NamedList(); + + Iterable> iter = buckets.entrySet(); + + for( Entry bucket : iter ){ + bucketBase.add(bucket.getKey(),export(bucket.getValue())); + } + + facetList.add(name, bucketBase); + } + + return base; + } + + /** + * Exports a list of expressions as a NamedList + * @param expressionArr an array of expressions + * @return named list of expressions + */ + public NamedList export(Expression[] expressionArr) { + NamedList base = new NamedList(); + for (int count = 0; count < expressionArr.length; count++) { + if (!hiddenExpressions.contains(expressionNames[count])) { + base.add(expressionNames[count], expressionArr[count].getValue()); + } + } + return base; + } + + /** + * Processes the query and range facets. + * Must be called if range and/or query facets are supported. 
+ */ + @Override + public void postProcess() throws IOException { + super.compute(); + for( Map f : fieldFacetCollectors.values() ){ + for( StatsCollector[] arr : f.values() ){ + for( StatsCollector b : arr ){ + b.compute(); + } + } + } + basicsAndFieldFacetsComputed = true; + final Filter filter = docs.getTopFilter(); + if( rangeFacets != null ){ + processRangeFacets(filter); + } + if( queryFacets != null ){ + processQueryFacets(filter); + } + } + + /** + * Initiates the collecting of query facets + * @param filter the base filter to work against + * @throws IOException if searching failed + */ + public void processQueryFacets(final Filter filter) throws IOException { + for( QueryFacetRequest qfr : queryFacets ){ + for( String query : qfr.getQueries() ){ + if (query.contains(AnalyticsParams.RESULT) && !query.contains(AnalyticsParams.QUERY_RESULT)) { + try { + String[] pivotStr = ExpressionFactory.getArguments(query.substring(query.indexOf('(')+1,query.lastIndexOf(')')).trim()); + if (pivotStr.length==1) { + query = getResult(pivotStr[0]); + } else if (pivotStr.length==3) { + query = getResult(pivotStr[0],pivotStr[1],pivotStr[2]); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST,"Result request "+query+" has an invalid amount of arguments."); + } + } catch (IndexOutOfBoundsException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Result request "+query+" is invalid. Lacks parentheses.",e); + } + } else if (query.contains(AnalyticsParams.QUERY_RESULT)) { + try { + String[] pivotStr = ExpressionFactory.getArguments(query.substring(query.indexOf('(')+1,query.lastIndexOf(')')).trim()); + if (pivotStr.length==3) { + query = getQueryResult(qfr.getName(),pivotStr[0],pivotStr[1],pivotStr[2]); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST,"Result request "+query+" has an invalid amount of arguments."); + } + } catch (IndexOutOfBoundsException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Result request "+query+" is invalid. 
Lacks parentheses.",e); + } + } + QueryFacetAccumulator qAcc = new QueryFacetAccumulator(this,qfr.getName(),query); + final Query q; + try { + q = QParser.getParser(query, null, queryRequest).getQuery(); + } catch( SyntaxError e ){ + throw new SolrException(ErrorCode.BAD_REQUEST,"Invalid query '"+query+"'",e); + } + // The searcher sends docIds to the QueryFacetAccumulator which forwards + // them to collectQuery() in this class for collection. + searcher.search(q, filter, qAcc); + computeQueryFacet(qfr.getName()); + queryCount++; + } + } + } + + @Override + public long getNumQueries() { + return queryCount; + } + + /** + * Initiates the collecting of range facets + * @param filter the base filter to use + * @throws IOException if searching fails + */ + public void processRangeFacets(final Filter filter) throws IOException { + for( RangeFacetRequest rfr : rangeFacets ){ + String[] pivotStr; + String start = rfr.getStart(); + if (start.contains(AnalyticsParams.QUERY_RESULT)) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Query result requests can not be used in Range Facets"); + } else if (start.contains(AnalyticsParams.RESULT)) { + try { + pivotStr = ExpressionFactory.getArguments(start.substring(start.indexOf('(')+1,start.indexOf(')')).trim()); + if (pivotStr.length==1) { + rfr.setStart(getResult(pivotStr[0])); + } else if (pivotStr.length==3) { + rfr.setStart(getResult(pivotStr[0],pivotStr[1],pivotStr[2])); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST, "Result request "+start+" has an invalid amount of arguments."); + } + } catch (IndexOutOfBoundsException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Result request "+start+" is invalid. 
Lacks parentheses.",e); + } + } + String end = rfr.getEnd(); + if (end.contains(AnalyticsParams.QUERY_RESULT)) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Query result requests can not be used in Range Facets"); + } else if (end.contains(AnalyticsParams.RESULT)) { + try { + pivotStr = ExpressionFactory.getArguments(end.substring(end.indexOf('(')+1,end.indexOf(')')).trim()); + if (pivotStr.length==1) { + rfr.setEnd(getResult(pivotStr[0])); + } else if (pivotStr.length==3) { + rfr.setEnd(getResult(pivotStr[0],pivotStr[1],pivotStr[2])); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST, "Result request "+end+" has an invalid amount of arguments."); + } + } catch (IndexOutOfBoundsException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Result request "+end+" is invalid. Lacks parentheses.",e); + } + } + String[] gaps = rfr.getGaps(); + for (int count = 0; count> rec = RangeEndpointCalculator.create(rfr); + final SchemaField sf = rfr.getField(); + + // Create a rangeFacetAccumulator for each range and + // collect the documents for that range. + for( FacetRange range : rec.getRanges() ){ + final String upper; + final String lower; + String facetValue = ""; + if( range.lower == null ){ + facetValue = "(*"; + lower = null; + } else { + lower = range.lower; + facetValue = ((range.includeLower)?"[":"(") + range.lower; + } + facetValue+=" TO "; + if( range.upper == null ){ + upper = null; + facetValue += "*)"; + } else { + upper = range.upper; + facetValue += range.upper + ((range.includeUpper)?"]":")"); + } + + Query q = sf.getType().getRangeQuery(null, sf, lower, upper, range.includeLower,range.includeUpper); + RangeFacetAccumulator rAcc = new RangeFacetAccumulator(this,rfr.getName(),facetValue); + // The searcher sends docIds to the RangeFacetAccumulator which forwards + // them to collectRange() in this class for collection. 
+ searcher.search(q, filter, rAcc); + computeRangeFacet(sf.getName()); + } + } + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/ValueAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/ValueAccumulator.java new file mode 100644 index 00000000000..ecc74ef01d5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/ValueAccumulator.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.accumulator; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.solr.common.util.NamedList; + +/** + * Abstract Collector that manages all StatsCollectors, Expressions and Facets. + */ +public abstract class ValueAccumulator extends Collector { + + /** + * @param context The context to read documents from. + * @throws IOException if setting next reader fails + */ + public abstract void setNextReader(AtomicReaderContext context) throws IOException; + + /** + * Finalizes the statistics within each StatsCollector. + * Must be called before export(). 
+ */ + public abstract void compute(); + public abstract NamedList export(); + + public void postProcess() throws IOException { + // NOP + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + // NOP + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/FacetValueAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/FacetValueAccumulator.java new file mode 100644 index 00000000000..856f45f5ba1 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/FacetValueAccumulator.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.accumulator.facet; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; + +/** + * Interface that describes the methods needed for an Accumulator to be able to handle + * fieldFacets, rangeFacets and queryFacets. 
+ */ +public interface FacetValueAccumulator { + + void collectField(int doc, String facetName, String facetValue) throws IOException; + void collectQuery(int doc, String facetName, String facetValue) throws IOException; + void collectRange(int doc, String facetName, String facetValue) throws IOException; + void setQueryStatsCollectorReaders(AtomicReaderContext context) throws IOException; + void setRangeStatsCollectorReaders(AtomicReaderContext context) throws IOException; + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/FieldFacetAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/FieldFacetAccumulator.java new file mode 100644 index 00000000000..a4649237d2e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/FieldFacetAccumulator.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.accumulator.facet; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.analytics.accumulator.FacetingAccumulator; +import org.apache.solr.analytics.accumulator.ValueAccumulator; +import org.apache.solr.analytics.util.AnalyticsParsers; +import org.apache.solr.analytics.util.AnalyticsParsers.NumericParser; +import org.apache.solr.analytics.util.AnalyticsParsers.Parser; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.TrieDateField; +import org.apache.solr.search.SolrIndexSearcher; + +/** + * An Accumulator that manages the faceting for fieldFacets. + * Collects the field facet values. 
+ */ +public class FieldFacetAccumulator extends ValueAccumulator { + protected final Parser parser; + protected final FacetValueAccumulator parent; + protected final String name; + protected final SolrIndexSearcher searcher; + protected final SchemaField schemaField; + protected final boolean multiValued; + protected final boolean numField; + protected final boolean dateField; + protected final BytesRef value; + protected SortedSetDocValues setValues; + protected SortedDocValues sortValues; + protected NumericDocValues numValues; + protected Bits numValuesBits; + + public FieldFacetAccumulator(SolrIndexSearcher searcher, FacetValueAccumulator parent, SchemaField schemaField) throws IOException { + if( !schemaField.hasDocValues() ){ + throw new SolrException(ErrorCode.BAD_REQUEST, "Field '"+schemaField.getName()+"' does not have docValues"); + } + this.searcher = searcher; + this.schemaField = schemaField; + this.name = schemaField.getName(); + if (!schemaField.hasDocValues()) { + throw new IOException(name+" does not have docValues and therefore cannot be faceted over."); + } + this.multiValued = schemaField.multiValued(); + this.numField = schemaField.getType().getNumericType()!=null; + this.dateField = schemaField.getType().getClass().equals(TrieDateField.class); + this.parent = parent; + this.value = new BytesRef(); + this.parser = AnalyticsParsers.getParser(schemaField.getType().getClass()); + } + + public static FieldFacetAccumulator create(SolrIndexSearcher searcher, FacetValueAccumulator parent, SchemaField facetField) throws IOException{ + return new FieldFacetAccumulator(searcher,parent,facetField); + } + + /** + * Move to the next set of documents to add to the field facet. 
+ */ + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + if (multiValued) { + setValues = context.reader().getSortedSetDocValues(name); + } else { + if (numField) { + numValues = context.reader().getNumericDocValues(name); + numValuesBits = context.reader().getDocsWithField(name); + } else { + sortValues = context.reader().getSortedDocValues(name); + } + } + } + + /** + * Tell the FacetingAccumulator to collect the doc with the + * given fieldFacet and value(s). + */ + @Override + public void collect(int doc) throws IOException { + if (multiValued) { + boolean exists = false; + if (setValues!=null) { + setValues.setDocument(doc); + int term; + while ((term = (int)setValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + exists = true; + setValues.lookupOrd(term, value); + parent.collectField(doc, name, parser.parse(value) ); + } + } + if (!exists) { + parent.collectField(doc, name, FacetingAccumulator.MISSING_VALUE ); + } + } else { + if(numField){ + long v = numValues.get(doc); + if( v != 0 || numValuesBits.get(doc) ){ + parent.collectField(doc, name, ((NumericParser)parser).parseNum(numValues.get(doc))); + } else { + parent.collectField(doc, name, FacetingAccumulator.MISSING_VALUE ); + } + } else { + sortValues.get(doc,value); + if( BytesRef.EMPTY_BYTES == value.bytes ){ + parent.collectField(doc, name, FacetingAccumulator.MISSING_VALUE ); + } else { + parent.collectField(doc, name, parser.parse(value) ); + } + } + } + } + + @Override + public void compute() {} + + @Override + public NamedList export() { return null; } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/QueryFacetAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/QueryFacetAccumulator.java new file mode 100644 index 00000000000..f0d6b4aa516 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/QueryFacetAccumulator.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.accumulator.facet; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.solr.analytics.accumulator.ValueAccumulator; +import org.apache.solr.analytics.statistics.StatsCollector; +import org.apache.solr.common.util.NamedList; + +/** + * An Accumulator that manages a certain query of a given query facet. + */ +public class QueryFacetAccumulator extends ValueAccumulator { + protected final FacetValueAccumulator parent; + protected final String facetName; + protected final String facetValue; + + public QueryFacetAccumulator(FacetValueAccumulator parent, String facetName, String facetValue) { + this.parent = parent; + this.facetName = facetName; + this.facetValue = facetValue; + } + + /** + * Tell the FacetingAccumulator to collect the doc with the + * given queryFacet and query. 
+ */ + @Override + public void collect(int doc) throws IOException { + parent.collectQuery(doc, facetName, facetValue); + } + + /** + * Update the readers of the queryFacet {@link StatsCollector}s in FacetingAccumulator + */ + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + parent.setQueryStatsCollectorReaders(context); + } + + @Override + public void compute() { + // NOP + } + + @Override + public NamedList export() { + // NOP + return null; + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/RangeFacetAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/RangeFacetAccumulator.java new file mode 100644 index 00000000000..dd29c1c414b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/RangeFacetAccumulator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.accumulator.facet; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.solr.analytics.statistics.StatsCollector; + +/** + * An Accumulator that manages a certain range of a given range facet. 
+ */ +public class RangeFacetAccumulator extends QueryFacetAccumulator { + public RangeFacetAccumulator(FacetValueAccumulator parent, String facetName, String facetValue) { + super(parent, facetName, facetValue); + } + + /** + * Tell the FacetingAccumulator to collect the doc with the + * given rangeFacet and range. + */ + @Override + public void collect(int doc) throws IOException { + parent.collectRange(doc, facetName, facetValue); + } + + /** + * Update the readers of the rangeFacet {@link StatsCollector}s in FacetingAccumulator + */ + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + parent.setRangeStatsCollectorReaders(context); + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/package.html b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/package.html new file mode 100644 index 00000000000..8737a008740 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/facet/package.html @@ -0,0 +1,27 @@ + + + + + + + +

    +Accumulators for accumulating over different types of facets +

    + + diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/package.html b/solr/core/src/java/org/apache/solr/analytics/accumulator/package.html new file mode 100644 index 00000000000..b2cb8c2d121 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/package.html @@ -0,0 +1,27 @@ + + + + + + + +

    +Accumulators accumulate values over different types of structure (e.g. result, facet, etc.) +

    + + diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/BaseExpression.java b/solr/core/src/java/org/apache/solr/analytics/expression/BaseExpression.java new file mode 100644 index 00000000000..3e56c89c665 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/expression/BaseExpression.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.expression; + +import java.util.Date; + +import org.apache.solr.analytics.statistics.StatsCollector; + + +/** + * BaseExpression returns the value returned by the {@link StatsCollector} for the specified stat. + */ +public class BaseExpression extends Expression { + protected final StatsCollector statsCollector; + protected final String stat; + + /** + * @param statsCollector collector that is asked for the statistic + * @param stat name of the statistic handed to the collector's getStat method + */ + public BaseExpression(StatsCollector statsCollector, String stat) { + this.statsCollector = statsCollector; + this.stat = stat; + } + + /** + * @return whatever the collector's getStat method returns for the configured stat + */ + public Comparable getValue() { + return statsCollector.getStat(stat); + } +} +/** + * ConstantNumberExpression returns the specified constant number, held as a Double. 
+ */ +class ConstantNumberExpression extends Expression { + protected final Double constant; + + /** + * @param d the constant, boxed into a Double + */ + public ConstantNumberExpression(double d) { + constant = new Double(d); + } + + public Comparable getValue() { + return constant; + } +} +/** + * ConstantDateExpression returns the specified constant date. + */ +class ConstantDateExpression extends Expression { + protected final Date constant; + + /** + * @param date the constant; the reference is stored as given, not copied + */ + public ConstantDateExpression(Date date) { + constant = date; + } + + /** + * @param date the constant as a millisecond timestamp, converted with new Date(long) + */ + public ConstantDateExpression(Long date) { + constant = new Date(date); + } + + public Comparable getValue() { + return constant; + } +} +/** + * ConstantStringExpression returns the specified constant string. + */ +class ConstantStringExpression extends Expression { + protected final String constant; + + public ConstantStringExpression(String str) { + constant = str; + } + + public Comparable getValue() { + return constant; + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/DualDelegateExpression.java b/solr/core/src/java/org/apache/solr/analytics/expression/DualDelegateExpression.java new file mode 100644 index 00000000000..f8579bf2d61 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/expression/DualDelegateExpression.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.expression; + +/** + * Abstraction of an expression that applies a function to two delegate expressions. + */ +public abstract class DualDelegateExpression extends Expression { + protected Expression a; + protected Expression b; + public DualDelegateExpression(Expression a, Expression b) { + this.a = a; + this.b = b; + } +} +/** + * DivideExpression returns the quotient of 'a' and 'b'. + */ +class DivideExpression extends DualDelegateExpression { + + /** + * @param a numerator + * @param b divisor + */ + public DivideExpression(Expression a, Expression b) { + super(a,b); + } + + @Override + public Comparable getValue() { + Comparable aComp = a.getValue(); + Comparable bComp = b.getValue(); + if (aComp==null || bComp==null) { + return null; + } + double div = ((Number)aComp).doubleValue(); + div = div / ((Number)bComp).doubleValue(); + return new Double(div); + } +} +/** + * PowerExpression returns 'a' to the power of 'b'. + */ +class PowerExpression extends DualDelegateExpression { + + /** + * @param a base + * @param b exponent + */ + public PowerExpression(Expression a, Expression b) { + super(a,b); + } + + @Override + public Comparable getValue() { + Comparable aComp = a.getValue(); + Comparable bComp = b.getValue(); + if (aComp==null || bComp==null) { + return null; + } + return new Double(Math.pow(((Number)aComp).doubleValue(),((Number)bComp).doubleValue())); + } +} +/** + * LogExpression returns the log of the delegate's value given a base number. 
+ */ +class LogExpression extends DualDelegateExpression { + /** + * @param a number + * @param b base + */ + public LogExpression(Expression a, Expression b) { + super(a,b); + } + + @Override + public Comparable getValue() { + Comparable aComp = a.getValue(); + Comparable bComp = b.getValue(); + if (aComp==null || bComp==null) { + return null; + } + return Math.log(((Number)aComp).doubleValue())/Math.log(((Number)bComp).doubleValue()); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/Expression.java b/solr/core/src/java/org/apache/solr/analytics/expression/Expression.java new file mode 100644 index 00000000000..add097682e7 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/expression/Expression.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.expression; + +import java.util.Comparator; + +import org.apache.solr.analytics.request.FieldFacetRequest.FacetSortDirection; + +/** + * Expressions map either zero, one, two or many inputs to a single value. + * They can be defined recursively to compute complex math. 
+ */ +public abstract class Expression { + /** + * Computes the value of this expression. + * + * @return the value; the delegate expressions in this package return null when an input value is null + */ + public abstract Comparable getValue(); + + /** + * Builds a comparator that orders expressions by their current values. + * Expressions whose value is null are ordered after all others, whatever the direction, + * so that sorting never fails with a NullPointerException. + * + * @param direction ASCENDING compares a to b; any other direction compares b to a + * @return comparator over the values returned by getValue() + */ + public Comparator<Expression> comparator(final FacetSortDirection direction) { + return new Comparator<Expression>(){ + @SuppressWarnings("unchecked") + @Override + public int compare(Expression a, Expression b) { + Comparable aValue = a.getValue(); + Comparable bValue = b.getValue(); + if( aValue == null || bValue == null ){ + /* A missing value sorts last and two missing values tie. */ + return aValue == bValue ? 0 : ( aValue == null ? 1 : -1 ); + } + if( direction == FacetSortDirection.ASCENDING ){ + return aValue.compareTo(bValue); + } else { + return bValue.compareTo(aValue); + } + } + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/ExpressionFactory.java b/solr/core/src/java/org/apache/solr/analytics/expression/ExpressionFactory.java new file mode 100644 index 00000000000..5da5fb011c3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/expression/ExpressionFactory.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.expression; + +import java.text.ParseException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.analytics.statistics.StatsCollector; +import org.apache.solr.analytics.util.AnalyticsParams; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.schema.TrieDateField; + +public class ExpressionFactory { + + /** + * Creates a single expression that contains delegate expressions and/or + * a StatsCollector. + * StatsCollectors are given as input and not created within the method so that + * expressions can share the same StatsCollectors, minimizing computation. + * + * @param expression String representation of the desired expression + * @param statsCollectors List of StatsCollectors to build the expression with. + * @return the expression + */ + @SuppressWarnings("deprecation") + public static Expression create(String expression, StatsCollector[] statsCollectors) { + int paren = expression.indexOf('('); + if (paren<=0) { + throw new SolrException(ErrorCode.BAD_REQUEST, "The expression ["+expression+"] has no arguments and is not supported."); + } + String topOperation = expression.substring(0,paren).trim(); + String operands; + try { + operands = expression.substring(paren+1, expression.lastIndexOf(')')).trim(); + } catch (Exception e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Missing closing parenthesis in ["+expression+"]",e); + } + + // Builds a statistic, constant or recursively builds an expression tree + + // Statistic + if (AnalyticsParams.ALL_STAT_SET.contains(topOperation)) { + if (topOperation.equals(AnalyticsParams.STAT_PERCENTILE)) { + operands = expression.substring(expression.indexOf(',')+1, expression.lastIndexOf(')')).trim(); + topOperation = topOperation+"_"+expression.substring(expression.indexOf('(')+1, expression.indexOf(',')).trim(); + } + StatsCollector collector = null; + // Finds the desired 
counter and builds an expression around it and the desired statistic. + for (StatsCollector c : statsCollectors) { + if (c.valueSourceString().equals(operands)) { + collector = c; + break; + } + } + if (collector == null) { + throw new SolrException(ErrorCode.BAD_REQUEST, "ValueSource ["+operands+"] in Expression ["+expression+"] not found."); + } + return new BaseExpression(collector, topOperation); + } + // Constant + if (topOperation.equals(AnalyticsParams.CONSTANT_NUMBER)) { + try { + return new ConstantNumberExpression(Double.parseDouble(operands)); + } catch (NumberFormatException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, "The constant "+operands+" cannot be converted into a number.",e); + } + } else if (topOperation.equals(AnalyticsParams.CONSTANT_DATE)) { + try { + return new ConstantDateExpression(TrieDateField.parseDate(operands)); + } catch (ParseException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, "The constant "+operands+" cannot be converted into a date.",e); + } + } else if (topOperation.equals(AnalyticsParams.CONSTANT_STRING)) { + operands = expression.substring(paren+1, expression.lastIndexOf(')')); + return new ConstantStringExpression(operands); + } + + // Complex Delegating Expressions + String[] arguments = getArguments(operands); + Expression[] expArgs = new Expression[arguments.length]; + for (int count = 0; count < arguments.length; count++) { + // Recursively builds delegate expressions + expArgs[count] = create(arguments[count], statsCollectors); + } + + // Single Delegate Expressions + if (expArgs.length==1) { + // Numeric Expression + if (topOperation.equals(AnalyticsParams.NEGATE)) { + return new NegateExpression(expArgs[0]); + } + if (topOperation.equals(AnalyticsParams.ABSOLUTE_VALUE)) { + return new AbsoluteValueExpression(expArgs[0]); + } + // String Expression + else if (topOperation.equals(AnalyticsParams.REVERSE)) { + return new ReverseExpression(expArgs[0]); + } + throw new 
SolrException(ErrorCode.BAD_REQUEST, topOperation+" does not have the correct number of arguments."); + } else { + // Multi Delegate Expressions + // Numeric Expression + if (topOperation.equals(AnalyticsParams.ADD)) { + return new AddExpression(expArgs); + } else if (topOperation.equals(AnalyticsParams.MULTIPLY)) { + return new MultiplyExpression(expArgs); + } + // Date Expression + else if (topOperation.equals(AnalyticsParams.DATE_MATH)) { + return new DateMathExpression(expArgs); + } + // String Expression + else if (topOperation.equals(AnalyticsParams.CONCATENATE)) { + return new ConcatenateExpression(expArgs); + } + // Dual Delegate Expressions + else if (expArgs.length==2 && (topOperation.equals(AnalyticsParams.DIVIDE) || topOperation.equals(AnalyticsParams.POWER) + || topOperation.equals(AnalyticsParams.LOG))) { + // Numeric Expression + if (topOperation.equals(AnalyticsParams.DIVIDE)) { + return new DivideExpression(expArgs[0], expArgs[1]); + } else if (topOperation.equals(AnalyticsParams.POWER)) { + return new PowerExpression(expArgs[0], expArgs[1]); + } else if (topOperation.equals(AnalyticsParams.LOG)) { + return new LogExpression(expArgs[0], expArgs[1]); + } + return null; + } + throw new SolrException(ErrorCode.BAD_REQUEST, topOperation+" does not have the correct number of arguments or is unsupported."); + } + + } + + /** + * Splits up an Expression's arguments. 
+ * + * @param expression Current expression string + * @return List The list of arguments + */ + public static String[] getArguments(String expression) { + String[] strings = new String[1]; + int stack = 0; + int start = 0; + List arguments = new ArrayList(); + char[] chars = expression.toCharArray(); + for (int count = 0; count < expression.length(); count++) { + char c = chars[count]; + if (c==',' && stack == 0) { + arguments.add(expression.substring(start, count).replace("\\(","(").replace("\\)",")").replace("\\,",",").trim()); + start = count+1; + } else if (c == '(') { + stack ++; + } else if (c == ')') { + stack --; + } else if (c == '\\') { + ; // Do nothing. + } + } + if (stack==0) { + arguments.add(expression.substring(start).trim()); + } + return arguments.toArray(strings); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/MultiDelegateExpression.java b/solr/core/src/java/org/apache/solr/analytics/expression/MultiDelegateExpression.java new file mode 100644 index 00000000000..4ea4825f7d7 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/expression/MultiDelegateExpression.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.expression; + +import java.text.ParseException; +import java.util.Date; + +import org.apache.solr.util.DateMathParser; + +/** + * Abstraction of an expression that applies a function to an array of delegate expressions. + */ +public abstract class MultiDelegateExpression extends Expression { + protected final Expression[] delegates; + + public MultiDelegateExpression(Expression[] delegates) { + this.delegates = delegates; + } +} +/** + * AddExpression returns the sum of its delegates' values as a Double. + * A delegate value that is exactly a java.util.Date is added as its getTime() millisecond value; + * if any delegate value is null the result is null. + */ +class AddExpression extends MultiDelegateExpression { + public AddExpression(Expression[] delegates) { + super(delegates); + } + + @Override + public Comparable getValue() { + double sum = 0; + for (Expression delegate : delegates) { + Comparable dComp = delegate.getValue(); + if (dComp==null) { + return null; + } else if (dComp.getClass().equals(Date.class)) { + dComp = new Long(((Date)dComp).getTime()); + } + sum += ((Number)dComp).doubleValue(); + } + return new Double(sum); + } +} +/** + * MultiplyExpression returns the product of its delegates' values as a Double. + * If any delegate value is null the result is null; every other value is cast to Number. + */ +class MultiplyExpression extends MultiDelegateExpression { + public MultiplyExpression(Expression[] delegates) { + super(delegates); + } + + @Override + public Comparable getValue() { + double prod = 1; + for (Expression delegate : delegates) { + Comparable dComp = delegate.getValue(); + if (dComp==null) { + return null; + } + prod *= ((Number)dComp).doubleValue(); + } + return new Double(prod); + } +} +/** + * DateMathExpression returns the start date modified by the DateMath operations + */ +class DateMathExpression extends MultiDelegateExpression { + /** + * @param delegates A list of Expressions. The first element in the list + * should be an Expression whose value is a Date (it is cast to Date as the starting date). + * The rest of the list should be string Expression objects which contain + * the DateMath operations to perform on the start date. 
+ */ + public DateMathExpression(Expression[] delegates) { + super(delegates); + } + + @Override + public Comparable getValue() { + DateMathParser parser = new DateMathParser(); + parser.setNow((Date)delegates[0].getValue()); + try { + for (int count = 1; countConcatenateExpression returns the concatenation of it's delegates' values in the order given. + */ +class ConcatenateExpression extends MultiDelegateExpression { + public ConcatenateExpression(Expression[] delegates) { + super(delegates); + } + + @Override + public Comparable getValue() { + StringBuilder builder = new StringBuilder(); + for (Expression delegate : delegates) { + Comparable dComp = delegate.getValue(); + if (dComp==null) { + return null; + } + builder.append(dComp.toString()); + } + return builder.toString(); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/SingleDelegateExpression.java b/solr/core/src/java/org/apache/solr/analytics/expression/SingleDelegateExpression.java new file mode 100644 index 00000000000..c6ab60ed3d2 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/expression/SingleDelegateExpression.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.expression; + +import java.util.Date; + +/** + * Abstraction of an expression that applies a function to one delegate expression. + */ +public abstract class SingleDelegateExpression extends Expression { + protected Expression delegate; + + public SingleDelegateExpression(Expression delegate) { + this.delegate = delegate; + } +} +/** + * NegateExpression returns the negation of the delegate's value as a Double. + * A value that is exactly a java.util.Date is negated as its getTime() millisecond value; + * a null delegate value yields null. + */ +class NegateExpression extends SingleDelegateExpression { + public NegateExpression(Expression delegate) { + super(delegate); + } + + @Override + public Comparable getValue() { + Comparable nComp = delegate.getValue(); + if (nComp==null) { + return null; + } else if (nComp.getClass().equals(Date.class)) { + nComp = new Long(((Date)nComp).getTime()); + } + return new Double(((Number)nComp).doubleValue()*-1); + } +} +/** + * AbsoluteValueExpression returns the absolute value of the delegate's value as a Double. + * A null delegate value yields null; unlike NegateExpression, Date values are not converted here. + */ +class AbsoluteValueExpression extends SingleDelegateExpression { + public AbsoluteValueExpression(Expression delegate) { + super(delegate); + } + + @Override + public Comparable getValue() { + Comparable nComp = delegate.getValue(); + if (nComp==null) { + return null; + } + double d = ((Number)nComp).doubleValue(); + if (d<0) { + return new Double(d*-1); + } else { + return new Double(d); + } + } +} +/** + * ReverseExpression returns the reverse of the delegate's string value. 
+ */ +class ReverseExpression extends SingleDelegateExpression { + public ReverseExpression(Expression delegate) { + super(delegate); + } + + @Override + public Comparable getValue() { + Comparable rComp = delegate.getValue(); + if (rComp==null) { + return null; + } + return new StringBuilder(rComp.toString()).reverse().toString(); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/package.html b/solr/core/src/java/org/apache/solr/analytics/expression/package.html new file mode 100644 index 00000000000..434f7103fa9 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/expression/package.html @@ -0,0 +1,27 @@ + + + + + + + +

    +Expressions map either zero, one, two or many inputs to a single value. They can be defined recursively to compute complex math. +

    + + diff --git a/solr/core/src/java/org/apache/solr/analytics/plugin/AnalyticsStatisticsCollector.java b/solr/core/src/java/org/apache/solr/analytics/plugin/AnalyticsStatisticsCollector.java new file mode 100644 index 00000000000..74db91dfa94 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/plugin/AnalyticsStatisticsCollector.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.plugin; + +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.util.stats.Snapshot; +import org.apache.solr.util.stats.Timer; +import org.apache.solr.util.stats.TimerContext; + +public class AnalyticsStatisticsCollector { + private final AtomicLong numRequests; + private final AtomicLong numAnalyticsRequests; + private final AtomicLong numStatsRequests; + private final AtomicLong numCollectedStats; + private final AtomicLong numFieldFacets; + private final AtomicLong numRangeFacets; + private final AtomicLong numQueryFacets; + private final AtomicLong numQueries; + private final Timer requestTimes; + + public TimerContext currentTimer; + + public AnalyticsStatisticsCollector() { + numRequests = new AtomicLong(); + numAnalyticsRequests = new AtomicLong(); + numStatsRequests = new AtomicLong(); + numCollectedStats = new AtomicLong(); + numFieldFacets = new AtomicLong(); + numRangeFacets = new AtomicLong(); + numQueryFacets = new AtomicLong(); + numQueries = new AtomicLong(); + requestTimes = new Timer(); + } + + public void startRequest() { + numRequests.incrementAndGet(); + currentTimer = requestTimes.time(); + } + + public void addRequests(long num) { + numAnalyticsRequests.addAndGet(num); + } + + public void addStatsRequests(long num) { + numStatsRequests.addAndGet(num); + } + + public void addStatsCollected(long num) { + numCollectedStats.addAndGet(num); + } + + public void addFieldFacets(long num) { + numFieldFacets.addAndGet(num); + } + + public void addRangeFacets(long num) { + numRangeFacets.addAndGet(num); + } + + public void addQueryFacets(long num) { + numQueryFacets.addAndGet(num); + } + + public void addQueries(long num) { + numQueries.addAndGet(num); + } + + public void endRequest() { + currentTimer.stop(); + } + + public NamedList getStatistics() { + NamedList lst = new SimpleOrderedMap(); + 
Snapshot snapshot = requestTimes.getSnapshot(); + lst.add("requests", numRequests.longValue()); + lst.add("analyticsRequests", numAnalyticsRequests.longValue()); + lst.add("statsRequests", numStatsRequests.longValue()); + lst.add("statsCollected", numCollectedStats.longValue()); + lst.add("fieldFacets", numFieldFacets.longValue()); + lst.add("rangeFacets", numRangeFacets.longValue()); + lst.add("queryFacets", numQueryFacets.longValue()); + lst.add("queriesInQueryFacets", numQueries.longValue()); + lst.add("totalTime", requestTimes.getSum()); + lst.add("avgRequestsPerSecond", requestTimes.getMeanRate()); + lst.add("5minRateReqsPerSecond", requestTimes.getFiveMinuteRate()); + lst.add("15minRateReqsPerSecond", requestTimes.getFifteenMinuteRate()); + lst.add("avgTimePerRequest", requestTimes.getMean()); + lst.add("medianRequestTime", snapshot.getMedian()); + lst.add("75thPcRequestTime", snapshot.get75thPercentile()); + lst.add("95thPcRequestTime", snapshot.get95thPercentile()); + lst.add("99thPcRequestTime", snapshot.get99thPercentile()); + lst.add("999thPcRequestTime", snapshot.get999thPercentile()); + return lst; + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/plugin/package.html b/solr/core/src/java/org/apache/solr/analytics/plugin/package.html new file mode 100644 index 00000000000..7555251cb85 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/plugin/package.html @@ -0,0 +1,27 @@ + + + + + + + +

    +MBean plugins for stats collection +

    + + diff --git a/solr/core/src/java/org/apache/solr/analytics/request/AbstractFieldFacetRequest.java b/solr/core/src/java/org/apache/solr/analytics/request/AbstractFieldFacetRequest.java new file mode 100644 index 00000000000..6f85cf0dba3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/request/AbstractFieldFacetRequest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.request; + +import org.apache.solr.schema.SchemaField; + +/** + * An abstract request for a facet over a single field, such as a field or range facet. 
+ */ +public abstract class AbstractFieldFacetRequest implements FacetRequest { + protected SchemaField field = null; + + public AbstractFieldFacetRequest(SchemaField field) { + this.field = field; + } + + public SchemaField getField() { + return field; + } + + public void setField(SchemaField field) { + this.field = field; + } + + public String getName() { + return field.getName(); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsContentHandler.java b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsContentHandler.java new file mode 100644 index 00000000000..1f038bae37b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsContentHandler.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.solr.analytics.request;
+
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+import org.apache.solr.analytics.request.FieldFacetRequest.FacetSortDirection;
+import org.apache.solr.analytics.request.FieldFacetRequest.FacetSortSpecification;
+import org.apache.solr.common.params.FacetParams.FacetRangeInclude;
+import org.apache.solr.common.params.FacetParams.FacetRangeOther;
+import org.apache.solr.schema.IndexSchema;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Handles the parsing of the AnalysisRequestEnvelope elements if passed in through XML.
+ *
+ * The handler is a small state machine: the {@code in*} flags record which
+ * enclosing element is currently open, {@link #startElement} opens a context and
+ * resets the builder state that belongs to it, and {@link #endElement} consumes
+ * the text gathered by {@link #characters} and closes the context. Builder state
+ * is reset on every opening tag so that an element which omits a required child
+ * fails with a {@link SAXException} instead of silently reusing the object built
+ * for a previous sibling.
+ */
+public class AnalyticsContentHandler implements ContentHandler {
+  // XML Element/Attribute Name Constants
+  public final String ANALYTICS_REQUEST_ENVELOPE="analyticsRequestEnvelope";
+
+  public final String ANALYTICS_REQUEST="analyticsRequest";
+  public final String NAME="name";
+
+  public final String STATISTIC="statistic";
+  public final String EXPRESSION="expression";
+
+  public final String FIELD_FACET="fieldFacet";
+  public final String FIELD="field";
+  public final String SHOW_MISSING="showMissing";
+  public final String LIMIT="limit";
+  public final String MIN_COUNT="minCount";
+
+  public final String SORT_SPECIFICATION="sortSpecification";
+  public final String STAT_NAME="statName";
+  public final String DIRECTION="direction";
+
+  public final String RANGE_FACET="rangeFacet";
+  public final String START="start";
+  public final String END="end";
+  public final String GAP="gap";
+  public final String INCLUDE_BOUNDARY="includeBoundary";
+  public final String OTHER_RANGE="otherRange";
+  public final String HARD_END="hardend";
+
+  public final String QUERY_FACET="queryFacet";
+  public final String QUERY="query";
+
+  // Default Values
+  public static final int DEFAULT_FACET_LIMIT = -1;
+  public static final boolean DEFAULT_FACET_HARDEND = false;
+  public static final int DEFAULT_FACET_MINCOUNT = 0;
+  public static final boolean DEFAULT_FACET_FIELD_SHOW_MISSING = false;
+
+  // Which enclosing element is currently open
+  boolean inEnvelope = false;
+  boolean inRequest = false;
+  boolean inStatistic = false;
+  boolean inFieldFacet = false;
+  boolean inSortSpecification = false;
+  boolean inQueryFacet = false;
+  boolean inRangeFacet = false;
+
+  private final IndexSchema schema;
+
+  // Objects to use while building the Analytics Requests
+
+  /** Character data seen since the most recent opening tag. */
+  String currentElementText;
+
+  List<AnalyticsRequest> requests;
+
+  AnalyticsRequest analyticsRequest;
+  List<ExpressionRequest> expressionList;
+  List<FieldFacetRequest> fieldFacetList;
+  List<RangeFacetRequest> rangeFacetList;
+  List<QueryFacetRequest> queryFacetList;
+
+  ExpressionRequest expression;
+
+  FieldFacetRequest fieldFacet;
+  int limit;
+  // Read from the minCount attribute; this handler only forwards limit and
+  // showMissing to the FieldFacetRequest, so the value is currently unused.
+  int minCount;
+  boolean showMissing;
+  FacetSortSpecification sortSpecification;
+
+  RangeFacetRequest rangeFacet;
+  boolean hardend;
+  List<String> gaps;
+  EnumSet<FacetRangeInclude> includeBoundaries;
+  EnumSet<FacetRangeOther> otherRanges;
+
+  String queryName;
+  List<String> queries;
+
+  /**
+   * @param schema schema used to resolve the field names found in field and range facets
+   */
+  public AnalyticsContentHandler(IndexSchema schema) {
+    this.schema = schema;
+  }
+
+  @Override
+  public void setDocumentLocator(Locator locator) { }
+
+  @Override
+  public void startDocument() throws SAXException { }
+
+  @Override
+  public void endDocument() throws SAXException { }
+
+  @Override
+  public void startPrefixMapping(String prefix, String uri) throws SAXException { }
+
+  @Override
+  public void endPrefixMapping(String prefix) throws SAXException { }
+
+  /**
+   * Opens the context named by {@code localName}, if it is one this handler
+   * recognises at the current nesting level, and clears the text buffer.
+   */
+  @Override
+  public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+    currentElementText = "";
+
+    if (!inEnvelope) {
+      if (localName.equals(ANALYTICS_REQUEST_ENVELOPE)) {
+        // Begin the parsing of the Analytics Requests
+        requests = new ArrayList<AnalyticsRequest>();
+        inEnvelope = true;
+      }
+      return;
+    }
+
+    if (!inRequest) {
+      if (localName.equals(ANALYTICS_REQUEST)) {
+        // Start an Analytics Request: renew each list and forget the previous request.
+        analyticsRequest = null;
+        fieldFacetList = new ArrayList<FieldFacetRequest>();
+        rangeFacetList = new ArrayList<RangeFacetRequest>();
+        queryFacetList = new ArrayList<QueryFacetRequest>();
+        expressionList = new ArrayList<ExpressionRequest>();
+        inRequest = true;
+      }
+      return;
+    }
+
+    if (localName.equals(STATISTIC)) {
+      // Start a Statistic Request
+      expression = null;
+      inStatistic = true;
+    } else if (inFieldFacet) {
+      if (localName.equals(SORT_SPECIFICATION)) {
+        // Start a Sort Specification
+        inSortSpecification = true;
+        sortSpecification = new FacetSortSpecification();
+      }
+    } else if (localName.equals(FIELD_FACET)) {
+      // Start a Field Facet Request
+      // Attributes are looked up under the element's own namespace URI.
+      String att = atts.getValue(uri,LIMIT);
+      limit = (att != null) ? Integer.parseInt(att) : DEFAULT_FACET_LIMIT;
+
+      att = atts.getValue(uri,MIN_COUNT);
+      minCount = (att != null) ? Integer.parseInt(att) : DEFAULT_FACET_MINCOUNT;
+
+      att = atts.getValue(uri,SHOW_MISSING);
+      showMissing = (att != null) ? Boolean.parseBoolean(att) : DEFAULT_FACET_FIELD_SHOW_MISSING;
+
+      fieldFacet = null;
+      inFieldFacet = true;
+    } else if (localName.equals(RANGE_FACET)) {
+      // Start a Range Facet Request
+      String att = atts.getValue(uri,HARD_END);
+      hardend = (att != null) ? Boolean.parseBoolean(att) : DEFAULT_FACET_HARDEND;
+
+      // Initiate Range Facet classes
+      rangeFacet = null;
+      gaps = new ArrayList<String>();
+      includeBoundaries = EnumSet.noneOf(FacetRangeInclude.class);
+      otherRanges = EnumSet.noneOf(FacetRangeOther.class);
+      inRangeFacet = true;
+    } else if (localName.equals(QUERY_FACET)) {
+      // Start a Query Facet Request
+      queryName = null;
+      queries = new ArrayList<String>();
+      inQueryFacet = true;
+    }
+  }
+
+  /**
+   * Consumes the text collected for the element that just closed and, when the
+   * element is one of the container elements, finishes the object being built.
+   *
+   * @throws SAXException if a container element closes without the child that
+   *         creates its request object (for example a fieldFacet with no field)
+   */
+  @Override
+  public void endElement(String uri, String localName, String qName) throws SAXException {
+    if (!inEnvelope) {
+      return;
+    }
+    if (!inRequest) {
+      if (localName.equals(ANALYTICS_REQUEST_ENVELOPE)) {
+        // Finished Parsing
+        inEnvelope = false;
+      }
+      return;
+    }
+
+    if (inStatistic) {
+      endInsideStatistic(localName);
+    } else if (inFieldFacet) {
+      endInsideFieldFacet(localName);
+    } else if (inRangeFacet) {
+      endInsideRangeFacet(localName);
+    } else if (inQueryFacet) {
+      endInsideQueryFacet(localName);
+    } else if (localName.equals(NAME)) {
+      analyticsRequest = new AnalyticsRequest(currentElementText);
+    } else if (localName.equals(ANALYTICS_REQUEST)) {
+      // Finished Parsing the Analytics Request
+      AnalyticsRequest finished = require(analyticsRequest, "analyticsRequest element has no name child");
+      finished.setExpressions(expressionList);
+      finished.setFieldFacets(fieldFacetList);
+      finished.setRangeFacets(rangeFacetList);
+      finished.setQueryFacets(queryFacetList);
+      requests.add(finished);
+      inRequest = false;
+    }
+  }
+
+  /** Handles a closing tag seen while a statistic element is open. */
+  private void endInsideStatistic(String localName) throws SAXException {
+    if (localName.equals(EXPRESSION)) {
+      // The expression text doubles as the name until a name child overrides it.
+      expression = new ExpressionRequest(currentElementText,currentElementText);
+    } else if (localName.equals(NAME)) {
+      require(expression, "name must follow expression inside a statistic element").setName(currentElementText);
+    } else if (localName.equals(STATISTIC)) {
+      // Finished Parsing the Statistic Request
+      expressionList.add(require(expression, "statistic element has no expression child"));
+      inStatistic = false;
+    }
+  }
+
+  /** Handles a closing tag seen while a fieldFacet element is open. */
+  private void endInsideFieldFacet(String localName) throws SAXException {
+    if (inSortSpecification) {
+      if (localName.equals(STAT_NAME)) {
+        sortSpecification.setStatistic(currentElementText);
+      } else if (localName.equals(DIRECTION)) {
+        sortSpecification.setDirection(FacetSortDirection.fromExternal(currentElementText));
+      } else if (localName.equals(SORT_SPECIFICATION)) {
+        // Finished Parsing the Sort Specification
+        require(fieldFacet, "sortSpecification must follow field inside a fieldFacet element").setSort(sortSpecification);
+        inSortSpecification = false;
+      }
+    } else if (localName.equals(FIELD)) {
+      fieldFacet = new FieldFacetRequest(schema.getField(currentElementText));
+    } else if (localName.equals(FIELD_FACET)) {
+      // Finished Parsing the Field Facet Request
+      FieldFacetRequest finished = require(fieldFacet, "fieldFacet element has no field child");
+      finished.setLimit(limit);
+      finished.showMissing(showMissing);
+      fieldFacetList.add(finished);
+      inFieldFacet = false;
+    }
+  }
+
+  /** Handles a closing tag seen while a rangeFacet element is open. */
+  private void endInsideRangeFacet(String localName) throws SAXException {
+    if (localName.equals(FIELD)) {
+      rangeFacet = new RangeFacetRequest(schema.getField(currentElementText), "", "", new String[1]);
+    } else if (localName.equals(START)) {
+      require(rangeFacet, "start must follow field inside a rangeFacet element").setStart(currentElementText);
+    } else if (localName.equals(END)) {
+      require(rangeFacet, "end must follow field inside a rangeFacet element").setEnd(currentElementText);
+    } else if (localName.equals(GAP)) {
+      gaps.add(currentElementText);
+    } else if (localName.equals(INCLUDE_BOUNDARY)) {
+      includeBoundaries.add(FacetRangeInclude.get(currentElementText));
+    } else if (localName.equals(OTHER_RANGE)) {
+      otherRanges.add(FacetRangeOther.get(currentElementText));
+    } else if (localName.equals(RANGE_FACET)) {
+      // Finished Parsing the Range Facet Request
+      RangeFacetRequest finished = require(rangeFacet, "rangeFacet element has no field child");
+      finished.setHardEnd(hardend);
+      // NOTE(review): with no gap children this passes a one-element array holding null.
+      finished.setGaps(gaps.toArray(new String[1]));
+      finished.setInclude(includeBoundaries);
+      finished.setOthers(otherRanges);
+      inRangeFacet = false;
+      rangeFacetList.add(finished);
+    }
+  }
+
+  /** Handles a closing tag seen while a queryFacet element is open. */
+  private void endInsideQueryFacet(String localName) {
+    if (localName.equals(NAME)) {
+      queryName = currentElementText;
+    } else if (localName.equals(QUERY)) {
+      queries.add(currentElementText);
+    } else if (localName.equals(QUERY_FACET)) {
+      // Finished Parsing the Query Facet Request
+      QueryFacetRequest temp = new QueryFacetRequest(queryName);
+      temp.setQueries(queries);
+      queryFacetList.add(temp);
+      inQueryFacet = false;
+    }
+  }
+
+  /**
+   * Returns {@code value} unchanged, or aborts the parse with a descriptive
+   * {@link SAXException} when it is {@code null}.
+   */
+  private static <T> T require(T value, String message) throws SAXException {
+    if (value == null) {
+      throw new SAXException(message);
+    }
+    return value;
+  }
+
+  /** Appends the reported character data to the text of the currently open element. */
+  @Override
+  public void characters(char[] ch, int start, int length) throws SAXException {
+    currentElementText += new String(ch,start,length);
+  }
+
+  @Override
+  public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { }
+
+  @Override
+  public void processingInstruction(String target, String data) throws SAXException { }
+
+  @Override
+  public void skippedEntity(String name) throws SAXException { }
+
+  /**
+   * Returns the list of Analytics Requests built during parsing.
+   *
+   * @return List of {@link AnalyticsRequest} objects specified by the given XML file
+   */
+  public List<AnalyticsRequest> getAnalyticsRequests() {
+    return requests;
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsRequest.java b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsRequest.java
new file mode 100644
index 00000000000..8e8282c58e6
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsRequest.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.request;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Contains the specifications of an Analytics Request, specifically a name,
+ * a list of Expressions, a list of field facets, a list of range facets, a list of query facets
+ * and the list of expressions and their results calculated in previous AnalyticsRequests.
+ */
+public class AnalyticsRequest {
+
+  private String name;
+  /** Every expression of the request, hidden ones included. */
+  private List<ExpressionRequest> expressions;
+  /** Names of the expressions registered through {@link #addHiddenExpression}. */
+  private Set<String> hiddenExpressions;
+  private List<FieldFacetRequest> fieldFacets;
+  private List<RangeFacetRequest> rangeFacets;
+  private List<QueryFacetRequest> queryFacets;
+
+  /**
+   * Creates a request with empty expression and facet collections.
+   *
+   * @param name the name of this request
+   */
+  public AnalyticsRequest(String name) {
+    this.name = name;
+    expressions = new ArrayList<ExpressionRequest>();
+    hiddenExpressions = new HashSet<String>();
+    fieldFacets = new ArrayList<FieldFacetRequest>();
+    rangeFacets = new ArrayList<RangeFacetRequest>();
+    queryFacets = new ArrayList<QueryFacetRequest>();
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  /** Replaces the expression list; the given list is stored, not copied. */
+  public void setExpressions(List<ExpressionRequest> expressions) {
+    this.expressions = expressions;
+  }
+
+  public void addExpression(ExpressionRequest expressionRequest) {
+    expressions.add(expressionRequest);
+  }
+
+  public List<ExpressionRequest> getExpressions() {
+    return expressions;
+  }
+
+  /**
+   * Adds the expression to the regular expression list and additionally
+   * records its name in the hidden set.
+   */
+  public void addHiddenExpression(ExpressionRequest expressionRequest) {
+    expressions.add(expressionRequest);
+    hiddenExpressions.add(expressionRequest.getName());
+  }
+
+  public Set<String> getHiddenExpressions() {
+    return hiddenExpressions;
+  }
+
+  /** Replaces the field facet list; the given list is stored, not copied. */
+  public void setFieldFacets(List<FieldFacetRequest> fieldFacets) {
+    this.fieldFacets = fieldFacets;
+  }
+
+  public List<FieldFacetRequest> getFieldFacets() {
+    return fieldFacets;
+  }
+
+  /** Replaces the range facet list; the given list is stored, not copied. */
+  public void setRangeFacets(List<RangeFacetRequest> rangeFacets) {
+    this.rangeFacets = rangeFacets;
+  }
+
+  public List<RangeFacetRequest> getRangeFacets() {
+    return rangeFacets;
+  }
+
+  /** Replaces the query facet list; the given list is stored, not copied. */
+  public void setQueryFacets(List<QueryFacetRequest> queryFacets) {
+    this.queryFacets = queryFacets;
+  }
+
+  public List<QueryFacetRequest> getQueryFacets() {
+    return queryFacets;
+  }
+
+  /**
+   * Concatenates the string forms of the expressions, field facets, range
+   * facets and query facets, in that order.
+   */
+  @Override
+  public String toString() {
+    // NOTE(review): the opening and closing literals are empty in this copy of the patch.
+    StringBuilder builder = new StringBuilder("");
+    appendEach(builder, expressions);
+    appendEach(builder, fieldFacets);
+    appendEach(builder, rangeFacets);
+    appendEach(builder, queryFacets);
+    builder.append("");
+    return builder.toString();
+  }
+
+  /** Appends {@code toString()} of every element of {@code parts}, in list order. */
+  private static void appendEach(StringBuilder builder, List<?> parts) {
+    for (Object part : parts) {
+      builder.append(part.toString());
+    }
+  }
+}
diff --git
a/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsRequestFactory.java b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsRequestFactory.java new file mode 100644 index 00000000000..62fa7345601 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsRequestFactory.java @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.request; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.solr.analytics.request.FieldFacetRequest.FacetSortSpecification; +import org.apache.solr.analytics.util.AnalyticsParams; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.params.FacetParams.FacetRangeInclude; +import org.apache.solr.common.params.FacetParams.FacetRangeOther; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.schema.IndexSchema; + +/** + * Parses the SolrParams to create a list of analytics requests. 
+ */
+public class AnalyticsRequestFactory implements AnalyticsParams {
+
+  // Group 1 is always the request name; for the *Param patterns group 2 is the
+  // field (or query facet) name and group 3 the parameter type.
+  public static final Pattern statPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+EXPRESSION+")\\.([^\\.]+)$", Pattern.CASE_INSENSITIVE);
+  public static final Pattern hiddenStatPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+HIDDEN_EXPRESSION+")\\.([^\\.]+)$", Pattern.CASE_INSENSITIVE);
+  public static final Pattern fieldFacetPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+FIELD_FACET+")$", Pattern.CASE_INSENSITIVE);
+  public static final Pattern fieldFacetParamPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+FIELD_FACET+")\\.([^\\.]+)\\.("+LIMIT+"|"+OFFSET+"|"+HIDDEN+"|"+SHOW_MISSING+"|"+SORT_STATISTIC+"|"+SORT_DIRECTION+")$", Pattern.CASE_INSENSITIVE);
+  public static final Pattern rangeFacetPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+RANGE_FACET+")$", Pattern.CASE_INSENSITIVE);
+  public static final Pattern rangeFacetParamPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+RANGE_FACET+")\\.([^\\.]+)\\.("+START+"|"+END+"|"+GAP+"|"+HARDEND+"|"+INCLUDE_BOUNDARY+"|"+OTHER_RANGE+")$", Pattern.CASE_INSENSITIVE);
+  public static final Pattern queryFacetPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+QUERY_FACET+")$", Pattern.CASE_INSENSITIVE);
+  public static final Pattern queryFacetParamPattern = Pattern.compile("^o(?:lap)?\\.([^\\.]+)\\.(?:"+QUERY_FACET+")\\.([^\\.]+)\\.("+QUERY+"|"+DEPENDENCY+")$", Pattern.CASE_INSENSITIVE);
+
+  /**
+   * Builds one {@link AnalyticsRequest} per request name that has at least one
+   * expression (plain or hidden) among the parameters, and attaches the field,
+   * range and query facets declared for that name.
+   *
+   * Facet parameters may arrive before or after the parameter that declares the
+   * facet, so declarations (the {@code *Set} maps) and configured facet objects
+   * (the {@code *Map} maps) are collected separately and joined once all
+   * parameters have been read.
+   *
+   * @param schema schema used to resolve facet field names
+   * @param params the request parameters to scan
+   * @return the requests found, in the iteration order of the internal map
+   * @throws SolrException with BAD_REQUEST if query facet dependencies cannot be resolved
+   */
+  public static List<AnalyticsRequest> parse(IndexSchema schema, SolrParams params) {
+    Map<String, AnalyticsRequest> requestMap = new HashMap<String, AnalyticsRequest>();
+    Map<String, Map<String,FieldFacetRequest>> fieldFacetMap = new HashMap<String, Map<String,FieldFacetRequest>>();
+    Map<String, Set<String>> fieldFacetSet = new HashMap<String, Set<String>>();
+    Map<String, Map<String,RangeFacetRequest>> rangeFacetMap = new HashMap<String, Map<String,RangeFacetRequest>>();
+    Map<String, Set<String>> rangeFacetSet = new HashMap<String, Set<String>>();
+    Map<String, Map<String,QueryFacetRequest>> queryFacetMap = new HashMap<String, Map<String,QueryFacetRequest>>();
+    Map<String, Set<String>> queryFacetSet = new HashMap<String, Set<String>>();
+    List<AnalyticsRequest> requestList = new ArrayList<AnalyticsRequest>();
+
+    Iterator<String> paramsIterator = params.getParameterNamesIterator();
+    while (paramsIterator.hasNext()) {
+      String param = paramsIterator.next();
+      Matcher m;
+
+      // The patterns are tried in a fixed order; the first one that matches wins.
+      if ((m = statPattern.matcher(param)).matches()) {
+        makeExpression(requestMap,m.group(1),m.group(2),params.get(param));
+      } else if ((m = hiddenStatPattern.matcher(param)).matches()) {
+        makeHiddenExpression(requestMap,m.group(1),m.group(2),params.get(param));
+      } else if ((m = fieldFacetPattern.matcher(param)).matches()) {
+        makeFieldFacet(schema,fieldFacetMap,fieldFacetSet,m.group(1),params.getParams(param));
+      } else if ((m = fieldFacetParamPattern.matcher(param)).matches()) {
+        setFieldFacetParam(schema,fieldFacetMap,m.group(1),m.group(2),m.group(3),params.getParams(param));
+      } else if ((m = rangeFacetPattern.matcher(param)).matches()) {
+        makeRangeFacet(schema,rangeFacetSet,m.group(1),params.getParams(param));
+      } else if ((m = rangeFacetParamPattern.matcher(param)).matches()) {
+        setRangeFacetParam(schema,rangeFacetMap,m.group(1),m.group(2),m.group(3),params.getParams(param));
+      } else if ((m = queryFacetPattern.matcher(param)).matches()) {
+        makeQueryFacet(schema,queryFacetSet,m.group(1),params.getParams(param));
+      } else if ((m = queryFacetParamPattern.matcher(param)).matches()) {
+        setQueryFacetParam(schema,queryFacetMap,m.group(1),m.group(2),m.group(3),params.getParams(param));
+      }
+    }
+
+    for (Map.Entry<String, AnalyticsRequest> entry : requestMap.entrySet()) {
+      String reqName = entry.getKey();
+      AnalyticsRequest ar = entry.getValue();
+
+      // makeFieldFacet fills fieldFacetSet and fieldFacetMap together, so a
+      // declared field always has a request object.
+      List<FieldFacetRequest> ffrs = new ArrayList<FieldFacetRequest>();
+      Set<String> declaredFields = fieldFacetSet.get(reqName);
+      if (declaredFields != null) {
+        Map<String,FieldFacetRequest> configured = fieldFacetMap.get(reqName);
+        for (String field : declaredFields) {
+          ffrs.add(configured.get(field));
+        }
+      }
+      ar.setFieldFacets(ffrs);
+
+      // Range facets only get an object once one of their parameters is seen,
+      // so both the per-request map and the per-field entry may be missing.
+      List<RangeFacetRequest> rfrs = new ArrayList<RangeFacetRequest>();
+      Set<String> declaredRanges = rangeFacetSet.get(reqName);
+      Map<String,RangeFacetRequest> configuredRanges = rangeFacetMap.get(reqName);
+      if (declaredRanges != null && configuredRanges != null) {
+        for (String field : declaredRanges) {
+          RangeFacetRequest rfr = configuredRanges.get(field);
+          if (rfr != null) {
+            rfrs.add(rfr);
+          }
+        }
+      }
+      ar.setRangeFacets(rfrs);
+
+      // Same as above for query facets; insertion goes through addQueryFacet
+      // so that facets end up after the facets they depend on.
+      List<QueryFacetRequest> qfrs = new ArrayList<QueryFacetRequest>();
+      Set<String> declaredQueries = queryFacetSet.get(reqName);
+      Map<String,QueryFacetRequest> configuredQueries = queryFacetMap.get(reqName);
+      if (declaredQueries != null && configuredQueries != null) {
+        for (String name : declaredQueries) {
+          QueryFacetRequest qfr = configuredQueries.get(name);
+          if (qfr != null) {
+            addQueryFacet(qfrs,qfr);
+          }
+        }
+      }
+      // Whatever addQueryFacet could not resolve is still listed as a dependency.
+      for (QueryFacetRequest qfr : qfrs) {
+        if (qfr.getDependencies().size()>0) {
+          throw new SolrException(ErrorCode.BAD_REQUEST,"The query facet dependencies "+qfr.getDependencies().toString()+" either do not exist or are defined in a dependency loop.");
+        }
+      }
+      ar.setQueryFacets(qfrs);
+      requestList.add(ar);
+    }
+    return requestList;
+  }
+
+  /**
+   * Tells whether {@code paramType} equals one of {@code names}, ignoring case.
+   * The patterns above are compiled CASE_INSENSITIVE, so the captured type can
+   * arrive in any letter case.
+   */
+  private static boolean isOneOf(String paramType, String... names) {
+    for (String name : names) {
+      if (name.equalsIgnoreCase(paramType)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Declares field facets for a request, creating a default request object for each new field. */
+  private static void makeFieldFacet(IndexSchema schema, Map<String, Map<String, FieldFacetRequest>> fieldFacetMap, Map<String, Set<String>> fieldFacetSet, String requestName, String[] fields) {
+    Map<String, FieldFacetRequest> facetMap = fieldFacetMap.get(requestName);
+    if (facetMap == null) {
+      facetMap = new HashMap<String, FieldFacetRequest>();
+      fieldFacetMap.put(requestName, facetMap);
+    }
+    Set<String> set = fieldFacetSet.get(requestName);
+    if (set == null) {
+      set = new HashSet<String>();
+      fieldFacetSet.put(requestName, set);
+    }
+    for (String field : fields) {
+      if (facetMap.get(field) == null) {
+        facetMap.put(field,new FieldFacetRequest(schema.getField(field)));
+      }
+      set.add(field);
+    }
+  }
+
+  /** Applies one field facet parameter, creating the facet's request object on first use. */
+  private static void setFieldFacetParam(IndexSchema schema, Map<String, Map<String, FieldFacetRequest>> fieldFacetMap, String requestName, String field, String paramType, String[] params) {
+    Map<String, FieldFacetRequest> facetMap = fieldFacetMap.get(requestName);
+    if (facetMap == null) {
+      facetMap = new HashMap<String, FieldFacetRequest>();
+      fieldFacetMap.put(requestName, facetMap);
+    }
+    FieldFacetRequest fr = facetMap.get(field);
+    if (fr == null) {
+      fr = new FieldFacetRequest(schema.getField(field));
+      facetMap.put(field,fr);
+    }
+    if (isOneOf(paramType,"limit","l")) {
+      fr.setLimit(Integer.parseInt(params[0]));
+    } else if (isOneOf(paramType,"offset","off")) {
+      fr.setOffset(Integer.parseInt(params[0]));
+    } else if (isOneOf(paramType,"hidden","h")) {
+      fr.setHidden(Boolean.parseBoolean(params[0]));
+    } else if (isOneOf(paramType,"showmissing","sm")) {
+      fr.showMissing(Boolean.parseBoolean(params[0]));
+    } else if (isOneOf(paramType,"sortstatistic","sortstat","ss")) {
+      fr.setSort(new FacetSortSpecification(params[0],fr.getDirection()));
+    } else if (isOneOf(paramType,"sortdirection","sd")) {
+      fr.setDirection(params[0]);
+    }
+  }
+
+  /** Declares range facets for a request; the request objects are created by setRangeFacetParam. */
+  private static void makeRangeFacet(IndexSchema schema, Map<String, Set<String>> rangeFacetSet, String requestName, String[] fields) {
+    Set<String> set = rangeFacetSet.get(requestName);
+    if (set == null) {
+      set = new HashSet<String>();
+      rangeFacetSet.put(requestName, set);
+    }
+    for (String field : fields) {
+      set.add(field);
+    }
+  }
+
+  /** Applies one range facet parameter, creating the facet's request object on first use. */
+  private static void setRangeFacetParam(IndexSchema schema, Map<String, Map<String, RangeFacetRequest>> rangeFacetMap, String requestName, String field, String paramType, String[] params) {
+    Map<String, RangeFacetRequest> facetMap = rangeFacetMap.get(requestName);
+    if (facetMap == null) {
+      facetMap = new HashMap<String, RangeFacetRequest>();
+      rangeFacetMap.put(requestName, facetMap);
+    }
+    RangeFacetRequest rr = facetMap.get(field);
+    if (rr == null) {
+      rr = new RangeFacetRequest(schema.getField(field));
+      facetMap.put(field,rr);
+    }
+    if (isOneOf(paramType,"start","st")) {
+      rr.setStart(params[0]);
+    } else if (isOneOf(paramType,"end","e")) {
+      rr.setEnd(params[0]);
+    } else if (isOneOf(paramType,"gap","g")) {
+      rr.setGaps(params[0].split(","));
+    } else if (isOneOf(paramType,"hardend","he")) {
+      rr.setHardEnd(Boolean.parseBoolean(params[0]));
+    } else if (isOneOf(paramType,"includebound","ib")) {
+      for (String param : params) {
+        rr.addInclude(FacetRangeInclude.get(param));
+      }
+    } else if (isOneOf(paramType,"otherrange","or")) {
+      for (String param : params) {
+        rr.addOther(FacetRangeOther.get(param));
+      }
+    }
+  }
+
+  /** Declares query facets for a request; the request objects are created by setQueryFacetParam. */
+  private static void makeQueryFacet(IndexSchema schema,Map<String, Set<String>> queryFacetSet, String requestName, String[] names) {
+    Set<String> set = queryFacetSet.get(requestName);
+    if (set == null) {
+      set = new HashSet<String>();
+      queryFacetSet.put(requestName, set);
+    }
+    for (String name : names) {
+      set.add(name);
+    }
+  }
+
+  /** Adds queries or dependencies to a query facet, creating its request object on first use. */
+  private static void setQueryFacetParam(IndexSchema schema, Map<String, Map<String, QueryFacetRequest>> queryFacetMap, String requestName, String name, String paramType, String[] params) {
+    Map<String, QueryFacetRequest> facetMap = queryFacetMap.get(requestName);
+    if (facetMap == null) {
+      facetMap = new HashMap<String, QueryFacetRequest>();
+      queryFacetMap.put(requestName, facetMap);
+    }
+    QueryFacetRequest qr = facetMap.get(name);
+    if (qr == null) {
+      qr = new QueryFacetRequest(name);
+      facetMap.put(name,qr);
+    }
+    if (isOneOf(paramType,"query","q")) {
+      for (String query : params) {
+        qr.addQuery(query);
+      }
+    } else if (isOneOf(paramType,"dependency","d")) {
+      for (String depend : params) {
+        qr.addDependency(depend);
+      }
+    }
+  }
+
+  /** Registers a hidden expression on the named request, creating the request on first use. */
+  private static void makeHiddenExpression(Map<String, AnalyticsRequest> requestMap, String requestName, String expressionName, String expression) {
+    AnalyticsRequest req = requestMap.get(requestName);
+    if (req == null) {
+      req = new AnalyticsRequest(requestName);
+      requestMap.put(requestName, req);
+    }
+    req.addHiddenExpression(new ExpressionRequest(expressionName,expression));
+  }
+
+  /** Registers an expression on the named request, creating the request on first use. */
+  private static void makeExpression(Map<String, AnalyticsRequest> requestMap, String requestName, String expressionName, String expression) {
+    AnalyticsRequest req = requestMap.get(requestName);
+    if (req == null) {
+      req = new AnalyticsRequest(requestName);
+      requestMap.put(requestName, req);
+    }
+    req.addExpression(new ExpressionRequest(expressionName,expression));
+  }
+
+  /**
+   * Inserts {@code queryFacet} into {@code currentList} in front of the first
+   * entry that lists it as a dependency, or at the end when no entry does.
+   *
+   * Dependency sets are consumed along the way: the names of the entries that
+   * end up before the new facet are removed from its dependencies, and its own
+   * name is removed from the dependencies of every entry that ends up after it.
+   * Names still present afterwards are the ones {@link #parse} reports as unresolved.
+   */
+  private static void addQueryFacet(List<QueryFacetRequest> currentList, QueryFacetRequest queryFacet) {
+    Set<String> depends = queryFacet.getDependencies();
+    int place = 0;
+    for (QueryFacetRequest qfr : currentList) {
+      if (qfr.getDependencies().remove(queryFacet.getName())) {
+        break;
+      }
+      place++;
+      depends.remove(qfr.getName());
+    }
+    currentList.add(place,queryFacet);
+    for (int count = place+1; count < currentList.size(); count++) {
+      currentList.get(count).getDependencies().remove(queryFacet.getName());
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsStats.java b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsStats.java
new file mode 100644
index 00000000000..a740fadfd5c
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsStats.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.request;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
+import org.apache.solr.analytics.accumulator.BasicAccumulator;
+import org.apache.solr.analytics.accumulator.FacetingAccumulator;
+import org.apache.solr.analytics.accumulator.ValueAccumulator;
+import org.apache.solr.analytics.plugin.AnalyticsStatisticsCollector;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.SolrIndexSearcher;
+
+/**
+ * Class which computes the set of {@link AnalyticsRequest}s.
+ */
+public class AnalyticsStats {
+  protected DocSet docs;
+  protected SolrParams params;
+  protected SolrIndexSearcher searcher;
+  protected SolrQueryRequest req;
+  protected AnalyticsStatisticsCollector statsCollector;
+
+  /**
+   * @param req the query request; its searcher is used for schema and reader access
+   * @param docs the documents the analytics are computed over
+   * @param params parameters that describe the analytics requests
+   * @param statsCollector collector that receives usage counters for this run
+   */
+  public AnalyticsStats(SolrQueryRequest req, DocSet docs, SolrParams params, AnalyticsStatisticsCollector statsCollector) {
+    this.req = req;
+    this.searcher = req.getSearcher();
+    this.docs = docs;
+    this.params = params;
+    this.statsCollector = statsCollector;
+  }
+
+  /**
+   * Calculates the analytics requested in the Parameters.
+   *
+   * Each request is evaluated on its own: an accumulator is created, fed every
+   * document of the doc set segment by segment, post-processed, computed, and
+   * its exported result is stored under the request name.
+   *
+   * @return List of results formatted to mirror the input XML.
+   * @throws IOException if execution fails
+   */
+  public NamedList execute() throws IOException {
+    statsCollector.startRequest();
+    NamedList res = new NamedList();
+    List<AnalyticsRequest> requests = AnalyticsRequestFactory.parse(searcher.getSchema(), params);
+
+    // NOTE(review): this early return happens after startRequest() but skips
+    // endRequest() - confirm that is what the collector expects.
+    if (requests == null || requests.isEmpty()) {
+      return res;
+    }
+    statsCollector.addRequests(requests.size());
+
+    // Computing each Analytics Request Separately
+    for (AnalyticsRequest areq : requests) {
+      // The Accumulator which will control the statistics generation
+      // for the entire analytics request
+      ValueAccumulator accumulator;
+
+      // The number of total facet requests
+      int facets = areq.getFieldFacets().size()+areq.getRangeFacets().size()+areq.getQueryFacets().size();
+      try {
+        if (facets == 0) {
+          accumulator = BasicAccumulator.create(searcher, docs, areq);
+        } else {
+          accumulator = FacetingAccumulator.create(searcher, docs, areq, req);
+        }
+      } catch (IOException e) {
+        // NOTE(review): a request whose accumulator cannot be created is
+        // dropped from the result with only a message on stderr.
+        System.err.println(e.getMessage());
+        continue;
+      }
+
+      // Both counters below are read through BasicAccumulator.
+      // NOTE(review): presumably FacetingAccumulator is a BasicAccumulator - confirm.
+      BasicAccumulator basic = (BasicAccumulator) accumulator;
+      statsCollector.addStatsCollected(basic.getNumStatsCollectors());
+      statsCollector.addStatsRequests(areq.getExpressions().size());
+      statsCollector.addFieldFacets(areq.getFieldFacets().size());
+      statsCollector.addRangeFacets(areq.getRangeFacets().size());
+      statsCollector.addQueryFacets(areq.getQueryFacets().size());
+      statsCollector.addQueries(basic.getNumQueries());
+
+      // Loop through the documents returned by the query and add to accumulator
+      Filter filter = docs.getTopFilter();
+      List<AtomicReaderContext> contexts = searcher.getTopReaderContext().leaves();
+      for (AtomicReaderContext context : contexts) {
+        DocIdSet dis = filter.getDocIdSet(context, null); // solr docsets already exclude any deleted docs
+        DocIdSetIterator disi = (dis == null) ? null : dis.iterator();
+        if (disi == null) {
+          continue; // nothing to collect in this segment
+        }
+
+        accumulator.setNextReader(context);
+        for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
+          // Add a document to the statistics being generated
+          accumulator.collect(doc);
+        }
+      }
+
+      // do some post-processing
+      accumulator.postProcess();
+
+      // compute the stats
+      accumulator.compute();
+
+      res.add(areq.getName(),accumulator.export());
+    }
+
+    statsCollector.endRequest();
+    return res;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/request/ExpressionRequest.java b/solr/core/src/java/org/apache/solr/analytics/request/ExpressionRequest.java
new file mode 100644
index 00000000000..1549cdfb8a6
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/request/ExpressionRequest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.request;
+
+import org.apache.solr.analytics.expression.Expression;
+
+/**
+ * Contains name and string representation of an expression.
+ */
+public class ExpressionRequest implements Comparable<ExpressionRequest> {
+  private String name;
+  private String expressionString;
+  private Expression expression;
+
+  /**
+   * @param name The name of the Expression.
+   * @param expressionString The string representation of the desired Expression.
+   */
+  public ExpressionRequest(String name, String expressionString) {
+    this.name = name;
+    this.expressionString = expressionString;
+  }
+
+  /** @return the name of this expression */
+  public String getName() {
+    return name;
+  }
+
+  /** @param name the new name of this expression */
+  public void setName(String name) {
+    this.name = name;
+  }
+
+  /** @return the textual form of the expression */
+  public String getExpressionString() {
+    return expressionString;
+  }
+
+  /** @param expressionString the new textual form of the expression */
+  public void setExpressionString(String expressionString) {
+    this.expressionString = expressionString;
+  }
+
+  /** @return the expression object attached with {@link #setExpression}, if any */
+  public Expression getExpression() {
+    return expression;
+  }
+
+  /** @param expression the expression object to attach to this request */
+  public void setExpression(Expression expression) {
+    this.expression = expression;
+  }
+
+  /** Orders requests by name, using the natural ordering of the name strings. */
+  @Override
+  public int compareTo(ExpressionRequest o) {
+    String otherName = o.getName();
+    return name.compareTo(otherName);
+  }
+
+  // NOTE(review): the literal returned here is empty in this copy of the patch.
+  @Override
+  public String toString() {
+    return "";
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/request/FacetRequest.java b/solr/core/src/java/org/apache/solr/analytics/request/FacetRequest.java
new file mode 100644
index 00000000000..6cca99d6f3d
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/request/FacetRequest.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.solr.analytics.request; + +public interface FacetRequest { + + /** + * Get the name of this facet (commonly the field name) + * @return the name + */ + String getName(); +} diff --git a/solr/core/src/java/org/apache/solr/analytics/request/FieldFacetRequest.java b/solr/core/src/java/org/apache/solr/analytics/request/FieldFacetRequest.java new file mode 100644 index 00000000000..7884476d577 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/request/FieldFacetRequest.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.request; + +import org.apache.solr.analytics.util.AnalyticsParams; +import org.apache.solr.schema.SchemaField; + +import java.util.Locale; + + +/** + * Contains all of the specifications for a field facet. 
+ */ +public class FieldFacetRequest extends AbstractFieldFacetRequest { + + private FacetSortSpecification sort = null; + private FacetSortDirection dir = null; + private int limit; + private int offset; + private boolean missing; + private boolean hidden; + + + public static enum FacetSortDirection { + ASCENDING , + DESCENDING; + + public static FacetSortDirection fromExternal(String value){ + final String sort = value.toLowerCase(Locale.ROOT); + if( "asc".equals(sort) ) return ASCENDING; + if( "ascending".equals(sort) ) return ASCENDING; + if( "desc".equals(sort) ) return DESCENDING; + if( "descending".equals(sort) ) return DESCENDING; + return Enum.valueOf(FacetSortDirection.class, value); + } + } + + /** + * Specifies how to sort the buckets of a field facet. + * + */ + public static class FacetSortSpecification { + private String statistic; + private FacetSortDirection direction = FacetSortDirection.DESCENDING; + + public FacetSortSpecification(){} + + /** + * @param statistic The name of a statistic specified in the {@link AnalyticsRequest} + * which is wrapping the {@link FieldFacetRequest} being sorted. 
+ */ + public FacetSortSpecification(String statistic) { + this.statistic = statistic; + } + + public FacetSortSpecification(String statistic, FacetSortDirection direction) { + this(statistic); + this.direction = direction; + } + + public String getStatistic() { + return statistic; + } + public void setStatistic(String statistic) { + this.statistic = statistic; + } + public FacetSortDirection getDirection() { + return direction; + } + public void setDirection(FacetSortDirection direction) { + this.direction = direction; + } + + public static FacetSortSpecification fromExternal(String spec){ + String[] parts = spec.split(" ",2); + if( parts.length == 1 ){ + return new FacetSortSpecification(parts[0]); + } else { + return new FacetSortSpecification(parts[0], FacetSortDirection.fromExternal(parts[1])); + } + } + + @Override + public String toString() { + return ""; + } + } + + public FieldFacetRequest(SchemaField field) { + super(field); + this.limit = AnalyticsParams.DEFAULT_LIMIT; + this.hidden = AnalyticsParams.DEFAULT_HIDDEN; + } + + public FacetSortDirection getDirection() { + return dir; + } + + public void setDirection(String dir) { + this.dir = FacetSortDirection.fromExternal(dir); + if (sort!=null) { + sort.setDirection(this.dir); + } + } + + public FacetSortSpecification getSort() { + return sort; + } + + public void setSort(FacetSortSpecification sort) { + this.sort = sort; + } + + public boolean showsMissing() { + return missing; + } + + /** + * If there are missing values in the facet field, include the bucket + * for the missing facet values in the facet response. 
+ * @param missing true/false if we calculate missing + */ + public void showMissing(boolean missing) { + this.missing = missing; + } + + public int getLimit() { + return limit; + } + + public void setLimit(int limit) { + this.limit = limit; + } + + public int getOffset() { + return offset; + } + + public void setOffset(int offset) { + this.offset = offset; + } + + public boolean isHidden() { + return hidden; + } + + public void setHidden(boolean hidden) { + this.hidden = hidden; + } + + @Override + public String toString() { + return ""; + } + + + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/request/QueryFacetRequest.java b/solr/core/src/java/org/apache/solr/analytics/request/QueryFacetRequest.java new file mode 100644 index 00000000000..6d36d58cb8b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/request/QueryFacetRequest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.request; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Contains all of the specifications for a query facet. 
+ */
+public class QueryFacetRequest implements FacetRequest {
+  /** Name returned by {@link #getName()}. */
+  private String name;
+  /** Query strings added to this facet. */
+  private List queries;
+  /** Dependencies registered through {@link #addDependency(String)}. */
+  private Set dependencies;
+
+  /**
+   * Creates an unnamed query facet with empty query and dependency collections.
+   * Delegating to the named constructor guarantees that the query list is
+   * initialised, so {@link #addQuery(String)} works on instances built this way.
+   */
+  public QueryFacetRequest() {
+    this(null);
+  }
+
+  /**
+   * Creates a query facet with empty query and dependency collections.
+   * @param name The name of the query facet.
+   */
+  public QueryFacetRequest(String name) {
+    this.name = name;
+    this.queries = new ArrayList();
+    this.dependencies = new HashSet();
+  }
+
+  /**
+   * @return the list of queries held by this facet
+   */
+  public List getQueries() {
+    return queries;
+  }
+
+  /**
+   * Replaces the list of queries; the given list is stored as-is, not copied.
+   * @param queries the new list of queries
+   */
+  public void setQueries(List queries) {
+    this.queries = queries;
+  }
+
+  /**
+   * Appends a query to the current list of queries.
+   * @param query the query to add
+   */
+  public void addQuery(String query) {
+    queries.add(query);
+  }
+
+  /**
+   * @return the set of dependencies held by this facet
+   */
+  public Set getDependencies() {
+    return dependencies;
+  }
+
+  /**
+   * Replaces the set of dependencies; the given set is stored as-is, not copied.
+   * @param dependencies the new set of dependencies
+   */
+  public void setDependencies(Set dependencies) {
+    this.dependencies = dependencies;
+  }
+
+  /**
+   * Adds a dependency to the current set of dependencies.
+   * @param dependency the dependency to add
+   */
+  public void addDependency(String dependency) {
+    dependencies.add(dependency);
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public void setName(String name) {
+    this.name = name;
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/request/RangeFacetRequest.java b/solr/core/src/java/org/apache/solr/analytics/request/RangeFacetRequest.java
new file mode 100644
index 00000000000..8c70b98fa01
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/request/RangeFacetRequest.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.request;
+
+import java.util.Arrays;
+import java.util.EnumSet;
+
+import org.apache.solr.analytics.util.AnalyticsParams;
+import org.apache.solr.common.params.FacetParams.FacetRangeInclude;
+import org.apache.solr.common.params.FacetParams.FacetRangeOther;
+import org.apache.solr.schema.SchemaField;
+
+/**
+ * Contains all of the specifications for a range facet.
+ */
+public class RangeFacetRequest extends AbstractFieldFacetRequest {
+  protected String start;
+  protected String end;
+  protected String[] gaps;
+  protected boolean hardEnd = false;
+  protected EnumSet include;
+  // false while 'include' still holds the default set from the constructor
+  protected boolean includeCalled = false;
+  protected EnumSet others;
+  // false while 'others' still holds the default set from the constructor
+  protected boolean othersCalled = false;
+
+  /**
+   * Creates a range facet on the given field, with 'include' and 'others'
+   * initialised to {@link AnalyticsParams#DEFAULT_INCLUDE} and
+   * {@link AnalyticsParams#DEFAULT_OTHER}.
+   * @param field the field to facet on
+   */
+  public RangeFacetRequest(SchemaField field) {
+    super(field);
+    include = EnumSet.of(AnalyticsParams.DEFAULT_INCLUDE);
+    others = EnumSet.of(AnalyticsParams.DEFAULT_OTHER);
+  }
+
+  /**
+   * Creates a range facet on the given field with explicit range settings.
+   * Delegates to {@link #RangeFacetRequest(SchemaField)} so that 'include'
+   * and 'others' receive the same defaults and are never left null.
+   * @param field the field to facet on
+   * @param start the start of the range
+   * @param end the end of the range
+   * @param gaps the gap specifications
+   */
+  public RangeFacetRequest(SchemaField field, String start, String end, String[] gaps) {
+    this(field);
+    this.start = start;
+    this.end = end;
+    this.gaps = gaps;
+  }
+
+  public String getStart() {
+    return start;
+  }
+
+  public void setStart(String start) {
+    this.start = start;
+  }
+
+  public String getEnd() {
+    return end;
+  }
+
+  public void setEnd(String end) {
+    this.end = end;
+  }
+
+  public EnumSet getInclude() {
+    return include;
+  }
+
+  /**
+   * Replaces the include set and marks it as explicitly specified.
+   * The given set is stored as-is, not copied.
+   */
+  public void setInclude(EnumSet include) {
+    includeCalled = true;
+    this.include = include;
+  }
+
+  /**
+   * Adds an include option. The first explicit option replaces the default
+   * set installed by the constructor; later ones are added to the set.
+   */
+  public void addInclude(FacetRangeInclude include) {
+    if (includeCalled) {
+      this.include.add(include);
+    } else {
+      includeCalled = true;
+      this.include = EnumSet.of(include);
+    }
+  }
+
+  public String[] getGaps() {
+    return gaps;
+  }
+
+  public void setGaps(String[] gaps) {
+    this.gaps = gaps;
+  }
+
+  public boolean isHardEnd() {
+    return hardEnd;
+  }
+
+  public void setHardEnd(boolean hardEnd) {
+    this.hardEnd = hardEnd;
+  }
+
+  public EnumSet getOthers() {
+    return others;
+  }
+
+  /**
+   * Replaces the others set and marks it as explicitly specified.
+   * The given set is stored as-is, not copied.
+   */
+  public void setOthers(EnumSet others) {
+    othersCalled = true;
+    this.others = others;
+  }
+
+  /**
+   * Adds an "other" option. The first explicit option replaces the default
+   * set installed by the constructor; later ones are added to the set.
+   */
+  public void addOther(FacetRangeOther other) {
+    if (othersCalled) {
+      this.others.add(other);
+    } else {
+      othersCalled = true;
+      this.others = EnumSet.of(other);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "";
+  }
+
+
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/request/package.html b/solr/core/src/java/org/apache/solr/analytics/request/package.html
new file mode 100644
index 00000000000..08822a9df7f
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/request/package.html
@@ -0,0 +1,27 @@
+ + + + + + + +

    +Request objects for creating Analytics requests +

    + + diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/AbstractDelegatingStatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/AbstractDelegatingStatsCollector.java new file mode 100644 index 00000000000..093ab29f03c --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/AbstractDelegatingStatsCollector.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.statistics; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.util.mutable.MutableValue; + +/** + * AbstractDelegationStatsCollector objects wrap other StatsCollectors. + * While they compute their own statistics they pass along all inputs and requests + * to the delegates as well. + */ +public abstract class AbstractDelegatingStatsCollector implements StatsCollector{ + protected final StatsCollector delegate; + protected final Set statsList; + MutableValue value; + FunctionValues function; + + /** + * @param delegate The delegate computing statistics on the same set of values. 
+ */ + public AbstractDelegatingStatsCollector(StatsCollector delegate) { + this.delegate = delegate; + this.statsList = delegate.getStatsList(); + } + + public void setNextReader(AtomicReaderContext context) throws IOException { + delegate.setNextReader(context); + value = getValue(); + function = getFunction(); + } + + public StatsCollector delegate(){ + return delegate; + } + + public Set getStatsList(){ + return statsList; + } + + public MutableValue getValue() { + return delegate.getValue(); + } + + public FunctionValues getFunction() { + return delegate.getFunction(); + } + + public void collect(int doc) { + delegate.collect(doc); + } + + public String valueSourceString() { + return delegate.valueSourceString(); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/MedianStatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/MedianStatsCollector.java new file mode 100644 index 00000000000..c8f9ee064bc --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/MedianStatsCollector.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.statistics; + +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +import org.apache.solr.analytics.util.MedianCalculator; + +/** + * MedianStatsCollector computes the median. + */ +public class MedianStatsCollector extends AbstractDelegatingStatsCollector{ + + private final List values = new ArrayList(); + protected double median; + + public MedianStatsCollector(StatsCollector delegate) { + super(delegate); + } + + public Double getMedian() { + return new Double(MedianCalculator.getMedian(values)); + } + + @Override + public Comparable getStat(String stat) { + if (stat.equals("median")) { + return new Double(median); + } + return delegate.getStat(stat); + } + + public void compute(){ + delegate.compute(); + median = getMedian(); + } + + @Override + public void collect(int doc) { + super.collect(doc); + if (value.exists) { + values.add(function.doubleVal(doc)); + } + } +} +class DateMedianStatsCollector extends MedianStatsCollector{ + + public DateMedianStatsCollector(StatsCollector delegate) { + super(delegate); + } + + @Override + public Comparable getStat(String stat) { + if (stat.equals("median")) { + return new Date((long)median); + } + return delegate.getStat(stat); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/MinMaxStatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/MinMaxStatsCollector.java new file mode 100644 index 00000000000..08608861789 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/MinMaxStatsCollector.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.statistics; + +import java.io.IOException; +import java.util.Locale; +import java.util.Set; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.FunctionValues.ValueFiller; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.util.mutable.MutableValue; + +/** + * MinMaxStatsCollector computes the min, max, number of values and number of missing values. 
+ */
+public class MinMaxStatsCollector implements StatsCollector{
+  protected long missingCount = 0;
+  protected long valueCount = 0;
+  // max and min stay null until the first document with an existing value is collected
+  protected MutableValue max;
+  protected MutableValue min;
+  // mutable value owned by valueFiller; refilled on every collect() call
+  protected MutableValue value;
+  protected final Set statsList;
+  protected final ValueSource source;
+  protected FunctionValues function;
+  protected ValueFiller valueFiller;
+
+  /**
+   * @param source The value source the statistics are computed on.
+   * @param statsList The set returned by {@link #getStatsList()}.
+   */
+  public MinMaxStatsCollector(ValueSource source, Set statsList) {
+    this.source = source;
+    this.statsList = statsList;
+  }
+
+  /**
+   * Obtains the function values, value filler and mutable value for the given reader context.
+   * @param context The context to read documents from.
+   * @throws IOException if the values cannot be obtained from the value source
+   */
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    function = source.getValues(null, context);
+    valueFiller = function.getValueFiller();
+    value = valueFiller.getValue();
+  }
+
+  /**
+   * Fills the value for the given document and updates the value count, max and min,
+   * or the missing count when the document has no value.
+   * @param doc Document to collect from
+   */
+  public void collect(int doc) {
+    valueFiller.fillValue(doc);
+    if( value.exists ){
+      valueCount += 1;
+      if ( max==null ) max = value.duplicate();
+      else if( !max.exists || value.compareTo(max) > 0 ) max.copy(value);
+      if ( min==null ) min = value.duplicate();
+      else if( !min.exists || value.compareTo(min) < 0 ) min.copy(value);
+    } else {
+      missingCount += 1;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return String.format(Locale.ROOT, "", min, max, valueCount, missingCount );
+  }
+
+  /**
+   * Returns the value of "min", "max", "count" or "missing".
+   * @param stat the name of the statistic
+   * @return the statistic, or null if the name is not handled here or if
+   *         min/max is requested before any value has been collected
+   */
+  public Comparable getStat(String stat){
+    if (stat.equals("min")&&min!=null) {
+      return (Comparable)min.toObject();
+    }
+    // guard on max itself (was min) so the value dereferenced is the one checked
+    if (stat.equals("max")&&max!=null) {
+      return (Comparable)max.toObject();
+    }
+    if (stat.equals("count")) {
+      return new Long(valueCount);
+    }
+    if (stat.equals("missing")) {
+      return new Long(missingCount);
+    }
+    return null;
+  }
+
+  public Set getStatsList() {
+    return statsList;
+  }
+
+  // nothing to finalize: all statistics are maintained incrementally in collect()
+  @Override
+  public void compute() { }
+
+  @Override
+  public MutableValue getValue() {
+    return value;
+  }
+
+  @Override
+  public FunctionValues getFunction() {
+    return function;
+  }
+
+  public String valueSourceString() {
+    return source.toString();
+  }
+
+  /**
+   * @param stat the name of a statistic
+   * @return the statistic name applied to the value source string, i.e. stat(source)
+   */
+  public String statString(String stat) {
+    return stat+"("+valueSourceString()+")";
+  }
+}
diff --git
a/solr/core/src/java/org/apache/solr/analytics/statistics/NumericStatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/NumericStatsCollector.java new file mode 100644 index 00000000000..ef3de5dc02b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/NumericStatsCollector.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.statistics; + +import java.util.Set; + +import org.apache.lucene.queries.function.ValueSource; + +/** + * NumericStatsCollector computes the sum, sum of squares, mean and standard deviation. 
+ */
+public class NumericStatsCollector extends MinMaxStatsCollector {
+  protected double sum = 0;
+  protected double sumOfSquares = 0;
+  protected double mean = 0;
+  protected double stddev = 0;
+
+  /**
+   * @param source The value source the statistics are computed on.
+   * @param statsList The set of statistics, passed through to the superclass.
+   */
+  public NumericStatsCollector(ValueSource source, Set statsList) {
+    super(source, statsList);
+  }
+
+  /**
+   * Collects min/max/count through the superclass and, when the document has a
+   * value, adds it to the running sum and sum of squares.
+   * @param doc Document to collect from
+   */
+  public void collect(int doc) {
+    super.collect(doc);
+    // super.collect() has just filled the inherited 'value' for this document;
+    // only existing values are accumulated, matching the valueCount that mean
+    // and stddev are divided by in compute().
+    if (value.exists) {
+      double docValue = function.doubleVal(doc);
+      sum += docValue;
+      sumOfSquares += (docValue * docValue);
+    }
+  }
+
+  /**
+   * Returns "sum", "sumofsquares", "mean" or "stddev"; any other name is
+   * forwarded to the superclass.
+   */
+  @Override
+  public Comparable getStat(String stat) {
+    if (stat.equals("sum")) {
+      return new Double(sum);
+    }
+    if (stat.equals("sumofsquares")) {
+      return new Double(sumOfSquares);
+    }
+    if (stat.equals("mean")) {
+      return new Double(mean);
+    }
+    if (stat.equals("stddev")) {
+      return new Double(stddev);
+    }
+    return super.getStat(stat);
+  }
+
+  /**
+   * Finalizes mean and standard deviation from the accumulated sums.
+   * Mean is 0 when no values were collected; stddev is 0 for fewer than two values.
+   */
+  @Override
+  public void compute(){
+    super.compute();
+    mean = (valueCount==0)? 0:sum / valueCount;
+    if (valueCount <= 1) {
+      stddev = 0.0D;
+    } else {
+      // E[x^2] - E[x]^2 can come out slightly negative through floating point
+      // rounding (e.g. all values equal); clamp so Math.sqrt does not return NaN.
+      double variance = (sumOfSquares/valueCount) - (mean*mean);
+      stddev = Math.sqrt(Math.max(0.0D, variance));
+    }
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/PercentileStatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/PercentileStatsCollector.java
new file mode 100644
index 00000000000..88e1c748d31
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/statistics/PercentileStatsCollector.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.statistics; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.solr.analytics.util.PercentileCalculator; + +import com.google.common.collect.Iterables; + +/** + * PercentileStatsCollector computes a given list of percentiles. + */ +@SuppressWarnings("rawtypes") +public class PercentileStatsCollector extends AbstractDelegatingStatsCollector{ + public final List values = new ArrayList(); + public static final Pattern PERCENTILE_PATTERN = Pattern.compile("perc(?:entile)?_(\\d+)",Pattern.CASE_INSENSITIVE); + protected final double[] percentiles; + protected final String[] percentileNames; + protected Comparable[] results; + + public PercentileStatsCollector(StatsCollector delegate, double[] percentiles, String[] percentileNames) { + super(delegate); + this.percentiles = percentiles; + this.percentileNames = percentileNames; + } + + @Override + public Comparable getStat(String stat) { + for( int i=0; i < percentiles.length; i++ ){ + if (stat.equals(percentileNames[i])) { + if (results!=null) { + return results[i]; + } else { + return null; + } + } + } + return delegate.getStat(stat); + } + + public void compute(){ + delegate.compute(); + if (values.size()>0) { + results = Iterables.toArray(getPercentiles(),Comparable.class); + } else { + results = null; + } + } + + @SuppressWarnings({ "unchecked"}) + protected List getPercentiles() { + return PercentileCalculator.getPercentiles(values, percentiles); + } + + public void collect(int doc) { + super.collect(doc); 
+ if (value.exists) { + values.add((Comparable)value.toObject()); + } + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollector.java new file mode 100644 index 00000000000..b3f173de949 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollector.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.statistics; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.util.mutable.MutableValue; + +/** + * StatsCollector implementations reduce a list of Objects to a single value. + * Most implementations reduce a list to a statistic on that list. + */ +public interface StatsCollector { + + /** + * Collect values from the value source and add to statistics. + * @param doc Document to collect from + */ + void collect(int doc); + + /** + * @param context The context to read documents from. 
+ * @throws IOException if setting next reader fails + */ + void setNextReader(AtomicReaderContext context) throws IOException; + + MutableValue getValue(); + FunctionValues getFunction(); + + /** + * @return The set of statistics being computed by the stats collector. + */ + Set getStatsList(); + + /** + * Return the value of the given statistic. + * @param stat the stat + * @return a comparable + */ + Comparable getStat(String stat); + + /** + * After all documents have been collected, this method should be + * called to finalize the calculations of each statistic. + */ + void compute(); + + /** + * @return The string representation of the value source. + */ + String valueSourceString(); +} diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollectorSupplierFactory.java b/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollectorSupplierFactory.java new file mode 100644 index 00000000000..c4dea1b7ab9 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollectorSupplierFactory.java @@ -0,0 +1,649 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.statistics; + +import java.text.ParseException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; +import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; +import org.apache.lucene.queries.function.valuesource.FloatFieldSource; +import org.apache.lucene.queries.function.valuesource.IntFieldSource; +import org.apache.lucene.queries.function.valuesource.LongFieldSource; +import org.apache.lucene.search.FieldCache; +import org.apache.solr.analytics.expression.ExpressionFactory; +import org.apache.solr.analytics.request.AnalyticsRequest; +import org.apache.solr.analytics.request.ExpressionRequest; +import org.apache.solr.analytics.util.AnalyticsParams; +import org.apache.solr.analytics.util.AnalyticsParsers; +import org.apache.solr.analytics.util.valuesource.AbsoluteValueDoubleFunction; +import org.apache.solr.analytics.util.valuesource.AddDoubleFunction; +import org.apache.solr.analytics.util.valuesource.ConcatStringFunction; +import org.apache.solr.analytics.util.valuesource.ConstDateSource; +import org.apache.solr.analytics.util.valuesource.ConstDoubleSource; +import org.apache.solr.analytics.util.valuesource.ConstStringSource; +import org.apache.solr.analytics.util.valuesource.DateFieldSource; +import org.apache.solr.analytics.util.valuesource.DateMathFunction; +import org.apache.solr.analytics.util.valuesource.DivDoubleFunction; +import org.apache.solr.analytics.util.valuesource.DualDoubleFunction; +import org.apache.solr.analytics.util.valuesource.FilterFieldSource; +import org.apache.solr.analytics.util.valuesource.LogDoubleFunction; +import org.apache.solr.analytics.util.valuesource.MultiDateFunction; +import 
org.apache.solr.analytics.util.valuesource.MultiDoubleFunction; +import org.apache.solr.analytics.util.valuesource.MultiplyDoubleFunction; +import org.apache.solr.analytics.util.valuesource.NegateDoubleFunction; +import org.apache.solr.analytics.util.valuesource.PowDoubleFunction; +import org.apache.solr.analytics.util.valuesource.ReverseStringFunction; +import org.apache.solr.analytics.util.valuesource.SingleDoubleFunction; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.StrField; +import org.apache.solr.schema.TrieDateField; +import org.apache.solr.schema.TrieDoubleField; +import org.apache.solr.schema.TrieFloatField; +import org.apache.solr.schema.TrieIntField; +import org.apache.solr.schema.TrieLongField; + +import com.google.common.base.Supplier; + +public class StatsCollectorSupplierFactory { + + // FunctionTypes + final static int NUMBER_TYPE = 0; + final static int DATE_TYPE = 1; + final static int STRING_TYPE = 2; + final static int FIELD_TYPE = 3; + final static int FILTER_TYPE = 4; + + /** + * Builds a Supplier that will generate identical arrays of new StatsCollectors. + * + * @param schema The Schema being used. + * @param request The AnalyticsRequest to generate a StatsCollector[] from. + * @return A Supplier that will return an array of new StatsCollector. + */ + @SuppressWarnings("unchecked") + public static Supplier create(IndexSchema schema, AnalyticsRequest request) { + final Map> collectorStats = new HashMap>(); + final Map> collectorPercs = new HashMap>(); + final Map collectorSources = new HashMap(); + + // Iterate through all expression request to make a list of ValueSource strings + // and statistics that need to be calculated on those ValueSources. 
+ for (ExpressionRequest expRequest : request.getExpressions()) { + String statExpression = expRequest.getExpressionString(); + Set statistics = getStatistics(statExpression); + if (statistics == null) { + continue; + } + for (String statExp : statistics) { + String stat; + String operands; + try { + stat = statExp.substring(0, statExp.indexOf('(')).trim(); + operands = statExp.substring(statExp.indexOf('(')+1, statExp.lastIndexOf(')')).trim(); + } catch (Exception e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Unable to parse statistic: ["+statExpression+"]",e); + } + String[] arguments = ExpressionFactory.getArguments(operands); + String source = arguments.length==0 ? null : arguments[0]; // null only for an empty argument list, which the argument-count checks below reject with BAD_REQUEST + if (stat.equals(AnalyticsParams.STAT_PERCENTILE)) { + // The statistic is a percentile, extra parsing is required + if (arguments.length<2) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Too few arguments given for "+stat+"() in ["+statExp+"]."); + } else if (arguments.length>2) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Too many arguments given for "+stat+"() in ["+statExp+"]."); + } + source = arguments[1]; + Set percs = collectorPercs.get(source); + if (percs == null) { + percs = new HashSet(); + collectorPercs.put(source, percs); + } + try { + int perc = Integer.parseInt(arguments[0]); + if (perc>0 && perc<100) { + percs.add(perc); + } else { + throw new SolrException(ErrorCode.BAD_REQUEST,"The percentile in ["+statExp+"] is not between 0 and 100, exclusive."); + } + } catch (NumberFormatException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"\""+arguments[0]+"\" cannot be converted into a percentile.",e); + } + } else if (arguments.length>1) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Too many arguments given for "+stat+"() in ["+statExp+"]."); + } else if (arguments.length==0) { + throw new SolrException(ErrorCode.BAD_REQUEST,"No arguments given for "+stat+"() in ["+statExp+"]."); + } + // Only unique ValueSources will be made; therefore statistics must be 
accumulated for + // each ValueSource, even across different expression requests + Set stats = collectorStats.get(source); + if (stats == null) { + stats = new HashSet(); + collectorStats.put(source, stats); + } + stats.add(stat); + } + } + String[] keys = collectorStats.keySet().toArray(new String[0]); + for (String sourceStr : keys) { + // Build one ValueSource for each unique value source string + ValueSource source = buildSourceTree(schema, sourceStr); + if (source == null) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The statistic ["+sourceStr+"] could not be parsed."); + } + String builtString = source.toString(); + collectorSources.put(builtString,source); + // Replace the user given string with the correctly built string + if (!builtString.equals(sourceStr)) { + Set stats = collectorStats.remove(sourceStr); + if (stats!=null) { + collectorStats.put(builtString, stats); + } + Set percs = collectorPercs.remove(sourceStr); + if (percs!=null) { + collectorPercs.put(builtString, percs); + } + for (ExpressionRequest er : request.getExpressions()) { + er.setExpressionString(er.getExpressionString().replace(sourceStr, builtString)); + } + } + } + if (collectorSources.size()==0) { + return new Supplier() { + @Override + public StatsCollector[] get() { + return new StatsCollector[0]; + } + }; + } + + // All information is stored in final arrays so that nothing + // has to be computed when the Supplier's get() method is called. 
+ final Set[] statsArr = collectorStats.values().toArray(new Set[0]); + final ValueSource[] sourceArr = collectorSources.values().toArray(new ValueSource[0]); + final boolean[] uniqueBools = new boolean[statsArr.length]; + final boolean[] medianBools = new boolean[statsArr.length]; + final boolean[] numericBools = new boolean[statsArr.length]; + final boolean[] dateBools = new boolean[statsArr.length]; + final double[][] percsArr = new double[statsArr.length][]; + final String[][] percsNames = new String[statsArr.length][]; + for (int count = 0; count < sourceArr.length; count++) { + uniqueBools[count] = statsArr[count].contains(AnalyticsParams.STAT_UNIQUE); + medianBools[count] = statsArr[count].contains(AnalyticsParams.STAT_MEDIAN); + numericBools[count] = statsArr[count].contains(AnalyticsParams.STAT_SUM)||statsArr[count].contains(AnalyticsParams.STAT_SUM_OF_SQUARES)||statsArr[count].contains(AnalyticsParams.STAT_MEAN)||statsArr[count].contains(AnalyticsParams.STAT_STANDARD_DEVIATION); + dateBools[count] = (sourceArr[count] instanceof DateFieldSource) | (sourceArr[count] instanceof MultiDateFunction) | (sourceArr[count] instanceof ConstDateSource); + Set ps = collectorPercs.get(sourceArr[count].toString()); + if (ps!=null) { + percsArr[count] = new double[ps.size()]; + percsNames[count] = new String[ps.size()]; + int percCount = 0; + for (int p : ps) { + percsArr[count][percCount] = p/100.0; + percsNames[count][percCount++] = AnalyticsParams.STAT_PERCENTILE+"_"+p; + } + } + } + // Making the Supplier + return new Supplier() { + public StatsCollector[] get() { + StatsCollector[] collectors = new StatsCollector[statsArr.length]; + for (int count = 0; count < statsArr.length; count++) { + if(numericBools[count]){ + StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count]); + if(uniqueBools[count]) sc = new UniqueStatsCollector(sc); + if(medianBools[count]) sc = new MedianStatsCollector(sc); + if(percsArr[count]!=null) sc = new 
PercentileStatsCollector(sc,percsArr[count],percsNames[count]); + collectors[count]=sc; + } else if (dateBools[count]) { + StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]); + if(uniqueBools[count]) sc = new UniqueStatsCollector(sc); + if(medianBools[count]) sc = new DateMedianStatsCollector(sc); + if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]); + collectors[count]=sc; + } else { + StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]); + if(uniqueBools[count]) sc = new UniqueStatsCollector(sc); + if(medianBools[count]) sc = new MedianStatsCollector(sc); + if(percsArr[count]!=null) sc = new PercentileStatsCollector(sc,percsArr[count],percsNames[count]); + collectors[count]=sc; + } + } + return collectors; + } + }; + } + + /** + * Finds the set of statistics that must be computed for the expression. + * @param expression The string representation of an expression + * @return The set of statistics (sum, mean, median, etc.) found in the expression + */ + public static Set getStatistics(String expression) { + HashSet set = new HashSet(); + int firstParen = expression.indexOf('('); + if (firstParen>0) { + String topOperation = expression.substring(0,firstParen).trim(); + if (AnalyticsParams.ALL_STAT_SET.contains(topOperation)) { + set.add(expression); + } else if (!(topOperation.equals(AnalyticsParams.CONSTANT_NUMBER)||topOperation.equals(AnalyticsParams.CONSTANT_DATE)||topOperation.equals(AnalyticsParams.CONSTANT_STRING))) { + String operands = expression.substring(firstParen+1, expression.lastIndexOf(')')).trim(); + String[] arguments = ExpressionFactory.getArguments(operands); + for (String argument : arguments) { + Set more = getStatistics(argument); + if (more!=null) { + set.addAll(more); + } + } + } + } + if (set.size()==0) { + return null; + } + return set; + } + + /** + * Builds a Value Source from a given string + * + * @param schema The schema being used. 
+ * @param expression The string to be turned into an expression. + * @return The completed ValueSource + */ + private static ValueSource buildSourceTree(IndexSchema schema, String expression) { + return buildSourceTree(schema,expression,FIELD_TYPE); + } + + /** + * Builds a Value Source from a given string and a given source type + * + * @param schema The schema being used. + * @param expression The string to be turned into an expression. + * @param sourceType The type of source that must be returned. + * @return The completed ValueSource, or null when the expression's type is incompatible with sourceType + */ + private static ValueSource buildSourceTree(IndexSchema schema, String expression, int sourceType) { + int expressionType = getSourceType(expression); + if (sourceType != FIELD_TYPE && expressionType != FIELD_TYPE && + expressionType != FILTER_TYPE && expressionType != sourceType) { + return null; + } + switch (expressionType) { + case NUMBER_TYPE : return buildNumericSource(schema, expression); + case DATE_TYPE : return buildDateSource(schema, expression); + case STRING_TYPE : return buildStringSource(schema, expression); + case FIELD_TYPE : return buildFieldSource(schema, expression, sourceType); + case FILTER_TYPE : return buildFilterSource(schema, expression.substring(expression.indexOf('(')+1,expression.lastIndexOf(')')), sourceType); + default : throw new SolrException(ErrorCode.BAD_REQUEST,expression+" is not a valid operation."); + } + } + + /** + * Determines what type of value source the expression represents. 
+ * + * @param expression The expression representing the desired ValueSource + * @return FIELD_TYPE when the expression contains no opening parenthesis; otherwise NUMBER_TYPE, DATE_TYPE, STRING_TYPE or FILTER_TYPE according to the operation name. An unsupported operation name raises a BAD_REQUEST SolrException. + */ + private static int getSourceType(String expression) { + int paren = expression.indexOf('('); + if (paren<0) { + return FIELD_TYPE; + } + String operation = expression.substring(0,paren).trim(); + + if (AnalyticsParams.NUMERIC_OPERATION_SET.contains(operation)) { + return NUMBER_TYPE; + } else if (AnalyticsParams.DATE_OPERATION_SET.contains(operation)) { + return DATE_TYPE; + } else if (AnalyticsParams.STRING_OPERATION_SET.contains(operation)) { + return STRING_TYPE; + } else if (operation.equals(AnalyticsParams.FILTER)) { + return FILTER_TYPE; + } + throw new SolrException(ErrorCode.BAD_REQUEST,"The operation \""+operation+"\" in ["+expression+"] is not supported."); + } + + /** + * Builds a value source for a given field, making sure that the field fits a given source type. + * @param schema the schema + * @param expressionString The name of the field to build a Field Source from. + * @param sourceType FIELD_TYPE for any type of field, NUMBER_TYPE for numeric fields, + * DATE_TYPE for date fields and STRING_TYPE for string fields. 
+ * @return a value source + */ + private static ValueSource buildFieldSource(IndexSchema schema, String expressionString, int sourceType) { + SchemaField sf; + try { + sf = schema.getField(expressionString); + } catch (SolrException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The field "+expressionString+" does not exist.",e); + } + FieldType type = sf.getType(); + if ( type instanceof TrieIntField) { + if (sourceType!=NUMBER_TYPE&&sourceType!=FIELD_TYPE) { + return null; + } + return new IntFieldSource(expressionString, FieldCache.NUMERIC_UTILS_INT_PARSER) { + public String description() { + return field; + } + }; + } else if (type instanceof TrieLongField) { + if (sourceType!=NUMBER_TYPE&&sourceType!=FIELD_TYPE) { + return null; + } + return new LongFieldSource(expressionString, FieldCache.NUMERIC_UTILS_LONG_PARSER) { + public String description() { + return field; + } + }; + } else if (type instanceof TrieFloatField) { + if (sourceType!=NUMBER_TYPE&&sourceType!=FIELD_TYPE) { + return null; + } + return new FloatFieldSource(expressionString, FieldCache.NUMERIC_UTILS_FLOAT_PARSER) { + public String description() { + return field; + } + }; + } else if (type instanceof TrieDoubleField) { + if (sourceType!=NUMBER_TYPE&&sourceType!=FIELD_TYPE) { + return null; + } + return new DoubleFieldSource(expressionString, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER) { + public String description() { + return field; + } + }; + } else if (type instanceof TrieDateField) { + if (sourceType!=DATE_TYPE&&sourceType!=FIELD_TYPE) { + return null; + } + return new DateFieldSource(expressionString, AnalyticsParsers.DEFAULT_DATE_PARSER) { + public String description() { + return field; + } + }; + } else if (type instanceof StrField) { + if (sourceType!=STRING_TYPE&&sourceType!=FIELD_TYPE) { + return null; + } + return new BytesRefFieldSource(expressionString) { + public String description() { + return field; + } + }; + } + throw new SolrException(ErrorCode.BAD_REQUEST, 
type.toString()+" is not a supported field type in Solr Analytics."); + } + + /** + * Builds a default is missing source that wraps a given source. A missing value is required for all + * non-field value sources. + * @param schema the schema + * @param expressionString The name of the field to build a Field Source from. + * @param sourceType FIELD_TYPE for any type of field, NUMBER_TYPE for numeric fields, + * DATE_TYPE for date fields and STRING_TYPE for string fields. + * @return a value source + */ + @SuppressWarnings("deprecation") + private static ValueSource buildFilterSource(IndexSchema schema, String expressionString, int sourceType) { + String[] arguments = ExpressionFactory.getArguments(expressionString); + if (arguments.length!=2) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Invalid arguments were given for \""+AnalyticsParams.FILTER+"\"."); + } + ValueSource delegateSource = buildSourceTree(schema, arguments[0], sourceType); + if (delegateSource==null) { + return null; + } + Object defaultObject; + + ValueSource src = delegateSource; + if (delegateSource instanceof FilterFieldSource) { + src = ((FilterFieldSource)delegateSource).getRootSource(); + } + if ( src instanceof IntFieldSource) { + try { + defaultObject = new Integer(arguments[1]); + } catch (NumberFormatException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The filter value "+arguments[1]+" cannot be converted into an integer.",e); + } + } else if ( src instanceof DateFieldSource || src instanceof MultiDateFunction) { + try { + defaultObject = TrieDateField.parseDate(arguments[1]); + } catch (ParseException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The filter value "+arguments[1]+" cannot be converted into a date.",e); + } + } else if ( src instanceof LongFieldSource ) { + try { + defaultObject = new Long(arguments[1]); + } catch (NumberFormatException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The filter value "+arguments[1]+" cannot be converted into a 
long.",e); + } + } else if ( src instanceof FloatFieldSource ) { + try { + defaultObject = new Float(arguments[1]); + } catch (NumberFormatException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The filter value "+arguments[1]+" cannot be converted into a float.",e); + } + } else if ( src instanceof DoubleFieldSource || src instanceof SingleDoubleFunction || + src instanceof DualDoubleFunction|| src instanceof MultiDoubleFunction) { + try { + defaultObject = new Double(arguments[1]); + } catch (NumberFormatException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The filter value "+arguments[1]+" cannot be converted into a double.",e); + } + } else { + defaultObject = arguments[1]; + } + return new FilterFieldSource(delegateSource,defaultObject); + } + + /** + * Recursively parses and breaks down the expression string to build a numeric ValueSource. + * + * @param schema The schema to pull fields from. + * @param expressionString The expression string to build a ValueSource from. 
+ * @return The value source represented by the given expressionString, or null if it names a non-numeric field or a date/string operation + */ + private static ValueSource buildNumericSource(IndexSchema schema, String expressionString) { + int paren = expressionString.indexOf('('); + String[] arguments; + String operands; + if (paren<0) { + return buildFieldSource(schema,expressionString,NUMBER_TYPE); + } else { + try { + operands = expressionString.substring(paren+1, expressionString.lastIndexOf(')')).trim(); + } catch (Exception e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"Missing closing parenthesis in ["+expressionString+"]"); + } + arguments = ExpressionFactory.getArguments(operands); + } + String operation = expressionString.substring(0, paren).trim(); + if (operation.equals(AnalyticsParams.CONSTANT_NUMBER)) { + if (arguments.length!=1) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The constant number declaration ["+expressionString+"] does not have exactly 1 argument."); + } + return new ConstDoubleSource(Double.parseDouble(arguments[0])); + } else if (operation.equals(AnalyticsParams.NEGATE)) { + if (arguments.length!=1) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The negate operation ["+expressionString+"] does not have exactly 1 argument."); + } + ValueSource argSource = buildNumericSource(schema, arguments[0]); + if (argSource==null) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The operation \""+AnalyticsParams.NEGATE+"\" requires a numeric field or operation as argument. 
\""+arguments[0]+"\" is not a numeric field or operation."); + } + return new NegateDoubleFunction(argSource); + } else if (operation.equals(AnalyticsParams.ABSOLUTE_VALUE)) { + if (arguments.length!=1) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The absolute value operation ["+expressionString+"] does not have exactly 1 argument."); + } + ValueSource argSource = buildNumericSource(schema, arguments[0]); + if (argSource==null) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The operation \""+AnalyticsParams.ABSOLUTE_VALUE+"\" requires a numeric field or operation as argument. \""+arguments[0]+"\" is not a numeric field or operation."); + } + return new AbsoluteValueDoubleFunction(argSource); + } else if (operation.equals(AnalyticsParams.FILTER)) { + return buildFilterSource(schema, operands, NUMBER_TYPE); + } + List subExpressions = new ArrayList(); + for (String argument : arguments) { + ValueSource argSource = buildNumericSource(schema, argument); + if (argSource == null) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The operation \""+operation+"\" requires numeric fields or operations as arguments. 
\""+argument+"\" is not a numeric field or operation."); + } + subExpressions.add(argSource); + } + if (operation.equals(AnalyticsParams.ADD)) { + return new AddDoubleFunction(subExpressions.toArray(new ValueSource[0])); + } else if (operation.equals(AnalyticsParams.MULTIPLY)) { + return new MultiplyDoubleFunction(subExpressions.toArray(new ValueSource[0])); + } else if (operation.equals(AnalyticsParams.DIVIDE)) { + if (subExpressions.size()!=2) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The divide operation ["+expressionString+"] does not have exactly 2 arguments."); + } + return new DivDoubleFunction(subExpressions.get(0),subExpressions.get(1)); + } else if (operation.equals(AnalyticsParams.POWER)) { + if (subExpressions.size()!=2) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The power operation ["+expressionString+"] does not have exactly 2 arguments."); + } + return new PowDoubleFunction(subExpressions.get(0),subExpressions.get(1)); + } else if (operation.equals(AnalyticsParams.LOG)) { + if (subExpressions.size()!=2) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The log operation ["+expressionString+"] does not have exactly 2 arguments."); + } + return new LogDoubleFunction(subExpressions.get(0), subExpressions.get(1)); + } + if (AnalyticsParams.DATE_OPERATION_SET.contains(operation)||AnalyticsParams.STRING_OPERATION_SET.contains(operation)) { + return null; + } + throw new SolrException(ErrorCode.BAD_REQUEST,"The operation ["+expressionString+"] is not supported."); + } + + + /** + * Recursively parses and breaks down the expression string to build a date ValueSource. + * + * @param schema The schema to pull fields from. + * @param expressionString The expression string to build a ValueSource from. 
+ * @return The value source represented by the given expressionString + */ + @SuppressWarnings("deprecation") + private static ValueSource buildDateSource(IndexSchema schema, String expressionString) { + int paren = expressionString.indexOf('('); + String[] arguments; + if (paren<0) { + return buildFieldSource(schema, expressionString, DATE_TYPE); + } else { + arguments = ExpressionFactory.getArguments(expressionString.substring(paren+1, expressionString.lastIndexOf(')')).trim()); + } + String operands = arguments[0]; + String operation = expressionString.substring(0, paren).trim(); + if (operation.equals(AnalyticsParams.CONSTANT_DATE)) { + if (arguments.length!=1) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The constant date declaration ["+expressionString+"] does not have exactly 1 argument."); + } + try { + return new ConstDateSource(TrieDateField.parseDate(operands)); + } catch (ParseException e) { + throw new SolrException(ErrorCode.BAD_REQUEST,"The constant "+operands+" cannot be converted into a date.",e); + } + } else if (operation.equals(AnalyticsParams.FILTER)) { + return buildFilterSource(schema, operands, DATE_TYPE); + } + if (operation.equals(AnalyticsParams.DATE_MATH)) { + List subExpressions = new ArrayList(); + boolean first = true; + for (String argument : arguments) { + ValueSource argSource; + if (first) { + first = false; + argSource = buildDateSource(schema, argument); + if (argSource == null) { + throw new SolrException(ErrorCode.BAD_REQUEST,"\""+AnalyticsParams.DATE_MATH+"\" requires the first argument be a date operation or field. ["+argument+"] is not a date operation or field."); + } + } else { + argSource = buildStringSource(schema, argument); + if (argSource == null) { + throw new SolrException(ErrorCode.BAD_REQUEST,"\""+AnalyticsParams.DATE_MATH+"\" requires that all arguments except the first be string operations. 
["+argument+"] is not a string operation."); + } + } + subExpressions.add(argSource); + } + return new DateMathFunction(subExpressions.toArray(new ValueSource[0])); + } + if (AnalyticsParams.NUMERIC_OPERATION_SET.contains(operation)||AnalyticsParams.STRING_OPERATION_SET.contains(operation)) { + return null; + } + throw new SolrException(ErrorCode.BAD_REQUEST,"The operation ["+expressionString+"] is not supported."); + } + + + /** + * Recursively parses and breaks down the expression string to build a string ValueSource. + * + * @param schema The schema to pull fields from. + * @param expressionString The expression string to build a ValueSource from. + * @return The value source represented by the given expressionString + */ + private static ValueSource buildStringSource(IndexSchema schema, String expressionString) { + int paren = expressionString.indexOf('('); + String[] arguments; + if (paren<0) { + return buildFieldSource(schema, expressionString, FIELD_TYPE); + } else { + arguments = ExpressionFactory.getArguments(expressionString.substring(paren+1, expressionString.lastIndexOf(')')).trim()); + } + String operands = arguments[0]; + String operation = expressionString.substring(0, paren).trim(); + if (operation.equals(AnalyticsParams.CONSTANT_STRING)) { + operands = expressionString.substring(paren+1, expressionString.lastIndexOf(')')); + return new ConstStringSource(operands); + } else if (operation.equals(AnalyticsParams.FILTER)) { + return buildFilterSource(schema,operands,FIELD_TYPE); + } else if (operation.equals(AnalyticsParams.REVERSE)) { + if (arguments.length!=1) { + throw new SolrException(ErrorCode.BAD_REQUEST,"\""+AnalyticsParams.REVERSE+"\" requires exactly one argument. 
The number of arguments in "+expressionString+" is not 1."); + } + return new ReverseStringFunction(buildStringSource(schema, operands)); + } + List subExpressions = new ArrayList(); + for (String argument : arguments) { + subExpressions.add(buildSourceTree(schema, argument)); + } + if (operation.equals(AnalyticsParams.CONCATENATE)) { + return new ConcatStringFunction(subExpressions.toArray(new ValueSource[0])); + } + if (AnalyticsParams.NUMERIC_OPERATION_SET.contains(operation)) { + return buildNumericSource(schema, expressionString); + } else if (AnalyticsParams.DATE_OPERATION_SET.contains(operation)) { + return buildDateSource(schema, expressionString); + } + throw new SolrException(ErrorCode.BAD_REQUEST,"The operation ["+expressionString+"] is not supported."); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/UniqueStatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/UniqueStatsCollector.java new file mode 100644 index 00000000000..ca8d2ab47fb --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/UniqueStatsCollector.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.statistics; + +import java.util.HashSet; +import java.util.Set; + +/** + * UniqueStatsCollector wraps another StatsCollector and additionally counts the distinct values it collects, reported as the "unique" statistic. + */ +public class UniqueStatsCollector extends AbstractDelegatingStatsCollector{ + private final Set uniqueValues = new HashSet(); + + public UniqueStatsCollector(StatsCollector delegate) { + super(delegate); + } + + @Override + public void collect(int doc) { + super.collect(doc); + if (value.exists) { + uniqueValues.add(value.toObject()); + } + } + + @Override + public Comparable getStat(String stat) { + if (stat.equals("unique")) { + return Long.valueOf(uniqueValues.size()); + } + return delegate.getStat(stat); + } + + @Override + public void compute() { + delegate.compute(); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/package.html b/solr/core/src/java/org/apache/solr/analytics/statistics/package.html new file mode 100644 index 00000000000..99539fc28ed --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/package.html @@ -0,0 +1,27 @@ + + + + + + + +

    +Statistics collectors reduce a list of Objects to a single value. Most implementations reduce a list to a statistic on that list. +

    + + diff --git a/solr/core/src/java/org/apache/solr/analytics/util/AnalyticsParams.java b/solr/core/src/java/org/apache/solr/analytics/util/AnalyticsParams.java new file mode 100644 index 00000000000..d7f220a22b8 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/AnalyticsParams.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.util; + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import org.apache.solr.common.params.FacetParams.FacetRangeInclude; +import org.apache.solr.common.params.FacetParams.FacetRangeOther; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +public interface AnalyticsParams { + // Full length Analytics Params + public static final String ANALYTICS = "olap"; + + public static final String REQUEST = "o|olap"; + + public static final String EXPRESSION = "s|stat|statistic"; + public static final String HIDDEN_EXPRESSION = "hs|hiddenstat|hiddenstatistic"; + + public static final String FIELD_FACET = "ff|fieldfacet"; + public static final String LIMIT = "l|limit"; + public static final String OFFSET = "off|offset"; + public static final String HIDDEN = "h|hidden"; + public static final String SHOW_MISSING = "sm|showmissing"; + public static final String SORT_STATISTIC ="ss|sortstat|sortstatistic"; + public static final String SORT_DIRECTION ="sd|sortdirection"; + + public static final String RANGE_FACET = "rf|rangefacet"; + public static final String START = "st|start"; + public static final String END = "e|end"; + public static final String GAP = "g|gap"; + public static final String HARDEND = "he|hardend"; + public static final String INCLUDE_BOUNDARY = "ib|includebound"; + public static final String OTHER_RANGE = "or|otherrange"; + + public static final String QUERY_FACET = "qf|queryfacet"; + public static final String DEPENDENCY = "d|dependecy"; // NOTE(review): "dependecy" looks like a misspelling of "dependency"; the literal is presumably matched against request parameter names, so confirm with the parser before changing it + public static final String QUERY = "q|query"; + + //Defaults + public static final boolean DEFAULT_ABBREVIATE_PREFIX = true; + public static final String DEFAULT_SORT_DIRECTION = "ascending"; + public static final int DEFAULT_LIMIT = -1; + public static final boolean DEFAULT_HIDDEN = false; + public static final boolean DEFAULT_HARDEND = false; + public static final boolean DEFAULT_SHOW_MISSING = false; + public 
static final FacetRangeInclude DEFAULT_INCLUDE = FacetRangeInclude.LOWER; + public static final FacetRangeOther DEFAULT_OTHER = FacetRangeOther.NONE; + + // Statistic Function Names (Cannot share names with ValueSource & Expression Functions) + public static final String STAT_COUNT = "count"; + public static final String STAT_MISSING = "missing"; + public static final String STAT_SUM = "sum"; + public static final String STAT_SUM_OF_SQUARES = "sumofsquares"; + public static final String STAT_STANDARD_DEVIATION = "stddev"; + public static final String STAT_MEAN = "mean"; + public static final String STAT_UNIQUE = "unique"; + public static final String STAT_MEDIAN = "median"; + public static final String STAT_PERCENTILE = "percentile"; + public static final String STAT_MIN = "min"; + public static final String STAT_MAX = "max"; + + public static final List ALL_STAT_LIST = Collections.unmodifiableList(Lists.newArrayList(STAT_COUNT, STAT_MISSING, STAT_SUM, STAT_SUM_OF_SQUARES, STAT_STANDARD_DEVIATION, STAT_MEAN, STAT_UNIQUE, STAT_MEDIAN, STAT_PERCENTILE,STAT_MIN,STAT_MAX)); + public static final Set ALL_STAT_SET = Collections.unmodifiableSet(Sets.newLinkedHashSet(ALL_STAT_LIST)); + + // ValueSource & Expression Function Names (Cannot share names with Statistic Functions) + // No specific type + final static String FILTER = "filter"; + final static String RESULT = "result"; + final static String QUERY_RESULT = "qresult"; + + // Numbers + final static String CONSTANT_NUMBER = "const_num"; + final static String NEGATE = "neg"; + final static String ABSOLUTE_VALUE = "abs"; + final static String LOG = "log"; + final static String ADD = "add"; + final static String MULTIPLY = "mult"; + final static String DIVIDE = "div"; + final static String POWER = "pow"; + public static final Set NUMERIC_OPERATION_SET = Collections.unmodifiableSet(Sets.newLinkedHashSet(Lists.newArrayList(CONSTANT_NUMBER,NEGATE,ABSOLUTE_VALUE,LOG,ADD,MULTIPLY,DIVIDE,POWER))); + + // Dates + final static 
String CONSTANT_DATE = "const_date"; + final static String DATE_MATH = "date_math"; + public static final Set DATE_OPERATION_SET = Collections.unmodifiableSet(Sets.newLinkedHashSet(Lists.newArrayList(CONSTANT_DATE,DATE_MATH))); + + //Strings + final static String CONSTANT_STRING = "const_str"; + final static String REVERSE = "rev"; + final static String CONCATENATE = "concat"; + public static final Set STRING_OPERATION_SET = Collections.unmodifiableSet(Sets.newLinkedHashSet(Lists.newArrayList(CONSTANT_STRING,REVERSE,CONCATENATE))); + + // Field Source Wrappers +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java b/solr/core/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java new file mode 100644 index 00000000000..b27fe702585 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.util; + +import java.io.IOException; +import java.text.ParseException; +import java.util.Arrays; +import java.util.Date; + +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.LongParser; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.TrieDateField; +import org.apache.solr.schema.TrieDoubleField; +import org.apache.solr.schema.TrieFloatField; +import org.apache.solr.schema.TrieIntField; +import org.apache.solr.schema.TrieLongField; + +/** + * Class to hold the parsers used for Solr Analytics. + */ +public class AnalyticsParsers { + + /** + * Returns a parser that will translate a BytesRef or long from DocValues into + * a String that correctly represents the value. + * @param class1 class of the FieldType of the field being faceted on. 
+ * @return A Parser + */ + public static Parser getParser(Class class1) { + if (class1.equals(TrieIntField.class)) { + return AnalyticsParsers.INT_DOC_VALUES_PARSER; + } else if (class1.equals(TrieLongField.class)) { + return AnalyticsParsers.LONG_DOC_VALUES_PARSER; + } else if (class1.equals(TrieFloatField.class)) { + return AnalyticsParsers.FLOAT_DOC_VALUES_PARSER; + } else if (class1.equals(TrieDoubleField.class)) { + return AnalyticsParsers.DOUBLE_DOC_VALUES_PARSER; + } else if (class1.equals(TrieDateField.class)) { + return AnalyticsParsers.DATE_DOC_VALUES_PARSER; + } else { + return AnalyticsParsers.STRING_PARSER; + } + } + + /** Long Parser that takes in String representations of dates and + * converts them into longs + */ + public final static LongParser DEFAULT_DATE_PARSER = new LongParser() { + @SuppressWarnings("deprecation") + @Override + public long parseLong(BytesRef term) { + try { + return TrieDateField.parseDate(term.utf8ToString()).getTime(); + } catch (ParseException e) { + System.err.println("Cannot parse date "+term.utf8ToString()); + return 0; + } + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_DATE_PARSER"; + } + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + }; + + /** + * For use in classes that grab values by docValue. + * Converts a BytesRef object into the correct readable text. + */ + public static interface Parser { + String parse(BytesRef bytes) throws IOException; + } + + /** + * Converts the long returned by NumericDocValues into the + * correct number and return it as a string. + */ + public static interface NumericParser extends Parser { + String parseNum(long l); + } + + /** + * Converts the BytesRef or long to the correct int string. 
+ */ + public static final NumericParser INT_DOC_VALUES_PARSER = new NumericParser() { + public String parse(BytesRef bytes) throws IOException { + try { + return ""+NumericUtils.prefixCodedToInt(bytes); + } catch (NumberFormatException e) { + throw new IOException("The byte array "+Arrays.toString(bytes.bytes)+" cannot be converted to an int."); + } + } + @Override + public String parseNum(long l) { + return ""+(int)l; + } + }; + + /** + * Converts the BytesRef or long to the correct long string. + */ + public static final NumericParser LONG_DOC_VALUES_PARSER = new NumericParser() { + public String parse(BytesRef bytes) throws IOException { + try { + return ""+NumericUtils.prefixCodedToLong(bytes); + } catch (NumberFormatException e) { + throw new IOException("The byte array "+Arrays.toString(bytes.bytes)+" cannot be converted to a long."); + } + } + @Override + public String parseNum(long l) { + return ""+l; + } + }; + + /** + * Converts the BytesRef or long to the correct float string. + */ + public static final NumericParser FLOAT_DOC_VALUES_PARSER = new NumericParser() { + public String parse(BytesRef bytes) throws IOException { + try { + return ""+NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(bytes)); + } catch (NumberFormatException e) { + throw new IOException("The byte array "+Arrays.toString(bytes.bytes)+" cannot be converted to a float."); + } + } + @Override + public String parseNum(long l) { + return ""+NumericUtils.sortableIntToFloat((int)l); + } + }; + + /** + * Converts the BytesRef or long to the correct double string. 
+ */ + public static final NumericParser DOUBLE_DOC_VALUES_PARSER = new NumericParser() { + public String parse(BytesRef bytes) throws IOException { + try { + return ""+NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(bytes)); + } catch (NumberFormatException e) { + throw new IOException("The byte array "+Arrays.toString(bytes.bytes)+" cannot be converted to a double."); + } + } + @Override + public String parseNum(long l) { + return ""+NumericUtils.sortableLongToDouble(l); + } + }; + + /** + * Converts the BytesRef or long to the correct date string. + */ + public static final NumericParser DATE_DOC_VALUES_PARSER = new NumericParser() { + @SuppressWarnings("deprecation") + public String parse(BytesRef bytes) throws IOException { + try { + return TrieDateField.formatExternal(new Date(NumericUtils.prefixCodedToLong(bytes))); + } catch (NumberFormatException e) { + throw new IOException("The byte array "+Arrays.toString(bytes.bytes)+" cannot be converted to a date."); + } + } + @SuppressWarnings("deprecation") + @Override + public String parseNum(long l) { + return ""+TrieDateField.formatExternal(new Date(l)); + } + }; + + /** + * Converts the BytesRef to the correct string. + */ + public static final Parser STRING_PARSER = new Parser() { + public String parse(BytesRef bytes) { + return bytes.utf8ToString(); + } + }; +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/MedianCalculator.java b/solr/core/src/java/org/apache/solr/analytics/util/MedianCalculator.java new file mode 100644 index 00000000000..48575977939 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/MedianCalculator.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util; + +import java.util.List; + +public class MedianCalculator { + + /** + * Calculates the median of the given list of numbers. + * Returns 0 for an empty list. The selection step swaps elements of the + * given list in place, so its order is not preserved. + * + * @param list A list of {@link Comparable} {@link Number} objects + * @return The median of the given list as a double. + */ + public static > double getMedian(List list) { + /* size holds the last valid index, not the element count */ int size = list.size() - 1; + if (size == -1) { + return 0; + } + + select(list, .5 * size, 0, size); + + int firstIdx = (int) (Math.floor(.5 * size)); + /* an odd last index means an even element count, so the two middle elements are averaged */ int secondIdx = (firstIdx <= size && size % 2 == 1) ? 
firstIdx + 1 : firstIdx; + double result = list.get(firstIdx).doubleValue() * .5 + list.get(secondIdx).doubleValue() * .5; + + return result; + } + + /* Partitions list[begin..end] around a pivot and recurses into the side that contains 'place'. When end - begin is below 10 the pivot is a random element of the range, otherwise split() chooses it. NOTE(review): 'place' is a double, presumably so that a fractional middle position settles both neighbouring indices - confirm. */ private static > void select(List list, double place, int begin, int end) { + T split; + if (end - begin < 10) { + split = list.get((int) (Math.random() * (end - begin + 1)) + begin); + } else { + split = split(list, begin, end); + } + + Point result = partition(list, begin, end, split); + + if (place < result.low) { + select(list, place, begin, result.low); + } else if (place > result.high) { + select(list, place, result.high, end); + } else { + if (result.low == (int) (Math.floor(place)) && result.low > begin) { + select(list, result.low, begin, result.low); + } + if (result.high == (int) (Math.ceil(place)) && result.high < end) { + select(list, result.high, result.high, end); + } + } + } + + /* Chooses a pivot: swaps a strided sample of about sqrt(n) elements to the front of the range, runs select() on that sample and returns its middle element. */ private static > T split(List list, int begin, int end) { + T temp; + int num = (end - begin + 1); + int recursiveSize = (int) Math.sqrt((double) num); + int step = num / recursiveSize; + for (int i = 1; i < recursiveSize; i++) { + int swapFrom = i * step + begin; + int swapTo = i + begin; + temp = list.get(swapFrom); + list.set(swapFrom, list.get(swapTo)); + list.set(swapTo, temp); + } + recursiveSize--; + select(list, recursiveSize / 2 + begin, begin, recursiveSize + begin); + return list.get(recursiveSize / 2 + begin); + } + + /* Rearranges list[begin..end] around indexElement using compareTo and returns the resulting boundary indices as a Point. NOTE(review): low/high look like the last index below the pivot and the first index above it - confirm. */ private static > Point partition(List list, int begin, int end, T indexElement) { + T temp; + int left, right; + for (left = begin, right = end; left < right; left++, right--) { + while (list.get(left).compareTo(indexElement) < 0) { + left++; + } + while (right != begin - 1 && list.get(right).compareTo(indexElement) >= 0) { + right--; + } + if (right <= left) { + left--; + right++; + break; + } + temp = list.get(left); + list.set(left, list.get(right)); + list.set(right, temp); + } + while (left != begin - 1 && list.get(left).compareTo(indexElement) >= 0) { + left--; + } + while (right != end + 1 && 
list.get(right).compareTo(indexElement) <= 0) { + right++; + } + /* move any remaining elements equal to the pivot next to the boundary so that 'right' ends up past them */ int rightMove = right + 1; + while (rightMove < end + 1) { + if (list.get(rightMove).equals(indexElement)) { + temp = list.get(rightMove); + list.set(rightMove, list.get(right)); + list.set(right, temp); + do { + right++; + } while (list.get(right).equals(indexElement)); + if (rightMove <= right) { + rightMove = right; + } + } + rightMove++; + } + return new Point(left, right); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/PercentileCalculator.java b/solr/core/src/java/org/apache/solr/analytics/util/PercentileCalculator.java new file mode 100644 index 00000000000..a98ed0c4d8e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/PercentileCalculator.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class PercentileCalculator { + /** + * Calculates a list of percentile values for a given list of objects and percentiles. + * Each percentile p is mapped to the list index round(p * size - .5); the + * given list is reordered in place while those positions are selected. + * + * @param list The list of {@link Comparable} objects to calculate the percentiles of. 
+ * @param percents The array of percentiles (.01 to .99) to calculate. + * @return a list of comparables, one per entry of percents and in the same order, + * or null when the given list is empty + * @throws IllegalArgumentException if a percentile maps to an index outside the list + */ + public static > List getPercentiles(List list, double[] percents) { + int size = list.size(); + if (size == 0) { + return null; + } + + int[] percs = new int[percents.length]; + for (int i = 0; i < percs.length; i++) { + percs[i] = (int) Math.round(percents[i] * size - .5); + } + /* sorted copy drives the selection; percs keeps the caller's order for the result */ int[] percentiles = Arrays.copyOf(percs, percs.length); + Arrays.sort(percentiles); + + /* NOTE(review): an empty percents array reaches percentiles[0] here and fails with ArrayIndexOutOfBoundsException */ if (percentiles[0] < 0 || percentiles[percentiles.length - 1] > size - 1) { + throw new IllegalArgumentException(); + } + + List results = new ArrayList(percs.length); + + distributeAndFind(list, percentiles, 0, percentiles.length - 1); + + for (int i = 0; i < percs.length; i++) { + results.add(list.get(percs[i])); + } + return results; + } + + /* Picks, among percentiles[beginIdx..endIdx], a target index near the middle of the surrounding list range, selects it, then recurses on the targets to its left and to its right. */ private static > void distributeAndFind(List list, int[] percentiles, int beginIdx, int endIdx) { + if (endIdx < beginIdx) { + return; + } + int middleIdxb = beginIdx; + int middleIdxe = beginIdx; + int begin = (beginIdx == 0) ? -1 : percentiles[beginIdx - 1]; + int end = (endIdx == percentiles.length - 1) ? 
list.size() : percentiles[endIdx + 1]; + double middle = (begin + end) / 2.0; + for (int i = beginIdx; i <= endIdx; i++) { + double value = Math.abs(percentiles[i] - middle) - Math.abs(percentiles[middleIdxb] - middle); + if (percentiles[i] == percentiles[middleIdxb]) { + middleIdxe = i; + } else if (value < 0) { + middleIdxb = i; + do { + middleIdxe = i; + i++; + } while (i <= endIdx && percentiles[middleIdxb] == percentiles[i]); + break; + } + } + + int middlePlace = percentiles[middleIdxb]; + int beginPlace = begin + 1; + int endPlace = end - 1; + + select(list, middlePlace, beginPlace, endPlace); + distributeAndFind(list, percentiles, beginIdx, middleIdxb - 1); + distributeAndFind(list, percentiles, middleIdxe + 1, endIdx); + } + + /* Partitions list[begin..end] around a pivot and recurses into the side that contains 'place'. When end - begin is below 10 the pivot is a random element of the range, otherwise split() chooses it. */ private static > void select(List list, int place, int begin, int end) { + T split; + if (end - begin < 10) { + split = list.get((int) (Math.random() * (end - begin + 1)) + begin); + } else { + split = split(list, begin, end); + } + + Point result = partition(list, begin, end, split); + + if (place <= result.low) { + select(list, place, begin, result.low); + } else if (place >= result.high) { + select(list, place, result.high, end); + } + } + + /* Chooses a pivot: swaps a strided sample of about sqrt(n) elements to the front of the range, runs select() on that sample and returns its middle element. */ private static > T split(List list, int begin, int end) { + T temp; + int num = (end - begin + 1); + int recursiveSize = (int) Math.sqrt((double) num); + int step = num / recursiveSize; + for (int i = 1; i < recursiveSize; i++) { + int swapFrom = i * step + begin; + int swapTo = i + begin; + temp = list.get(swapFrom); + list.set(swapFrom, list.get(swapTo)); + list.set(swapTo, temp); + } + recursiveSize--; + select(list, recursiveSize / 2 + begin, begin, recursiveSize + begin); + return list.get(recursiveSize / 2 + begin); + } + + /* Rearranges list[begin..end] around indexElement using compareTo and returns the resulting boundary indices as a Point. NOTE(review): low/high look like the last index below the pivot and the first index above it - confirm. */ private static > Point partition(List list, int begin, int end, T indexElement) { + T temp; + int left, right; + for (left = begin, right = end; left <= right; left++, right--) { + while (list.get(left).compareTo(indexElement) < 0) { + left++; + } + while (right != 
begin - 1 && list.get(right).compareTo(indexElement) >= 0) { + right--; + } + if (right <= left) { + left--; + right++; + break; + } + temp = list.get(left); + list.set(left, list.get(right)); + list.set(right, temp); + } + while (left > begin - 1 && list.get(left).compareTo(indexElement) >= 0) { + left--; + } + while (right < end + 1 && list.get(right).compareTo(indexElement) <= 0) { + right++; + } + int rightMove = right + 1; + while (rightMove < end + 1) { + if (list.get(rightMove).equals(indexElement)) { + temp = list.get(rightMove); + list.set(rightMove, list.get(right)); + list.set(right, temp); + do { + right++; + } while (list.get(right).equals(indexElement)); + if (rightMove <= right) { + rightMove = right; + } + } + rightMove++; + } + return new Point(left, right); + } +} + +/* Mutable pair of list indices (low, high) returned by partition(). */ class Point { + public int low; + public int high; + + public Point(int low, int high) { + this.low = low; + this.high = high; + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/RangeEndpointCalculator.java b/solr/core/src/java/org/apache/solr/analytics/util/RangeEndpointCalculator.java new file mode 100644 index 00000000000..50e45c0ce81 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/RangeEndpointCalculator.java @@ -0,0 +1,358 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util; + +import java.util.ArrayList; +import java.util.Date; +import java.util.EnumSet; +import java.util.List; +import java.util.Set; + +import org.apache.solr.analytics.request.RangeFacetRequest; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.FacetParams.FacetRangeInclude; +import org.apache.solr.common.params.FacetParams.FacetRangeOther; +import org.apache.solr.schema.DateField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.TrieField; +import org.apache.solr.util.DateMathParser; + + +@SuppressWarnings("deprecation") +public abstract class RangeEndpointCalculator> { + protected final SchemaField field; + protected final RangeFacetRequest request; + + public RangeEndpointCalculator(final RangeFacetRequest request) { + this.field = request.getField(); + this.request = request; + } + + /** + * Formats a Range endpoint for use as a range label name in the response. + * Default Impl just uses toString() + */ + public String formatValue(final T val) { + return val.toString(); + } + + /** + * Parses a String param into an Range endpoint value throwing + * a useful exception if not possible + */ + public final T getValue(final String rawval) { + try { + return parseVal(rawval); + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't parse value "+rawval+" for field: " + field.getName(), e); + } + } + + /** + * Parses a String param into an Range endpoint. + * Can throw a low level format exception as needed. + */ + protected abstract T parseVal(final String rawval) throws java.text.ParseException; + + /** + * Parses a String param into a value that represents the gap and + * can be included in the response, throwing + * a useful exception if not possible. 
+ * + * Note: uses Object as the return type instead of T for things like + * Date where gap is just a DateMathParser string + */ + public final Object getGap(final String gap) { + try { + return parseGap(gap); + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't parse gap "+gap+" for field: " + field.getName(), e); + } + } + + /** + * Parses a String param into a value that represents the gap and + * can be included in the response. + * Can throw a low level format exception as needed. + * + * Default Impl calls parseVal + */ + protected Object parseGap(final String rawval) throws java.text.ParseException { + return parseVal(rawval); + } + + /** + * Adds the String gap param to a low Range endpoint value to determine + * the corresponding high Range endpoint value, throwing + * a useful exception if not possible. + * Any exception raised by parseAndAddGap is rethrown as a BAD_REQUEST + * SolrException that names the gap, the value and the field and keeps + * the original exception as its cause. + */ + public final T addGap(T value, String gap) { + try { + return parseAndAddGap(value, gap); + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't add gap "+gap+" to value " + value + " for field: " + field.getName(), e); + } + } + + /** + * Adds the String gap param to a low Range endpoint value to determine + * the corresponding high Range endpoint value. + * Can throw a low level format exception as needed. 
+ */ + protected abstract T parseAndAddGap(T value, String gap) throws java.text.ParseException; + + public static class FacetRange { + public final String name; + public final String lower; + public final String upper; + public final boolean includeLower; + public final boolean includeUpper; + + public FacetRange(String name, String lower, String upper, boolean includeLower, boolean includeUpper) { + this.name = name; + this.lower = lower; + this.upper = upper; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + } + } + + public List getRanges(){ + + final T start = getValue(request.getStart()); + T end = getValue(request.getEnd()); // not final, hardend may change this + + if( end.compareTo(start) < 0 ){ + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "range facet 'end' comes before 'start': "+end+" < "+start); + } + + // explicitly return the gap. compute this early so we are more + // likely to catch parse errors before attempting math + final String[] gaps = request.getGaps(); + String gap = gaps[0]; + + final EnumSet include = request.getInclude(); + + T low = start; + + List ranges = new ArrayList(); + + int gapCounter = 0; + + while (low.compareTo(end) < 0) { + if (gapCounter> create(RangeFacetRequest request){ + final SchemaField sf = request.getField(); + final FieldType ft = sf.getType(); + final RangeEndpointCalculator calc; + if (ft instanceof TrieField) { + final TrieField trie = (TrieField)ft; + switch (trie.getType()) { + case FLOAT: + calc = new FloatRangeEndpointCalculator(request); + break; + case DOUBLE: + calc = new DoubleRangeEndpointCalculator(request); + break; + case INTEGER: + calc = new IntegerRangeEndpointCalculator(request); + break; + case LONG: + calc = new LongRangeEndpointCalculator(request); + break; + default: + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to range facet on tried field of unexpected type:" + sf.getName()); + } + } else if (ft instanceof DateField) { + 
calc = new DateRangeEndpointCalculator(request, null); + } else { + throw new SolrException (SolrException.ErrorCode.BAD_REQUEST, "Unable to range facet on field:" + sf); + } + return calc; + } + + public static class FloatRangeEndpointCalculator extends RangeEndpointCalculator { + + public FloatRangeEndpointCalculator(final RangeFacetRequest request) { super(request); } + + @Override + protected Float parseVal(String rawval) { + return Float.valueOf(rawval); + } + + @Override + public Float parseAndAddGap(Float value, String gap) { + return new Float(value.floatValue() + Float.valueOf(gap).floatValue()); + } + + } + + public static class DoubleRangeEndpointCalculator extends RangeEndpointCalculator { + + public DoubleRangeEndpointCalculator(final RangeFacetRequest request) { super(request); } + + @Override + protected Double parseVal(String rawval) { + return Double.valueOf(rawval); + } + + @Override + public Double parseAndAddGap(Double value, String gap) { + return new Double(value.doubleValue() + Double.valueOf(gap).doubleValue()); + } + + } + + public static class IntegerRangeEndpointCalculator extends RangeEndpointCalculator { + + public IntegerRangeEndpointCalculator(final RangeFacetRequest request) { super(request); } + + @Override + protected Integer parseVal(String rawval) { + return Integer.valueOf(rawval); + } + + @Override + public Integer parseAndAddGap(Integer value, String gap) { + return new Integer(value.intValue() + Integer.valueOf(gap).intValue()); + } + + } + + public static class LongRangeEndpointCalculator extends RangeEndpointCalculator { + + public LongRangeEndpointCalculator(final RangeFacetRequest request) { super(request); } + + @Override + protected Long parseVal(String rawval) { + return Long.valueOf(rawval); + } + + @Override + public Long parseAndAddGap(Long value, String gap) { + return new Long(value.longValue() + Long.valueOf(gap).longValue()); + } + + } + + public static class DateRangeEndpointCalculator extends 
RangeEndpointCalculator { + private final Date now; + public DateRangeEndpointCalculator(final RangeFacetRequest request, final Date now) { + super(request); + this.now = now; + if (! (field.getType() instanceof DateField) ) { + throw new IllegalArgumentException("SchemaField must use filed type extending DateField"); + } + } + + @Override + @SuppressWarnings("deprecation") + public String formatValue(Date val) { + return ((DateField)field.getType()).toExternal(val); + } + + @Override + @SuppressWarnings("deprecation") + protected Date parseVal(String rawval) { + return ((DateField)field.getType()).parseMath(now, rawval); + } + + @Override + protected Object parseGap(final String rawval) { + return rawval; + } + + @Override + public Date parseAndAddGap(Date value, String gap) throws java.text.ParseException { + final DateMathParser dmp = new DateMathParser(); + dmp.setNow(value); + return dmp.parseMath(gap); + } + + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/package.html b/solr/core/src/java/org/apache/solr/analytics/util/package.html new file mode 100644 index 00000000000..3cc6e4a335a --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/package.html @@ -0,0 +1,27 @@ + + + + + + + +

    +Utilities used by analytics component +

    + + diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/AbsoluteValueDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/AbsoluteValueDoubleFunction.java new file mode 100644 index 00000000000..f42924875d6 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/AbsoluteValueDoubleFunction.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util.valuesource; + +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.analytics.util.AnalyticsParams; + +/** + * AbsoluteValueDoubleFunction takes the absolute value of the double value of the source it contains. 
+ */ +public class AbsoluteValueDoubleFunction extends SingleDoubleFunction { + public final static String NAME = AnalyticsParams.ABSOLUTE_VALUE; + + public AbsoluteValueDoubleFunction(ValueSource source) { + super(source); + } + + protected String name() { + return NAME; + } + + @Override + public String description() { + return name()+"("+source.description()+")"; + } + + protected double func(int doc, FunctionValues vals) { + double d = vals.doubleVal(doc); + if (d<0) { + return d*-1; + } else { + return d; + } + } + + @Override + public boolean equals(Object o) { + if (getClass() != o.getClass()) return false; + AbsoluteValueDoubleFunction other = (AbsoluteValueDoubleFunction)o; + return this.source.equals(other.source); + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/AddDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/AddDoubleFunction.java new file mode 100644 index 00000000000..7784febd053 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/AddDoubleFunction.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.solr.analytics.util.AnalyticsParams;
+
+/**
+ * AddDoubleFunction returns the sum of its components.
+ */
+public class AddDoubleFunction extends MultiDoubleFunction {
+  /** Function name, taken from {@link AnalyticsParams#ADD}. */
+  public final static String NAME = AnalyticsParams.ADD;
+
+  /**
+   * @param sources the value sources whose double values are added together.
+   */
+  public AddDoubleFunction(ValueSource[] sources) {
+    super(sources);
+  }
+
+  /** @return {@link #NAME}. */
+  @Override
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * Adds up the double value of every component for one document.
+   *
+   * @param doc the document id to read.
+   * @param valsArr the values of each component, summed in array order.
+   * @return the running total, which is 0 when the array is empty.
+   */
+  @Override
+  protected double func(int doc, FunctionValues[] valsArr) {
+    double total = 0d;
+    for (int i = 0; i < valsArr.length; i++) {
+      total = total + valsArr[i].doubleVal(doc);
+    }
+    return total;
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConcatStringFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConcatStringFunction.java
new file mode 100644
index 00000000000..97537a7619e
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConcatStringFunction.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.solr.analytics.util.AnalyticsParams;
+
+/**
+ * ConcatStringFunction joins the string values of its components,
+ * keeping the order in which the components were given.
+ */
+public class ConcatStringFunction extends MultiStringFunction {
+  /** Function name, taken from {@link AnalyticsParams#CONCATENATE}. */
+  public final static String NAME = AnalyticsParams.CONCATENATE;
+
+  /**
+   * @param sources the value sources whose string values are concatenated.
+   */
+  public ConcatStringFunction(ValueSource[] sources) {
+    super(sources);
+  }
+
+  /** @return {@link #NAME}. */
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * Builds the concatenation for one document.
+   *
+   * @param doc the document id to read.
+   * @param valsArr the values of each component, in concatenation order.
+   * @return the joined string, or null as soon as any component yields null.
+   */
+  @Override
+  protected String func(int doc, FunctionValues[] valsArr) {
+    final StringBuilder joined = new StringBuilder();
+    for (int i = 0; i < valsArr.length; i++) {
+      final String piece = valsArr[i].strVal(doc);
+      // A single missing piece makes the whole result missing.
+      if (piece == null) {
+        return null;
+      }
+      joined.append(piece);
+    }
+    return joined.toString();
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstDateSource.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstDateSource.java
new file mode 100644
index 00000000000..1ed8ca77705
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstDateSource.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.Date;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.docvalues.FloatDocValues;
+import org.apache.lucene.util.mutable.MutableValue;
+import org.apache.lucene.util.mutable.MutableValueDate;
+import org.apache.solr.analytics.util.AnalyticsParams;
+import org.apache.solr.schema.TrieDateField;
+
+/**
+ * ConstDateSource yields the same date for every document. The date is
+ * handed to {@link ConstDoubleSource} as its millisecond time value.
+ */
+public class ConstDateSource extends ConstDoubleSource {
+  /** Function name, taken from {@link AnalyticsParams#CONSTANT_DATE}. */
+  public final static String NAME = AnalyticsParams.CONSTANT_DATE;
+
+  /**
+   * @param constant the date to return; its {@link Date#getTime()} value is stored.
+   * @throws ParseException declared for callers; nothing in this body throws it.
+   */
+  public ConstDateSource(Date constant) throws ParseException {
+    super(constant.getTime());
+  }
+
+  /**
+   * @param constant the time value to return, as used by {@link Date#Date(long)}.
+   */
+  public ConstDateSource(Long constant) {
+    super(constant);
+  }
+
+  /** @return the function name followed by the externally formatted date in parentheses. */
+  @SuppressWarnings("deprecation")
+  @Override
+  public String description() {
+    final Date when = new Date(getLong());
+    return name()+"(" + TrieDateField.formatExternal(when) + ")";
+  }
+
+  /** @return {@link #NAME}. */
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * Creates per-document values that ignore the document id and always
+   * answer with the stored constant in the requested representation.
+   */
+  @Override
+  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
+    return new FloatDocValues(this) {
+      @Override
+      public long longVal(int doc) {
+        return getLong();
+      }
+      @Override
+      public int intVal(int doc) {
+        return getInt();
+      }
+      @Override
+      public float floatVal(int doc) {
+        return getFloat();
+      }
+      @Override
+      public double doubleVal(int doc) {
+        return getDouble();
+      }
+      @Override
+      public boolean boolVal(int doc) {
+        return getFloat() != 0.0f;
+      }
+      @Override
+      public Object objectVal(int doc) {
+        return new Date(longVal(doc));
+      }
+      @SuppressWarnings("deprecation")
+      @Override
+      public String strVal(int doc) {
+        final Date when = new Date(longVal(doc));
+        return TrieDateField.formatExternal(when);
+      }
+      @Override
+      public String toString(int doc) {
+        return description();
+      }
+
+      @Override
+      public ValueFiller getValueFiller() {
+        return new ValueFiller() {
+          private final MutableValueDate mval = new MutableValueDate();
+
+          @Override
+          public MutableValue getValue() {
+            return mval;
+          }
+
+          @Override
+          public void fillValue(int doc) {
+            // A constant is present for every document.
+            mval.exists = true;
+            mval.value = longVal(doc);
+          }
+        };
+      }
+    };
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstDoubleSource.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstDoubleSource.java
new file mode 100644
index 00000000000..63110867b85
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstDoubleSource.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
+import org.apache.lucene.queries.function.valuesource.ConstNumberSource;
+import org.apache.lucene.queries.function.valuesource.ConstValueSource;
+import org.apache.solr.analytics.util.AnalyticsParams;
+
+/**
+ * ConstDoubleSource returns a constant double for all documents
+ */
+public class ConstDoubleSource extends ConstNumberSource {
+  /** Function name, taken from {@link AnalyticsParams#CONSTANT_NUMBER}. */
+  public final static String NAME = AnalyticsParams.CONSTANT_NUMBER;
+  /** The value returned for every document. */
+  final double constant;
+
+  /**
+   * @param constant the value to return for every document.
+   */
+  public ConstDoubleSource(double constant) {
+    this.constant = constant;
+  }
+
+  /**
+   * @return the function name followed by the constant in parentheses. The
+   *         constant is rendered through {@link #getFloat()}, i.e. narrowed to float.
+   */
+  @Override
+  public String description() {
+    return name()+"(" + getFloat() + ")";
+  }
+
+  /** @return {@link #NAME}. */
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * Creates per-document values that return the constant for any document id
+   * and report every document as having a value.
+   */
+  @Override
+  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
+    return new DoubleDocValues(this) {
+      @Override
+      public double doubleVal(int doc) {
+        return constant;
+      }
+      @Override
+      public boolean exists(int doc) {
+        return true;
+      }
+    };
+  }
+
+  /**
+   * Hashes the full 64-bit pattern of the constant. Casting the bits straight
+   * to int would keep only the low half, which is zero for values such as 1.0 or 2.0.
+   */
+  @Override
+  public int hashCode() {
+    long bits = Double.doubleToLongBits(constant);
+    return (int)(bits ^ (bits >>> 32)) * 31;
+  }
+
+  /**
+   * Two sources are equal when they have exactly the same class and hold the
+   * same constant, compared by bit pattern so the result agrees with {@link #hashCode()}.
+   *
+   * @param o the object to compare with; may be null.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    // The previous test checked for ConstValueSource, an unrelated class: it rejected
+    // every ConstDoubleSource and made the cast below fail for a real ConstValueSource.
+    if (o == null || getClass() != o.getClass()) return false;
+    ConstDoubleSource other = (ConstDoubleSource)o;
+    return Double.doubleToLongBits(this.constant) == Double.doubleToLongBits(other.constant);
+  }
+
+  /** @return the constant narrowed to int. */
+  @Override
+  public int getInt() {
+    return (int)constant;
+  }
+
+  /** @return the constant narrowed to long. */
+  @Override
+  public long getLong() {
+    return (long)constant;
+  }
+
+  /** @return the constant narrowed to float. */
+  @Override
+  public float getFloat() {
+    return (float)constant;
+  }
+
+  /** @return the constant unchanged. */
+  @Override
+  public double getDouble() {
+    return constant;
+  }
+
+  /** @return the constant boxed as a {@link Double}. */
+  @Override
+  public Number getNumber() {
+    return Double.valueOf(constant);
+  }
+
+  /** @return true unless the constant compares equal to zero. */
+  @Override
+  public boolean getBool() {
+    return constant != 0.0f;
+  }
+
+}
diff --git
a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstStringSource.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstStringSource.java new file mode 100644 index 00000000000..c2c9af78e7f --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ConstStringSource.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
+import org.apache.solr.analytics.util.AnalyticsParams;
+
+/**
+ * ConstStringSource yields the same string for every document.
+ */
+public class ConstStringSource extends LiteralValueSource {
+  /** Function name, taken from {@link AnalyticsParams#CONSTANT_STRING}. */
+  public final static String NAME = AnalyticsParams.CONSTANT_STRING;
+
+  /**
+   * @param string the string to return for every document.
+   */
+  public ConstStringSource(String string) {
+    super(string);
+  }
+
+  /** @return the function name followed by the constant string in parentheses. */
+  @Override
+  public String description() {
+    return name()+"(" + string + ")";
+  }
+
+  /** @return {@link #NAME}. */
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * @param o the object to compare with.
+   * @return true for the same instance, or for another ConstStringSource whose
+   *         value equals this one's.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o instanceof ConstStringSource) {
+      final ConstStringSource rhs = (ConstStringSource) o;
+      return getValue().equals(rhs.getValue());
+    }
+    return false;
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java
new file mode 100644
index 00000000000..2b0dbae72e4
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.Date;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.docvalues.LongDocValues;
+import org.apache.lucene.queries.function.valuesource.LongFieldSource;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.mutable.MutableValue;
+import org.apache.lucene.util.mutable.MutableValueDate;
+import org.apache.solr.schema.TrieDateField;
+
+/**
+ * Extends {@link LongFieldSource} to have a field source that takes in
+ * and returns {@link Date} values while working with long values internally.
+ */
+public class DateFieldSource extends LongFieldSource {
+
+  /**
+   * Creates a source with no parser.
+   *
+   * @param field the name of the field to read.
+   * @throws ParseException declared for callers; nothing in this body throws it.
+   */
+  public DateFieldSource(String field) throws ParseException {
+    super(field, null);
+  }
+
+  /**
+   * @param field the name of the field to read.
+   * @param parser the parser handed to the field cache and used by {@link #externalToLong(String)}.
+   */
+  public DateFieldSource(String field, FieldCache.LongParser parser) {
+    super(field, parser);
+  }
+
+  /**
+   * Parses an external value into its long form with the configured parser.
+   * NOTE(review): dereferences the parser directly, so this presumably fails for
+   * instances built without one - confirm against callers.
+   */
+  public long externalToLong(String extVal) {
+    return parser.parseLong(new BytesRef(extVal));
+  }
+
+  /** @return a {@link Date} built from the given long value. */
+  public Object longToObject(long val) {
+    return new Date(val);
+  }
+
+  /** @return the externally formatted date for the given long value. */
+  @SuppressWarnings("deprecation")
+  public String longToString(long val) {
+    return TrieDateField.formatExternal((Date)longToObject(val));
+  }
+
+  /**
+   * Loads the field's long values and its docs-with-field bits from the cache
+   * and exposes them as dates. Object and string values are null for documents
+   * without a value.
+   */
+  @Override
+  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
+    final FieldCache.Longs arr = cache.getLongs(readerContext.reader(), field, parser, true);
+    final Bits valid = cache.getDocsWithField(readerContext.reader(), field);
+    return new LongDocValues(this) {
+      @Override
+      public long longVal(int doc) {
+        return arr.get(doc);
+      }
+
+      @Override
+      public boolean exists(int doc) {
+        return valid.get(doc);
+      }
+
+      @Override
+      public Object objectVal(int doc) {
+        return exists(doc) ? longToObject(arr.get(doc)) : null;
+      }
+
+      @Override
+      public String strVal(int doc) {
+        return exists(doc) ? longToString(arr.get(doc)) : null;
+      }
+
+      @Override
+      public ValueFiller getValueFiller() {
+        return new ValueFiller() {
+          private final MutableValueDate mval = new MutableValueDate();
+
+          @Override
+          public MutableValue getValue() {
+            return mval;
+          }
+
+          @Override
+          public void fillValue(int doc) {
+            mval.value = arr.get(doc);
+            mval.exists = exists(doc);
+          }
+        };
+      }
+
+    };
+  }
+
+  /**
+   * Two sources are equal when they have exactly the same class, read the same
+   * field, and either both lack a parser or have equal parsers.
+   *
+   * @param o the object to compare with; may be null.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    // equals(null) has to answer false rather than throw a NullPointerException.
+    if (o == null || o.getClass() != this.getClass()) return false;
+    DateFieldSource other = (DateFieldSource) o;
+    if (!field.equals(other.field)) return false;
+    // Check the parser from both sides; looking only at this.parser made a
+    // parser-less source equal to one with a parser, but not the other way round.
+    return parser == null ? other.parser == null : parser.equals(other.parser);
+  }
+
+  /**
+   * @return the superclass hash plus the hash of the parser's class, or of this
+   *         class when there is no parser.
+   */
+  @Override
+  public int hashCode() {
+    int h = parser == null ? this.getClass().hashCode() : parser.getClass().hashCode();
+    h += super.hashCode();
+    return h;
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DateMathFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DateMathFunction.java
new file mode 100644
index 00000000000..f2d4c4a858a
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DateMathFunction.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import java.text.ParseException;
+import java.util.Date;
+
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
+import org.apache.solr.analytics.util.AnalyticsParams;
+import org.apache.solr.util.DateMathParser;
+
+/**
+ * DateMathFunction applies a sequence of DateMath operations to a start date
+ * and returns the resulting date.
+ */
+public class DateMathFunction extends MultiDateFunction {
+  /** Function name, taken from {@link AnalyticsParams#DATE_MATH}. */
+  public final static String NAME = AnalyticsParams.DATE_MATH;
+  /** Parser reused for every document; its "now" is reset at the start of each {@link #func} call. */
+  final private DateMathParser parser;
+
+  /**
+   * @param sources A list of ValueSource objects. The first element in the list
+   * should be a {@link DateFieldSource} or {@link ConstDateSource} object which
+   * represents the starting date. The rest of the list should be {@link BytesRefFieldSource}
+   * or {@link ConstStringSource} objects which contain the DateMath operations to perform on
+   * the start date.
+   */
+  public DateMathFunction(ValueSource[] sources) {
+    super(sources);
+    parser = new DateMathParser();
+  }
+
+  /** @return {@link #NAME}. */
+  @Override
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * Starts from the date held by the first component and applies the operation
+   * string of every further component in order, each one relative to the
+   * previous result.
+   *
+   * @param doc the document id to read.
+   * @param valsArr the start date values followed by the operation string values.
+   * @return the time of the final date. If an operation cannot be parsed, the
+   *         stack trace is printed and the time of the last date obtained before
+   *         the failure is returned.
+   */
+  @Override
+  protected long func(int doc, FunctionValues[] valsArr) {
+    Date current = (Date)valsArr[0].objectVal(doc);
+    try {
+      parser.setNow(current);
+      int idx = 1;
+      while (idx < valsArr.length) {
+        current = parser.parseMath(valsArr[idx].strVal(doc));
+        parser.setNow(current);
+        idx++;
+      }
+      return parser.getNow().getTime();
+    } catch (ParseException e) {
+      e.printStackTrace();
+      return current.getTime();
+    }
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DivDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DivDoubleFunction.java
new file mode 100644
index 00000000000..f029d79a351
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DivDoubleFunction.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.solr.analytics.util.AnalyticsParams;
+
+/**
+ * DivDoubleFunction divides the double value of 'a' by the double value of 'b'.
+ */
+public class DivDoubleFunction extends DualDoubleFunction {
+  /** Function name, taken from {@link AnalyticsParams#DIVIDE}. */
+  public final static String NAME = AnalyticsParams.DIVIDE;
+
+  /**
+   * @param a the numerator.
+   * @param b the denominator.
+   */
+  public DivDoubleFunction(ValueSource a, ValueSource b) {
+    super(a, b);
+  }
+
+  /** @return {@link #NAME}. */
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * @param doc the document id to read.
+   * @param aVals the numerator values.
+   * @param bVals the denominator values.
+   * @return the numerator divided by the denominator, using plain double division.
+   */
+  @Override
+  protected double func(int doc, FunctionValues aVals, FunctionValues bVals) {
+    final double numerator = aVals.doubleVal(doc);
+    final double denominator = bVals.doubleVal(doc);
+    return numerator / denominator;
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DualDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DualDoubleFunction.java
new file mode 100644
index 00000000000..21f5edab710
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/DualDoubleFunction.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
+import org.apache.lucene.search.IndexSearcher;
+
+/**
+ * Abstract {@link ValueSource} implementation which wraps two ValueSources
+ * and applies an extendible double function to their values.
+ */
+public abstract class DualDoubleFunction extends ValueSource {
+  /** First operand, passed to {@link #func} as <code>aVals</code>. */
+  protected final ValueSource a;
+  /** Second operand, passed to {@link #func} as <code>bVals</code>. */
+  protected final ValueSource b;
+
+  /**
+   * @param a the first operand.
+   * @param b the second operand.
+   */
+  public DualDoubleFunction(ValueSource a, ValueSource b) {
+    this.a = a;
+    this.b = b;
+  }
+
+  /** @return the function name used in descriptions and in the hash code. */
+  protected abstract String name();
+
+  /**
+   * Combines the two operands' values for one document.
+   *
+   * @param doc the document id to read.
+   * @param aVals the values of the first operand.
+   * @param bVals the values of the second operand.
+   * @return the function result for the document.
+   */
+  protected abstract double func(int doc, FunctionValues aVals, FunctionValues bVals);
+
+  /** @return the function name followed by both operand descriptions, comma separated, in parentheses. */
+  @Override
+  public String description() {
+    return name() + "(" + a.description() + "," + b.description() + ")";
+  }
+
+  /**
+   * Resolves both operands for the given reader and returns values that apply
+   * {@link #func} per document. A document has a value only when both operands have one.
+   */
+  @Override
+  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
+    final FunctionValues aVals = a.getValues(context, readerContext);
+    final FunctionValues bVals = b.getValues(context, readerContext);
+    return new DoubleDocValues(this) {
+      @Override
+      public double doubleVal(int doc) {
+        return func(doc, aVals, bVals);
+      }
+
+      @Override
+      public boolean exists(int doc) {
+        // Logical AND; the second lookup is skipped once the first operand is missing.
+        return aVals.exists(doc) && bVals.exists(doc);
+      }
+
+      @Override
+      public String toString(int doc) {
+        return name() + '(' + aVals.toString(doc) + ',' + bVals.toString(doc) + ')';
+      }
+    };
+  }
+
+  /** Forwards weight creation to both operands. */
+  @Override
+  public void createWeight(Map context, IndexSearcher searcher) throws IOException {
+    a.createWeight(context,searcher);
+    b.createWeight(context,searcher);
+  }
+
+  /**
+   * Two functions are equal when they have exactly the same class and equal
+   * operands in the same positions.
+   *
+   * @param o the object to compare with; may be null.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    // equals(null) has to answer false rather than throw a NullPointerException.
+    if (o == null || getClass() != o.getClass()) return false;
+    DualDoubleFunction other = (DualDoubleFunction)o;
+    return this.a.equals(other.a)
+        && this.b.equals(other.b);
+  }
+
+  /** @return a hash mixing both operand hashes, in order, with the hash of the function name. */
+  @Override
+  public int hashCode() {
+    int h = a.hashCode();
+    h ^= (h << 13) | (h >>> 20);
+    h += b.hashCode();
+    h ^= (h << 23) | (h >>> 10);
+    h += name().hashCode();
+    return h;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/FilterFieldSource.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/FilterFieldSource.java
new file mode 100644
index 00000000000..33a9995845f
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/FilterFieldSource.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analytics.util.valuesource;
+
+import java.io.IOException;
+import java.util.Date;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.util.mutable.MutableValue;
+import org.apache.solr.analytics.util.AnalyticsParams;
+import org.apache.solr.schema.TrieDateField;
+
+/**
+ * FilterFieldSource wraps a value source and reports a document's value as
+ * missing when it is null or equal to the configured {@link #missValue}.
+ */
+public class FilterFieldSource extends ValueSource {
+  /** Function name, taken from {@link AnalyticsParams#FILTER}. */
+  public final static String NAME = AnalyticsParams.FILTER;
+  /** The value that is treated as "missing". */
+  public final Object missValue;
+  /** The wrapped source that supplies the actual values. */
+  protected final ValueSource source;
+
+  /**
+   * @param source the source to wrap.
+   * @param missValue the value to treat as missing; it is dereferenced without
+   *        a null check by {@link #description()}, {@link #equals(Object)} and
+   *        {@link #hashCode()}.
+   */
+  public FilterFieldSource(ValueSource source, Object missValue) {
+    this.source = source;
+    this.missValue = missValue;
+  }
+
+  /** @return {@link #NAME}. */
+  protected String name() {
+    return NAME;
+  }
+
+  /**
+   * @return the function name followed by the wrapped source's description and
+   *         the missing value; a {@link Date} missing value is formatted externally.
+   */
+  @SuppressWarnings("deprecation")
+  @Override
+  public String description() {
+    if (missValue.getClass().equals(Date.class)) {
+      return name()+"("+source.description()+","+TrieDateField.formatExternal((Date)missValue)+")";
+    } else {
+      return name()+"("+source.description()+","+missValue.toString()+")";
+    }
+  }
+
+  /**
+   * Returns values that delegate every typed accessor to the wrapped source.
+   * Only {@code exists}, {@code objectVal} and the value filler apply the
+   * missing-value filter.
+   */
+  @Override
+  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
+    final FunctionValues vals = source.getValues(context, readerContext);
+    return new FunctionValues() {
+
+      @Override
+      public byte byteVal(int doc) {
+        return vals.byteVal(doc);
+      }
+
+      @Override
+      public short shortVal(int doc) {
+        return vals.shortVal(doc);
+      }
+
+      @Override
+      public float floatVal(int doc) {
+        return vals.floatVal(doc);
+      }
+
+      @Override
+      public int intVal(int doc) {
+        return vals.intVal(doc);
+      }
+
+      @Override
+      public long longVal(int doc) {
+        return vals.longVal(doc);
+      }
+
+      @Override
+      public double doubleVal(int doc) {
+        return vals.doubleVal(doc);
+      }
+
+      @Override
+      public String strVal(int doc) {
+        return vals.strVal(doc);
+      }
+
+      @Override
+      public Object objectVal(int doc) {
+        return exists(doc)? vals.objectVal(doc) : null;
+      }
+
+      @Override
+      public boolean exists(int doc) {
+        // Present only when the wrapped value is non-null and differs from missValue.
+        Object other = vals.objectVal(doc);
+        return other!=null&&!missValue.equals(other);
+      }
+
+      @Override
+      public String toString(int doc) {
+        return NAME + '(' + vals.toString(doc) + ')';
+      }
+
+      @Override
+      public ValueFiller getValueFiller() {
+        return new ValueFiller() {
+          private final ValueFiller delegateFiller = vals.getValueFiller();
+          private final MutableValue mval = delegateFiller.getValue();
+
+          @Override
+          public MutableValue getValue() {
+            return mval;
+          }
+
+          @Override
+          public void fillValue(int doc) {
+            // Let the wrapped filler load the value, then override its existence flag.
+            delegateFiller.fillValue(doc);
+            mval.exists = exists(doc);
+          }
+        };
+      }
+    };
+  }
+
+  /** @return the innermost wrapped source that is not itself a FilterFieldSource. */
+  public ValueSource getRootSource() {
+    if (source instanceof FilterFieldSource) {
+      return ((FilterFieldSource)source).getRootSource();
+    } else {
+      return source;
+    }
+  }
+
+  /**
+   * Two filters are equal when they have exactly the same class, wrap equal
+   * sources and use equal missing values.
+   *
+   * @param o the object to compare with; may be null.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    // equals(null) has to answer false rather than throw a NullPointerException.
+    if (o == null || getClass() != o.getClass()) return false;
+    FilterFieldSource other = (FilterFieldSource)o;
+    return this.source.equals(other.source) && this.missValue.equals(other.missValue);
+  }
+
+  /**
+   * @return a hash over the wrapped source, the function name and the missing
+   *         value, so filters that differ only in missValue hash differently.
+   */
+  @Override
+  public int hashCode() {
+    return 31 * (source.hashCode()+name().hashCode()) + missValue.hashCode();
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/LogDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/LogDoubleFunction.java
new file mode 100644
index 00000000000..c4729a31ad8
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/LogDoubleFunction.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util.valuesource; + +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.analytics.util.AnalyticsParams; + +/** + * LogDoubleFunction returns the logarithm of 'a' in base 'b', computed as log(a)/log(b) on their double values. + */ +public class LogDoubleFunction extends DualDoubleFunction { + public final static String NAME = AnalyticsParams.LOG; + + public LogDoubleFunction(ValueSource a, ValueSource b) { + super(a,b); + } + + protected String name() { + return NAME; + } + + @Override + protected double func(int doc, FunctionValues aVals, FunctionValues bVals) { + return Math.log(aVals.doubleVal(doc))/Math.log(bVals.doubleVal(doc)); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/MultiDateFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/MultiDateFunction.java new file mode 100644 index 00000000000..1b51f6dbb38 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/MultiDateFunction.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util.valuesource; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.docvalues.LongDocValues; +import org.apache.lucene.util.mutable.MutableValue; +import org.apache.lucene.util.mutable.MutableValueDate; + +/** + * Abstract {@link ValueSource} implementation which wraps multiple ValueSources + * and applies an extendible date function to their values. 
+ **/ +public abstract class MultiDateFunction extends ValueSource { + protected final ValueSource[] sources; + + public MultiDateFunction(ValueSource[] sources) { + this.sources = sources; + } + + abstract protected String name(); + abstract protected long func(int doc, FunctionValues[] valsArr); + + @Override + public String description() { + StringBuilder sb = new StringBuilder(); + sb.append(name()).append('('); + boolean firstTime=true; + for (ValueSource source : sources) { + if (firstTime) { + firstTime=false; + } else { + sb.append(','); + } + sb.append(source); + } + sb.append(')'); + return sb.toString(); + } + + @Override + public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { + final FunctionValues[] valsArr = new FunctionValues[sources.length]; + for (int i=0; iMultiplyDoubleFunction returns the product of its components. + */ +public class MultiplyDoubleFunction extends MultiDoubleFunction { + public final static String NAME = AnalyticsParams.MULTIPLY; + + public MultiplyDoubleFunction(ValueSource[] sources) { + super(sources); + } + + @Override + protected String name() { + return NAME; + } + + @Override + protected double func(int doc, FunctionValues[] valsArr) { + double product = 1d; + for (FunctionValues val : valsArr) { + product *= val.doubleVal(doc); + } + return product; + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/NegateDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/NegateDoubleFunction.java new file mode 100644 index 00000000000..4bff8d0845e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/NegateDoubleFunction.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util.valuesource; + +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.analytics.util.AnalyticsParams; + +/** + * NegateDoubleFunction negates the double value of the source it contains. + */ +public class NegateDoubleFunction extends SingleDoubleFunction { + public final static String NAME = AnalyticsParams.NEGATE; + + public NegateDoubleFunction(ValueSource source) { + super(source); + } + + protected String name() { + return NAME; + } + + @Override + public String description() { + return name()+"("+source.description()+")"; + } + + protected double func(int doc, FunctionValues vals) { + return vals.doubleVal(doc)*-1; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; /* null-safe: equals(null) must return false, not throw */ + NegateDoubleFunction other = (NegateDoubleFunction)o; + return this.source.equals(other.source); + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/PowDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/PowDoubleFunction.java new file mode 100644 index 00000000000..1b4348b0118 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/PowDoubleFunction.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + *
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util.valuesource; + +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.analytics.util.AnalyticsParams; + +/** + * PowDoubleFunction returns 'a' raised to the power of 'b'. + */ +public class PowDoubleFunction extends DualDoubleFunction { + public final static String NAME = AnalyticsParams.POWER; + + /** + * @param a the base. + * @param b the exponent. 
+ */ + public PowDoubleFunction(ValueSource a, ValueSource b) { + super(a, b); + } + + @Override + protected String name() { + return NAME; + } + + @Override + protected double func(int doc, FunctionValues aVals, FunctionValues bVals) { + return Math.pow(aVals.doubleVal(doc), bVals.doubleVal(doc)); + } +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ReverseStringFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ReverseStringFunction.java new file mode 100644 index 00000000000..568f94e1821 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/ReverseStringFunction.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util.valuesource; + +import org.apache.commons.lang.StringUtils; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.analytics.util.AnalyticsParams; + +/** + * ReverseStringFunction reverses the string value of the source it contains. 
+ */ +public class ReverseStringFunction extends SingleStringFunction { + public final static String NAME = AnalyticsParams.REVERSE; + + public ReverseStringFunction(ValueSource source) { + super(source); + } + + protected String name() { + return NAME; + } + + protected CharSequence func(int doc, FunctionValues vals) { + String val = vals.strVal(doc); + return val != null ? StringUtils.reverse(val) : null; + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/SingleDoubleFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/SingleDoubleFunction.java new file mode 100644 index 00000000000..45fc4caf989 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/SingleDoubleFunction.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.analytics.util.valuesource; + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.docvalues.DoubleDocValues; + +/** + * Abstract {@link ValueSource} implementation which wraps one ValueSource + * and applies an extendible double function to its values. + */ +public abstract class SingleDoubleFunction extends ValueSource { + protected final ValueSource source; + + public SingleDoubleFunction(ValueSource source) { + this.source = source; + } + + @Override + public String description() { + return name()+"("+source.description()+")"; + } + + abstract String name(); + abstract double func(int doc, FunctionValues vals); + + @Override + public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { + final FunctionValues vals = source.getValues(context, readerContext); + return new DoubleDocValues(this) { + @Override + public double doubleVal(int doc) { + return func(doc, vals); + } + + @Override + public boolean exists(int doc) { + return vals.exists(doc); + } + + @Override + public String toString(int doc) { + return name() + '(' + vals.toString(doc) + ')'; + } + }; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; /* null-safe: equals(null) must return false, not throw */ + SingleDoubleFunction other = (SingleDoubleFunction)o; + return this.source.equals(other.source); + } + + @Override + public int hashCode() { + return source.hashCode()+name().hashCode(); + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/SingleStringFunction.java b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/SingleStringFunction.java new file mode 100644 index 00000000000..c5178b9d1b7 --- /dev/null +++
b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/SingleStringFunction.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.analytics.util.valuesource; + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.docvalues.StrDocValues; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.mutable.MutableValue; +import org.apache.lucene.util.mutable.MutableValueStr; + +/** + * Abstract {@link ValueSource} implementation which wraps one ValueSource + * and applies an extendible string function to its values. 
+ */ +public abstract class SingleStringFunction extends ValueSource { + protected final ValueSource source; + + public SingleStringFunction(ValueSource source) { + this.source = source; + } + + @Override + public String description() { + return name()+"("+source.description()+")"; + } + + abstract String name(); + abstract CharSequence func(int doc, FunctionValues vals); + + @Override + public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { + final FunctionValues vals = source.getValues(context, readerContext); + return new StrDocValues(this) { + @Override + public String strVal(int doc) { + CharSequence cs = func(doc, vals); + return cs != null ? cs.toString() : null; + } + + @Override + public boolean bytesVal(int doc, BytesRef bytes) { + CharSequence cs = func(doc, vals); + if( cs != null ){ + bytes.copyChars(cs); /* copy the value that was null-checked above; func is evaluated once per call */ + return true; + } else { + bytes.bytes = BytesRef.EMPTY_BYTES; + bytes.length = 0; + bytes.offset = 0; + return false; + } + } + + @Override + public Object objectVal(int doc) { + return strVal(doc); + } + + @Override + public boolean exists(int doc) { + return vals.exists(doc); + } + + @Override + public String toString(int doc) { + return name() + '(' + strVal(doc) + ')'; + } + + @Override + public ValueFiller getValueFiller() { + return new ValueFiller() { + private final MutableValueStr mval = new MutableValueStr(); + + @Override + public MutableValue getValue() { + return mval; + } + + @Override + public void fillValue(int doc) { + mval.exists = bytesVal(doc, mval.value); + } + }; + } + }; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; /* null-safe: equals(null) must return false, not throw */ + SingleStringFunction other = (SingleStringFunction)o; + return this.source.equals(other.source); + } + + @Override + public int hashCode() { + return source.hashCode()+name().hashCode(); + } + +} diff --git a/solr/core/src/java/org/apache/solr/analytics/util/valuesource/package.html
b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/package.html new file mode 100644 index 00000000000..c5059c1920e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/analytics/util/valuesource/package.html @@ -0,0 +1,27 @@ + + + + + + + +

    +ValueSource function/sources used by analytics component +

    + + diff --git a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java index 7fde6eebd15..7dc0e0e4c88 100644 --- a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java +++ b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java @@ -93,6 +93,8 @@ public class JettySolrRunner { /** Maps servlet holders (i.e. factories: class + init params) to path specs */ private SortedMap extraServlets = new TreeMap(); + private SortedMap extraRequestFilters; + private LinkedList extraFilters; private SSLConfig sslConfig; @@ -167,16 +169,30 @@ public class JettySolrRunner { public JettySolrRunner(String solrHome, String context, int port, String solrConfigFilename, String schemaFileName, boolean stopAtShutdown, SortedMap extraServlets) { - if (null != extraServlets) { this.extraServlets.putAll(extraServlets); } - this.init(solrHome, context, port, stopAtShutdown); - this.solrConfigFilename = solrConfigFilename; - this.schemaFilename = schemaFileName; + this (solrHome, context, port, solrConfigFilename, schemaFileName, + stopAtShutdown, extraServlets, null, null); } public JettySolrRunner(String solrHome, String context, int port, String solrConfigFilename, String schemaFileName, boolean stopAtShutdown, SortedMap extraServlets, SSLConfig sslConfig) { + this (solrHome, context, port, solrConfigFilename, schemaFileName, + stopAtShutdown, extraServlets, sslConfig, null); + } + + /** + * Constructor taking an ordered list of additional (filter holder -> path spec) mappings. + * Filters are placed after the DebugFilter but before the SolrDispatchFilter. 
+ */ + public JettySolrRunner(String solrHome, String context, int port, + String solrConfigFilename, String schemaFileName, boolean stopAtShutdown, + SortedMap extraServlets, SSLConfig sslConfig, + SortedMap extraRequestFilters) { if (null != extraServlets) { this.extraServlets.putAll(extraServlets); } + if (null != extraRequestFilters) { + this.extraRequestFilters = new TreeMap(extraRequestFilters.comparator()); + this.extraRequestFilters.putAll(extraRequestFilters); + } this.init(solrHome, context, port, stopAtShutdown); this.solrConfigFilename = solrConfigFilename; this.schemaFilename = schemaFileName; @@ -227,6 +243,7 @@ public class JettySolrRunner { : new SelectChannelConnector(); c.setReuseAddress(true); c.setLowResourcesMaxIdleTime(1500); + c.setSoLingerTime(0); connector = c; threadPool = (QueuedThreadPool) c.getThreadPool(); } else if ("Socket".equals(connectorName)) { @@ -234,6 +251,7 @@ public class JettySolrRunner { ? new SslSocketConnector(sslcontext) : new SocketConnector(); c.setReuseAddress(true); + c.setSoLingerTime(0); connector = c; threadPool = (QueuedThreadPool) c.getThreadPool(); } else { @@ -307,6 +325,13 @@ public class JettySolrRunner { // SolrDispatchFilter filter = new SolrDispatchFilter(); // FilterHolder fh = new FilterHolder(filter); debugFilter = root.addFilter(DebugFilter.class, "*", EnumSet.of(DispatcherType.REQUEST) ); + if (extraRequestFilters != null) { + extraFilters = new LinkedList(); + for (Class filterClass : extraRequestFilters.keySet()) { + extraFilters.add(root.addFilter(filterClass, extraRequestFilters.get(filterClass), + EnumSet.of(DispatcherType.REQUEST))); + } + } dispatchFilter = root.addFilter(SolrDispatchFilter.class, "*", EnumSet.of(DispatcherType.REQUEST) ); for (ServletHolder servletHolder : extraServlets.keySet()) { String pathSpec = extraServlets.get(servletHolder); @@ -443,6 +468,11 @@ public class JettySolrRunner { //server.destroy(); if (server.getState().equals(Server.FAILED)) { filter.destroy(); + if 
(extraFilters != null) { + for (FilterHolder f : extraFilters) { + f.getFilter().destroy(); + } + } } server.join(); diff --git a/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java b/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java index bbb42dff636..2e6d2ef3dcf 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java +++ b/solr/core/src/java/org/apache/solr/cloud/DistributedQueue.java @@ -115,7 +115,7 @@ public class DistributedQueue { * * @return the data at the head of the queue. */ - private QueueEvent element() throws NoSuchElementException, KeeperException, + private QueueEvent element() throws KeeperException, InterruptedException { TreeMap orderedChildren; @@ -130,9 +130,9 @@ public class DistributedQueue { try { orderedChildren = orderedChildren(null); } catch (KeeperException.NoNodeException e) { - throw new NoSuchElementException(); + return null; } - if (orderedChildren.size() == 0) throw new NoSuchElementException(); + if (orderedChildren.size() == 0) return null; for (String headNode : orderedChildren.values()) { if (headNode != null) { @@ -208,7 +208,7 @@ public class DistributedQueue { @Override public void process(WatchedEvent event) { - LOG.info("Watcher fired on path: " + event.getPath() + " state: " + LOG.info("LatchChildWatcher fired on path: " + event.getPath() + " state: " + event.getState() + " type " + event.getType()); synchronized (lock) { this.event = event; @@ -322,11 +322,9 @@ public class DistributedQueue { * @return data at the first element of the queue, or null. 
*/ public byte[] peek() throws KeeperException, InterruptedException { - try { - return element().getBytes(); - } catch (NoSuchElementException e) { - return null; - } + QueueEvent element = element(); + if(element == null) return null; + return element.getBytes(); } public static class QueueEvent { @@ -384,16 +382,29 @@ public class DistributedQueue { /** * Returns the data at the first element of the queue, or null if the queue is - * empty. + * empty and block is false. * + * @param block if true, blocks until an element enters the queue * @return data at the first element of the queue, or null. */ public QueueEvent peek(boolean block) throws KeeperException, InterruptedException { - if (!block) { + return peek(block ? Long.MAX_VALUE : 0); + } + + /** + * Returns the data at the first element of the queue, or null if the queue is + * empty after wait ms. + * + * @param wait max wait time in ms. + * @return data at the first element of the queue, or null. + */ + public QueueEvent peek(long wait) throws KeeperException, InterruptedException { + if (wait == 0) { return element(); } - + TreeMap orderedChildren; + boolean waitedEnough = false; while (true) { LatchChildWatcher childWatcher = new LatchChildWatcher(); try { @@ -402,11 +413,15 @@ public class DistributedQueue { zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true); continue; } + if(waitedEnough) { + if(orderedChildren.isEmpty()) return null; + } if (orderedChildren.size() == 0) { - childWatcher.await(DEFAULT_TIMEOUT); + childWatcher.await(wait == Long.MAX_VALUE ? 
DEFAULT_TIMEOUT: wait); + waitedEnough = wait != Long.MAX_VALUE; continue; } - + for (String headNode : orderedChildren.values()) { String path = dir + "/" + headNode; try { diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java index ca5634044d5..314bd1094f6 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java @@ -71,6 +71,10 @@ public abstract class ElectionContext { } abstract void runLeaderProcess(boolean weAreReplacement) throws KeeperException, InterruptedException, IOException; + + public void checkIfIamLeaderFired() {} + + public void joinedElectionFired() {} } class ShardLeaderElectionContextBase extends ElectionContext { @@ -114,9 +118,9 @@ class ShardLeaderElectionContextBase extends ElectionContext { final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase { private static Logger log = LoggerFactory.getLogger(ShardLeaderElectionContext.class); - private ZkController zkController; - private CoreContainer cc; - private SyncStrategy syncStrategy = new SyncStrategy(); + private final ZkController zkController; + private final CoreContainer cc; + private final SyncStrategy syncStrategy; private volatile boolean isClosed = false; @@ -127,6 +131,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase { zkController.getZkStateReader()); this.zkController = zkController; this.cc = cc; + syncStrategy = new SyncStrategy(cc.getUpdateShardHandler()); } @Override @@ -180,6 +185,17 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase { // we are going to attempt to be the leader // first cancel any current recovery core.getUpdateHandler().getSolrCoreState().cancelRecovery(); + + if (weAreReplacement) { + // wait a moment for any floating updates to finish + try { + Thread.sleep(2500); + } catch (InterruptedException e) { + 
Thread.currentThread().interrupt(); + throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, e); + } + } + boolean success = false; try { success = syncStrategy.sync(zkController, core, leaderProps); @@ -438,4 +454,15 @@ final class OverseerElectionContext extends ElectionContext { overseer.start(id); } + @Override + public void joinedElectionFired() { + overseer.close(); + } + + @Override + public void checkIfIamLeaderFired() { + // leader changed - close the overseer + overseer.close(); + } + } diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java index a2fd4c70e96..0a8bdccf236 100644 --- a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java +++ b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java @@ -63,12 +63,20 @@ public class LeaderElector { protected SolrZkClient zkClient; private ZkCmdExecutor zkCmdExecutor; - + + // for tests + private volatile ElectionContext context; + public LeaderElector(SolrZkClient zkClient) { this.zkClient = zkClient; zkCmdExecutor = new ZkCmdExecutor((int) (zkClient.getZkClientTimeout()/1000.0 + 3000)); } + // for tests + public ElectionContext getContext() { + return context; + } + /** * Check if the candidate with the given n_* sequence number is the leader. * If it is, set the leaderId on the leader zk node. If it is not, start @@ -79,6 +87,7 @@ public class LeaderElector { */ private void checkIfIamLeader(final int seq, final ElectionContext context, boolean replacement) throws KeeperException, InterruptedException, IOException { + context.checkIfIamLeaderFired(); // get all other numbers... 
final String holdElectionPath = context.electionPath + ELECTION_NODE; List seqs = zkClient.getChildren(holdElectionPath, null, true); @@ -208,6 +217,8 @@ public class LeaderElector { * @return sequential node number */ public int joinElection(ElectionContext context, boolean replacement) throws KeeperException, InterruptedException, IOException { + context.joinedElectionFired(); + final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE; long sessionId = zkClient.getSolrZooKeeper().getSessionId(); @@ -273,6 +284,7 @@ public class LeaderElector { */ public void setup(final ElectionContext context) throws InterruptedException, KeeperException { + this.context = context; String electZKPath = context.electionPath + LeaderElector.ELECTION_NODE; zkCmdExecutor.ensureExists(electZKPath, zkClient); diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 7da3abb75b8..9298216f301 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -46,6 +46,8 @@ import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static java.util.Collections.singletonMap; + /** * Cluster leader. Responsible node assignments, cluster state file? 
*/ @@ -63,6 +65,8 @@ public class Overseer { static enum LeaderStatus { DONT_KNOW, NO, YES }; + private long lastUpdatedTime = 0; + private class ClusterStateUpdater implements Runnable, ClosableThread { private final ZkStateReader reader; @@ -151,33 +155,51 @@ public class Overseer { break; } else if (LeaderStatus.YES != isLeader) { - log.debug("am_i_leader unclear {}", isLeader); + log.debug("am_i_leader unclear {}", isLeader); continue; // not a no, not a yes, try ask again } + DistributedQueue.QueueEvent head = null; + try { + head = stateUpdateQueue.peek(true); + } catch (KeeperException e) { + if (e.code() == KeeperException.Code.SESSIONEXPIRED) { + log.warn( + "Solr cannot talk to ZK, exiting Overseer main queue loop", e); + return; + } + log.error("Exception in Overseer main queue loop", e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + + } catch (Exception e) { + log.error("Exception in Overseer main queue loop", e); + } synchronized (reader.getUpdateLock()) { try { - byte[] head = stateUpdateQueue.peek(); - - if (head != null) { - reader.updateClusterState(true); - ClusterState clusterState = reader.getClusterState(); + reader.updateClusterState(true); + ClusterState clusterState = reader.getClusterState(); + + while (head != null) { + final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); + final String operation = message.getStr(QUEUE_OPERATION); + + clusterState = processMessage(clusterState, message, operation); + workQueue.offer(head.getBytes()); + + stateUpdateQueue.poll(); + + if (System.currentTimeMillis() - lastUpdatedTime > STATE_UPDATE_DELAY) break; - while (head != null) { - final ZkNodeProps message = ZkNodeProps.load(head); - final String operation = message.getStr(QUEUE_OPERATION); - - clusterState = processMessage(clusterState, message, operation); - workQueue.offer(head); - - stateUpdateQueue.poll(); - head = stateUpdateQueue.peek(); - } - zkClient.setData(ZkStateReader.CLUSTER_STATE, - 
ZkStateReader.toJSON(clusterState), true); + // if an event comes in the next 100ms batch it together + head = stateUpdateQueue.peek(100); } + lastUpdatedTime = System.currentTimeMillis(); + zkClient.setData(ZkStateReader.CLUSTER_STATE, + ZkStateReader.toJSON(clusterState), true); // clean work queue - while (workQueue.poll() != null); - + while (workQueue.poll() != null) ; + } catch (KeeperException e) { if (e.code() == KeeperException.Code.SESSIONEXPIRED) { log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e); @@ -193,11 +215,6 @@ public class Overseer { } } - try { - Thread.sleep(STATE_UPDATE_DELAY); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } } } @@ -449,7 +466,7 @@ public class Overseer { //request new shardId if (collectionExists) { // use existing numShards - numShards = state.getCollectionStates().get(collection).getSlices().size(); + numShards = state.getCollection(collection).getSlices().size(); log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards); } sliceName = Assign.assignShard(collection, state, numShards); @@ -596,11 +613,11 @@ public class Overseer { List ranges = router.partitionRange(shards.size(), router.fullRange()); - Map newCollections = new LinkedHashMap(); +// Map newCollections = new LinkedHashMap(); Map newSlices = new LinkedHashMap(); - newCollections.putAll(state.getCollectionStates()); +// newCollections.putAll(state.getCollectionStates()); for (int i = 0; i < shards.size(); i++) { String sliceName = shards.get(i); /*} @@ -628,9 +645,10 @@ public class Overseer { if(message.getStr("fromApi") == null) collectionProps.put("autoCreated","true"); DocCollection newCollection = new DocCollection(collectionName, newSlices, collectionProps, router); - newCollections.put(collectionName, newCollection); - ClusterState newClusterState = new ClusterState(state.getLiveNodes(), newCollections); - return newClusterState; +// 
newCollections.put(collectionName, newCollection); + return state.copyWith(singletonMap(newCollection.getName(), newCollection)); +// ClusterState newClusterState = new ClusterState(state.getLiveNodes(), newCollections); +// return newClusterState; } /* @@ -756,6 +774,9 @@ public class Overseer { newCollections.put(collectionName, newCollection); return new ClusterState(state.getLiveNodes(), newCollections); } + private ClusterState newState(ClusterState state, Map colls) { + return state.copyWith(colls); + } /* * Remove collection from cloudstate @@ -764,11 +785,11 @@ public class Overseer { final String collection = message.getStr("name"); - final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy - newCollections.remove(collection); +// final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy +// newCollections.remove(collection); - ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); - return newState; +// ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); + return clusterState.copyWith(singletonMap(collection, (DocCollection)null)); } /* @@ -780,16 +801,17 @@ public class Overseer { log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate"); - final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy - DocCollection coll = newCollections.get(collection); +// final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy + DocCollection coll = clusterState.getCollection(collection); Map newSlices = new LinkedHashMap(coll.getSlicesMap()); newSlices.remove(sliceId); DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter()); - newCollections.put(newCollection.getName(), newCollection); +// newCollections.put(newCollection.getName(), newCollection); + 
return newState(clusterState, singletonMap(collection,newCollection)); - return new ClusterState(clusterState.getLiveNodes(), newCollections); +// return new ClusterState(clusterState.getLiveNodes(), newCollections); } /* @@ -801,8 +823,9 @@ public class Overseer { final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); - final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy - DocCollection coll = newCollections.get(collection); +// final Map newCollections = new LinkedHashMap(clusterState.getCollectionStates()); // shallow copy +// DocCollection coll = newCollections.get(collection); + DocCollection coll = clusterState.getCollectionOrNull(collection) ; if (coll == null) { // TODO: log/error that we didn't find it? // just in case, remove the zk collection node @@ -851,7 +874,7 @@ public class Overseer { // if there are no slices left in the collection, remove it? if (newSlices.size() == 0) { - newCollections.remove(coll.getName()); +// newCollections.remove(coll.getName()); // TODO: it might be better logically to have this in ZkController // but for tests (it's easier) it seems better for the moment to leave CoreContainer and/or @@ -864,15 +887,18 @@ public class Overseer { } catch (KeeperException e) { SolrException.log(log, "Problem cleaning up collection in zk:" + collection, e); } + return newState(clusterState,singletonMap(collection, (DocCollection) null)); + } else { DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter()); - newCollections.put(newCollection.getName(), newCollection); + return newState(clusterState,singletonMap(collection,newCollection)); +// newCollections.put(newCollection.getName(), newCollection); } - ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); - return newState; +// ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections); +// return newState; } 
@Override @@ -935,11 +961,9 @@ public class Overseer { } - private OverseerThread ccThread; + private volatile OverseerThread ccThread; - private OverseerThread updaterThread; - - private volatile boolean isClosed; + private volatile OverseerThread updaterThread; private ZkStateReader reader; @@ -954,6 +978,7 @@ public class Overseer { } public void start(String id) { + close(); log.info("Overseer (id=" + id + ") starting"); createOverseerNode(reader.getZkClient()); //launch cluster state updater thread @@ -970,8 +995,11 @@ public class Overseer { ccThread.start(); } + public OverseerThread getUpdaterThread() { + return updaterThread; + } + public void close() { - isClosed = true; if (updaterThread != null) { try { updaterThread.close(); @@ -988,12 +1016,8 @@ public class Overseer { log.error("Error closing ccThread", t); } } - - try { - reader.close(); - } catch (Throwable t) { - log.error("Error closing zkStateReader", t); - } + updaterThread = null; + ccThread = null; } /** diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java index 1a0dff8fe69..9d427dca8a0 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java @@ -47,13 +47,13 @@ import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.params.UpdateParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardResponse; +import 
org.apache.solr.update.SolrIndexSplitter; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,11 +111,13 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { public static final String COLL_CONF = "collection.configName"; + public static final String COLL_PROP_PREFIX = "property."; public static final Map COLL_PROPS = ZkNodeProps.makeMap( ROUTER, DocRouter.DEFAULT_NAME, REPLICATION_FACTOR, "1", - MAX_SHARDS_PER_NODE, "1"); + MAX_SHARDS_PER_NODE, "1", + "external",null ); // TODO: use from Overseer? @@ -286,36 +288,34 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP); String core = replica.getStr(ZkStateReader.CORE_NAME_PROP); - //assume the core exists and try to unload it - if (!Slice.ACTIVE.equals(replica.getStr(Slice.STATE))) { - deleteCoreNode(collectionName, replicaName, replica, core); - if(waitForCoreNodeGone(collectionName, shard, replicaName)) return; - } else { - Map m = ZkNodeProps.makeMap("qt", adminPath, - CoreAdminParams.ACTION, CoreAdminAction.UNLOAD.toString(), - CoreAdminParams.CORE, core) ; - - ShardRequest sreq = new ShardRequest(); - sreq.purpose = 1; - if (baseUrl.startsWith("http://")) baseUrl = baseUrl.substring(7); - sreq.shards = new String[]{baseUrl}; - sreq.actualShards = sreq.shards; - sreq.params = new ModifiableSolrParams(new MapSolrParams(m) ); - try { - shardHandler.submit(sreq, baseUrl, sreq.params); - } catch (Exception e) { - log.info("Exception trying to unload core "+sreq,e); - } - if (waitForCoreNodeGone(collectionName, shard, replicaName)) return;//check if the core unload removed the corenode zk enry - deleteCoreNode(collectionName, replicaName, replica, core); // this could be because the core is gone but not updated in ZK yet (race condition) - if(waitForCoreNodeGone(collectionName, shard, replicaName)) return; - + + // assume the core exists and 
try to unload it + Map m = ZkNodeProps.makeMap("qt", adminPath, CoreAdminParams.ACTION, + CoreAdminAction.UNLOAD.toString(), CoreAdminParams.CORE, core); + + ShardRequest sreq = new ShardRequest(); + sreq.purpose = 1; + if (baseUrl.startsWith("http://")) baseUrl = baseUrl.substring(7); + sreq.shards = new String[] {baseUrl}; + sreq.actualShards = sreq.shards; + sreq.params = new ModifiableSolrParams(new MapSolrParams(m)); + try { + shardHandler.submit(sreq, baseUrl, sreq.params); + } catch (Exception e) { + log.warn("Exception trying to unload core " + sreq, e); } - throw new SolrException(ErrorCode.SERVER_ERROR, "Could not remove replica : "+collectionName+"/"+shard+"/"+replicaName); + + collectShardResponses(!Slice.ACTIVE.equals(replica.getStr(Slice.STATE)) ? new NamedList() : results, false, null); + + if (waitForCoreNodeGone(collectionName, shard, replicaName, 5000)) return;//check if the core unload removed the corenode zk enry + deleteCoreNode(collectionName, replicaName, replica, core); // try and ensure core info is removed from clusterstate + if(waitForCoreNodeGone(collectionName, shard, replicaName, 30000)) return; + + throw new SolrException(ErrorCode.SERVER_ERROR, "Could not remove replica : " + collectionName + "/" + shard+"/" + replicaName); } - private boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName) throws InterruptedException { - long waitUntil = System.currentTimeMillis() + 30000; + private boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException { + long waitUntil = System.currentTimeMillis() + timeoutms; boolean deleted = false; while (System.currentTimeMillis() < waitUntil) { Thread.sleep(100); @@ -545,6 +545,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { params.set(CoreAdminParams.COLLECTION, collectionName); params.set(CoreAdminParams.SHARD, sliceName); params.set(ZkStateReader.NUM_SHARDS_PROP, 
numSlices); + addPropertyParams(message, params); ShardRequest sreq = new ShardRequest(); params.set("qt", adminPath); @@ -739,7 +740,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { params.set(CoreAdminParams.SHARD_RANGE, subRange.toString()); params.set(CoreAdminParams.SHARD_STATE, Slice.CONSTRUCTION); params.set(CoreAdminParams.SHARD_PARENT, parentSlice.getName()); - + addPropertyParams(message, params); sendShardRequest(nodeName, params); } @@ -849,6 +850,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { params.set(CoreAdminParams.NAME, shardName); params.set(CoreAdminParams.COLLECTION, collectionName); params.set(CoreAdminParams.SHARD, sliceName); + addPropertyParams(message, params); // TODO: Figure the config used by the parent shard and use it. //params.set("collection.configName", configName); @@ -1105,7 +1107,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { private void migrateKey(ClusterState clusterState, DocCollection sourceCollection, Slice sourceSlice, DocCollection targetCollection, Slice targetSlice, String splitKey, int timeout, NamedList results) throws KeeperException, InterruptedException { String tempSourceCollectionName = "split_" + sourceSlice.getName() + "_temp_" + targetSlice.getName(); - if (clusterState.getCollectionStates().containsKey(tempSourceCollectionName)) { + if (clusterState.hasCollection(tempSourceCollectionName)) { log.info("Deleting temporary collection: " + tempSourceCollectionName); Map props = ZkNodeProps.makeMap( QUEUE_OPERATION, DELETECOLLECTION, @@ -1144,7 +1146,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { Overseer.QUEUE_OPERATION, Overseer.ADD_ROUTING_RULE, COLLECTION_PROP, sourceCollection.getName(), SHARD_ID_PROP, sourceSlice.getName(), - "routeKey", splitKey, + "routeKey", SolrIndexSplitter.getRouteKey(splitKey) + "!", "range", splitRange.toString(), "targetCollection", 
targetCollection.getName(), "expireAt", String.valueOf(System.currentTimeMillis() + timeout)); @@ -1160,8 +1162,8 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { Thread.sleep(100); Map rules = zkStateReader.getClusterState().getSlice(sourceCollection.getName(), sourceSlice.getName()).getRoutingRules(); if (rules != null) { - RoutingRule rule = rules.get(splitKey); - if (rule.getRouteRanges().contains(splitRange)) { + RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + "!"); + if (rule != null && rule.getRouteRanges().contains(splitRange)) { added = true; break; } @@ -1177,13 +1179,13 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { Replica sourceLeader = sourceSlice.getLeader(); // create a temporary collection with just one node on the shard leader - String sourceLeaderUrl = zkStateReader.getZkClient().getBaseUrlForNodeName(sourceLeader.getNodeName()); - if (sourceLeaderUrl.startsWith("http://")) sourceLeaderUrl = sourceLeaderUrl.substring(7); + String configName = zkStateReader.readConfigName(sourceCollection.getName()); Map props = ZkNodeProps.makeMap( QUEUE_OPERATION, CREATECOLLECTION, "name", tempSourceCollectionName, REPLICATION_FACTOR, 1, NUM_SLICES, 1, + COLL_CONF, configName, CREATE_NODE_SET, sourceLeader.getNodeName()); log.info("Creating temporary collection: " + props); createCollection(clusterState, new ZkNodeProps(props), results); @@ -1192,6 +1194,23 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { Slice tempSourceSlice = clusterState.getCollection(tempSourceCollectionName).getSlices().iterator().next(); Replica tempSourceLeader = clusterState.getLeader(tempSourceCollectionName, tempSourceSlice.getName()); + String tempCollectionReplica1 = tempSourceCollectionName + "_" + tempSourceSlice.getName() + "_replica1"; + String coreNodeName = waitForCoreNodeName(clusterState.getCollection(tempSourceCollectionName), + 
zkStateReader.getZkClient().getBaseUrlForNodeName(sourceLeader.getNodeName()), tempCollectionReplica1); + // wait for the replicas to be seen as active on temp source leader + log.info("Asking source leader to wait for: " + tempCollectionReplica1 + " to be alive on: " + sourceLeader.getNodeName()); + CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState(); + cmd.setCoreName(tempCollectionReplica1); + cmd.setNodeName(sourceLeader.getNodeName()); + cmd.setCoreNodeName(coreNodeName); + cmd.setState(ZkStateReader.ACTIVE); + cmd.setCheckLive(true); + cmd.setOnlyIfLeader(true); + sendShardRequest(tempSourceLeader.getNodeName(), new ModifiableSolrParams(cmd.getParams())); + + collectShardResponses(results, true, + "MIGRATE failed to create temp collection leader or timed out waiting for it to come up"); + log.info("Asking source leader to split index"); params = new ModifiableSolrParams(); params.set(CoreAdminParams.ACTION, CoreAdminAction.SPLIT.toString()); @@ -1213,11 +1232,11 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { params.set(CoreAdminParams.SHARD, tempSourceSlice.getName()); sendShardRequest(targetLeader.getNodeName(), params); - String coreNodeName = waitForCoreNodeName(clusterState.getCollection(tempSourceCollectionName), + coreNodeName = waitForCoreNodeName(clusterState.getCollection(tempSourceCollectionName), zkStateReader.getZkClient().getBaseUrlForNodeName(targetLeader.getNodeName()), tempCollectionReplica2); // wait for the replicas to be seen as active on temp source leader log.info("Asking temp source leader to wait for: " + tempCollectionReplica2 + " to be alive on: " + targetLeader.getNodeName()); - CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState(); + cmd = new CoreAdminRequest.WaitForState(); cmd.setCoreName(tempSourceLeader.getStr("core")); cmd.setNodeName(targetLeader.getNodeName()); cmd.setCoreNodeName(coreNodeName); @@ -1287,6 +1306,14 @@ public class 
OverseerCollectionProcessor implements Runnable, ClosableThread { shardHandler.submit(sreq, replica, sreq.params); } + private void addPropertyParams(ZkNodeProps message, ModifiableSolrParams params) { + // Now add the property.key=value pairs + for (String key : message.keySet()) { + if (key.startsWith(COLL_PROP_PREFIX)) { + params.set(key, message.getStr(key)); + } + } + } private void createCollection(ClusterState clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException { String collectionName = message.getStr("name"); if (clusterState.getCollections().contains(collectionName)) { @@ -1317,7 +1344,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { List createNodeList = ((createNodeSetStr = message.getStr(CREATE_NODE_SET)) == null)?null:StrUtils.splitSmart(createNodeSetStr, ",", true); if (repFactor <= 0) { - throw new SolrException(ErrorCode.BAD_REQUEST, REPLICATION_FACTOR + " must be greater than or equal to 0"); + throw new SolrException(ErrorCode.BAD_REQUEST, REPLICATION_FACTOR + " must be greater than 0"); } if (numSlices <= 0) { @@ -1366,6 +1393,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { + ". 
This requires " + requestedShardsToCreate + " shards to be created (higher than the allowed number)"); } + String configName = createConfNode(collectionName, message); Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(message)); @@ -1380,8 +1408,6 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { if (!created) throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully createcollection: " + message.getStr("name")); - - String configName = message.getStr(COLL_CONF); log.info("going to create cores replicas shardNames {} , repFactor : {}", shardNames, repFactor); for (int i = 1; i <= shardNames.size(); i++) { String sliceName = shardNames.get(i-1); @@ -1401,6 +1427,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { params.set(CoreAdminParams.COLLECTION, collectionName); params.set(CoreAdminParams.SHARD, sliceName); params.set(ZkStateReader.NUM_SHARDS_PROP, numSlices); + addPropertyParams(message, params); ShardRequest sreq = new ShardRequest(); params.set("qt", adminPath); @@ -1435,6 +1462,37 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } } + private String createConfNode(String coll, ZkNodeProps message) throws KeeperException, InterruptedException { + String configName = message.getStr(OverseerCollectionProcessor.COLL_CONF); + if(configName == null){ + // if there is only one conf, use that + List configNames=null; + try { + configNames = zkStateReader.getZkClient().getChildren(ZkController.CONFIGS_ZKNODE, null, true); + if (configNames != null && configNames.size() == 1) { + configName = configNames.get(0); + // no config set named, but there is only 1 - use it + log.info("Only one config set found in zk - using it:" + configName); + } + } catch (KeeperException.NoNodeException e) { + + } + + } + + if(configName!= null){ + log.info("creating collections conf node {} ",ZkStateReader.COLLECTIONS_ZKNODE + "/" + coll); + 
zkStateReader.getZkClient().makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + coll, + ZkStateReader.toJSON(ZkNodeProps.makeMap(ZkController.CONFIGNAME_PROP,configName)),true ); + + } else { + String msg = "Could not obtain config name"; + log.warn(msg); + } + return configName; + + } + private void collectionCmd(ClusterState clusterState, ZkNodeProps message, ModifiableSolrParams params, NamedList results, String stateMatcher) { log.info("Executing Collection Cmd : " + params); String collectionName = message.getStr("name"); diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java index 6ad73c8ba62..7955bd6756f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java +++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java @@ -19,12 +19,14 @@ package org.apache.solr.cloud; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; @@ -41,6 +43,7 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.UpdateParams; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.RequestHandlers.LazyRequestHandlerWrapper; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.ReplicationHandler; @@ -161,6 +164,7 @@ public class RecoveryStrategy extends Thread implements ClosableThread { RefCounted searchHolder = core .getNewestSearcher(false); SolrIndexSearcher 
searcher = searchHolder.get(); + Directory dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.META_DATA, null); try { log.debug(core.getCoreDescriptor().getCoreContainer() .getZkController().getNodeName() @@ -170,8 +174,12 @@ public class RecoveryStrategy extends Thread implements ClosableThread { + leaderUrl + " gen:" + core.getDeletionPolicy().getLatestCommit().getGeneration() - + " data:" + core.getDataDir()); + + " data:" + core.getDataDir() + + " index:" + core.getIndexDir() + + " newIndex:" + core.getNewIndexDir() + + " files:" + Arrays.asList(dir.listAll())); } finally { + core.getDirectoryFactory().release(dir); searchHolder.decref(); } } catch (Exception e) { @@ -186,7 +194,6 @@ public class RecoveryStrategy extends Thread implements ClosableThread { HttpSolrServer server = new HttpSolrServer(leaderUrl); try { server.setConnectionTimeout(30000); - server.setSoTimeout(60000); UpdateRequest ureq = new UpdateRequest(); ureq.setParams(new ModifiableSolrParams()); ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true); @@ -202,8 +209,7 @@ public class RecoveryStrategy extends Thread implements ClosableThread { throws SolrServerException, IOException { HttpSolrServer server = new HttpSolrServer(leaderBaseUrl); try { - server.setConnectionTimeout(45000); - server.setSoTimeout(120000); + server.setConnectionTimeout(30000); WaitForState prepCmd = new WaitForState(); prepCmd.setCoreName(leaderCoreName); prepCmd.setNodeName(zkController.getNodeName()); diff --git a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java index 8249d2732e8..a3c04cfa24c 100644 --- a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java +++ b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java @@ -20,22 +20,19 @@ package org.apache.solr.cloud; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.SynchronousQueue; -import 
java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.ExecutorService; import org.apache.http.client.HttpClient; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestRecovery; import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.cloud.ZkCoreNodeProps; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction; import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.SolrCore; @@ -43,8 +40,12 @@ import org.apache.solr.handler.component.HttpShardHandlerFactory; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardResponse; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.SolrRequestInfo; +import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.PeerSync; -import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.update.UpdateShardHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,27 +55,18 @@ public class SyncStrategy { private final boolean SKIP_AUTO_RECOVERY = Boolean.getBoolean("solrcloud.skip.autorecovery"); private final ShardHandler shardHandler; - - private ThreadPoolExecutor recoveryCmdExecutor = new ThreadPoolExecutor( - 0, Integer.MAX_VALUE, 5, TimeUnit.SECONDS, - new SynchronousQueue(), new DefaultSolrThreadFactory( 
- "recoveryCmdExecutor")); private volatile boolean isClosed; private final HttpClient client; - { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 10000); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 20); - params.set(HttpClientUtil.PROP_CONNECTION_TIMEOUT, 15000); - params.set(HttpClientUtil.PROP_SO_TIMEOUT, 60000); - params.set(HttpClientUtil.PROP_USE_RETRY, false); - client = HttpClientUtil.createClient(params); - } + + private final ExecutorService updateExecutor; - public SyncStrategy() { + public SyncStrategy(UpdateShardHandler updateShardHandler) { + client = updateShardHandler.getHttpClient(); + shardHandler = new HttpShardHandlerFactory().getShardHandler(client); + updateExecutor = updateShardHandler.getUpdateExecutor(); } private static class ShardCoreRequest extends ShardRequest { @@ -87,17 +79,26 @@ public class SyncStrategy { if (SKIP_AUTO_RECOVERY) { return true; } - if (isClosed) { - log.warn("Closed, skipping sync up."); - return false; - } - log.info("Sync replicas to " + ZkCoreNodeProps.getCoreUrl(leaderProps)); + boolean success; + SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams()); + SolrQueryResponse rsp = new SolrQueryResponse(); + SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); + try { + if (isClosed) { + log.warn("Closed, skipping sync up."); + return false; + } + log.info("Sync replicas to " + ZkCoreNodeProps.getCoreUrl(leaderProps)); + + if (core.getUpdateHandler().getUpdateLog() == null) { + log.error("No UpdateLog found - cannot sync"); + return false; + } - if (core.getUpdateHandler().getUpdateLog() == null) { - log.error("No UpdateLog found - cannot sync"); - return false; + success = syncReplicas(zkController, core, leaderProps); + } finally { + SolrRequestInfo.clearRequestInfo(); } - boolean success = syncReplicas(zkController, core, leaderProps); return success; } @@ -257,16 +258,6 @@ public class SyncStrategy { 
public void close() { this.isClosed = true; - try { - client.getConnectionManager().shutdown(); - } catch (Throwable e) { - SolrException.log(log, e); - } - try { - ExecutorUtil.shutdownNowAndAwaitTermination(recoveryCmdExecutor); - } catch (Throwable e) { - SolrException.log(log, e); - } } private void requestRecovery(final ZkNodeProps leaderProps, final String baseUrl, final String coreName) throws SolrServerException, IOException { @@ -282,8 +273,8 @@ public class SyncStrategy { HttpSolrServer server = new HttpSolrServer(baseUrl, client); try { - server.setConnectionTimeout(15000); - server.setSoTimeout(60000); + server.setConnectionTimeout(30000); + server.setSoTimeout(120000); server.request(recoverRequestCmd); } catch (Throwable t) { SolrException.log(log, ZkCoreNodeProps.getCoreUrl(leaderProps) + ": Could not tell a replica to recover", t); @@ -292,7 +283,7 @@ public class SyncStrategy { } } }; - recoveryCmdExecutor.execute(thread); + updateExecutor.execute(thread); } public static ModifiableSolrParams params(String... 
params) { diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index d32f37db637..d291eb9ba55 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -23,10 +23,10 @@ import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.cloud.BeforeReconnect; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DefaultConnectionStrategy; import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.DocRouter; -import org.apache.solr.common.cloud.ImplicitDocRouter; import org.apache.solr.common.cloud.OnReconnect; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; @@ -172,11 +172,9 @@ public final class ZkController { private int clientTimeout; private volatile boolean isClosed; - - private UpdateShardHandler updateShardHandler; public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort, - String localHostContext, int leaderVoteWait, boolean genericCoreNodeNames, int distribUpdateConnTimeout, int distribUpdateSoTimeout, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException, + String localHostContext, int leaderVoteWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException, TimeoutException, IOException { if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null."); this.cc = cc; @@ -187,8 +185,6 @@ public final class ZkController { // which means the default of "solr" localHostContext = 
trimLeadingAndTrailingSlashes(localHostContext); - updateShardHandler = new UpdateShardHandler(distribUpdateConnTimeout, distribUpdateSoTimeout); - this.zkServerAddress = zkServerAddress; this.localHostPort = locaHostPort; this.localHostContext = localHostContext; @@ -203,46 +199,53 @@ public final class ZkController { this.leaderVoteWait = leaderVoteWait; this.clientTimeout = zkClientTimeout; - zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout, zkClientConnectTimeout, + zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout, + zkClientConnectTimeout, new DefaultConnectionStrategy(), // on reconnect, reload cloud info new OnReconnect() { - + @Override public void command() { try { markAllAsNotLeader(registerOnReconnect); - // this is troublesome - we dont want to kill anything the old leader accepted - // though I guess sync will likely get those updates back? But only if + // this is troublesome - we dont want to kill anything the old + // leader accepted + // though I guess sync will likely get those updates back? But + // only if // he is involved in the sync, and he certainly may not be - // ExecutorUtil.shutdownAndAwaitTermination(cc.getCmdDistribExecutor()); + // ExecutorUtil.shutdownAndAwaitTermination(cc.getCmdDistribExecutor()); // we need to create all of our lost watches // seems we dont need to do this again... 
- //Overseer.createClientNodes(zkClient, getNodeName()); + // Overseer.createClientNodes(zkClient, getNodeName()); ShardHandler shardHandler; String adminPath; shardHandler = cc.getShardHandlerFactory().getShardHandler(); adminPath = cc.getAdminPath(); - + cc.cancelCoreRecoveries(); registerAllCoresAsDown(registerOnReconnect, false); - - ZkController.this.overseer = new Overseer(shardHandler, adminPath, zkStateReader); - ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName()); + + ElectionContext context = new OverseerElectionContext(zkClient, + overseer, getNodeName()); + overseerElector.joinElection(context, true); zkStateReader.createClusterStateWatchersAndUpdate(); // we have to register as live first to pick up docs in the buffer createEphemeralLiveNode(); - List descriptors = registerOnReconnect.getCurrentDescriptors(); + List descriptors = registerOnReconnect + .getCurrentDescriptors(); // re register all descriptors - if (descriptors != null) { + if (descriptors != null) { for (CoreDescriptor descriptor : descriptors) { - // TODO: we need to think carefully about what happens when it was - // a leader that was expired - as well as what to do about leaders/overseers + // TODO: we need to think carefully about what happens when it + // was + // a leader that was expired - as well as what to do about + // leaders/overseers // with connection loss try { register(descriptor.getName(), descriptor, true, true); @@ -251,7 +254,7 @@ public final class ZkController { } } } - + } catch (InterruptedException e) { // Restore the interrupted status Thread.currentThread().interrupt(); @@ -262,10 +265,18 @@ public final class ZkController { throw new ZooKeeperException( SolrException.ErrorCode.SERVER_ERROR, "", e); } - } - - + + }, new BeforeReconnect() { + + @Override + public void command() { + try { + ZkController.this.overseer.close(); + } catch (Exception e) { + log.error("Error trying to stop any Overseer threads", e); + } + } }); 
this.overseerJobQueue = Overseer.getInQueue(zkClient); @@ -396,13 +407,6 @@ public final class ZkController { log.error("Error closing zkClient", t); } - if (updateShardHandler != null) { - try { - updateShardHandler.close(); - } catch(Throwable t) { - log.error("Error closing updateShardHandler", t); - } - } } /** @@ -718,35 +722,6 @@ public final class ZkController { return zkClient.exists(path, true); } - /** - * Returns config value - */ - public String readConfigName(String collection) throws KeeperException, - InterruptedException { - - String configName = null; - - String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; - if (log.isInfoEnabled()) { - log.info("Load collection config from:" + path); - } - byte[] data = zkClient.getData(path, null, null, true); - - if(data != null) { - ZkNodeProps props = ZkNodeProps.load(data); - configName = props.getStr(CONFIGNAME_PROP); - } - - if (configName != null && !zkClient.exists(CONFIGS_ZKNODE + "/" + configName, true)) { - log.error("Specified config does not exist in ZooKeeper:" + configName); - throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, - "Specified config does not exist in ZooKeeper:" + configName); - } - - return configName; - } - - /** * Register shard with ZooKeeper. 
@@ -1358,30 +1333,31 @@ public final class ZkController { public void preRegister(CoreDescriptor cd ) { String coreNodeName = getCoreNodeName(cd); - - // make sure the node name is set on the descriptor - if (cd.getCloudDescriptor().getCoreNodeName() == null) { - cd.getCloudDescriptor().setCoreNodeName(coreNodeName); - } - // before becoming available, make sure we are not live and active // this also gets us our assigned shard id if it was not specified try { - if(cd.getCloudDescriptor().getCollectionName() !=null && cd.getCloudDescriptor().getCoreNodeName() != null ) { + CloudDescriptor cloudDesc = cd.getCloudDescriptor(); + if(cd.getCloudDescriptor().getCollectionName() !=null && cloudDesc.getCoreNodeName() != null ) { //we were already registered - if(zkStateReader.getClusterState().hasCollection(cd.getCloudDescriptor().getCollectionName())){ - DocCollection coll = zkStateReader.getClusterState().getCollection(cd.getCloudDescriptor().getCollectionName()); + if(zkStateReader.getClusterState().hasCollection(cloudDesc.getCollectionName())){ + DocCollection coll = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName()); if(!"true".equals(coll.getStr("autoCreated"))){ - Slice slice = coll.getSlice(cd.getCloudDescriptor().getShardId()); + Slice slice = coll.getSlice(cloudDesc.getShardId()); if(slice != null){ - if(slice.getReplica(cd.getCloudDescriptor().getCoreNodeName()) == null) { + if(slice.getReplica(cloudDesc.getCoreNodeName()) == null) { log.info("core_removed This core is removed from ZK"); - throw new SolrException(ErrorCode.NOT_FOUND,coreNodeName +" is removed"); + throw new SolrException(ErrorCode.NOT_FOUND,cloudDesc.getCoreNodeName() +" is removed"); } } } } } + + // make sure the node name is set on the descriptor + if (cloudDesc.getCoreNodeName() == null) { + cloudDesc.setCoreNodeName(coreNodeName); + } + publish(cd, ZkStateReader.DOWN, false); } catch (KeeperException e) { log.error("", e); @@ -1562,11 +1538,14 @@ public final 
class ZkController { return clientTimeout; } - // may return null if not in zk mode - public UpdateShardHandler getUpdateShardHandler() { - return updateShardHandler; + public Overseer getOverseer() { + return overseer; } + public LeaderElector getOverseerElector() { + return overseerElector; + } + /** * Returns the nodeName that should be used based on the specified properties. * diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSolr.java b/solr/core/src/java/org/apache/solr/core/ConfigSolr.java index f864402fda8..78f70d0a066 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigSolr.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigSolr.java @@ -156,6 +156,14 @@ public abstract class ConfigSolr { public int getDistributedSocketTimeout() { return getInt(CfgProp.SOLR_DISTRIBUPDATESOTIMEOUT, 0); } + + public int getMaxUpdateConnections() { + return getInt(CfgProp.SOLR_MAXUPDATECONNECTIONS, 10000); + } + + public int getMaxUpdateConnectionsPerHost() { + return getInt(CfgProp.SOLR_MAXUPDATECONNECTIONSPERHOST, 100); + } public int getCoreLoadThreadCount() { return getInt(ConfigSolr.CfgProp.SOLR_CORELOADTHREADS, DEFAULT_CORE_LOAD_THREADS); @@ -179,6 +187,14 @@ public abstract class ConfigSolr { return get(CfgProp.SOLR_ADMINHANDLER, "org.apache.solr.handler.admin.CoreAdminHandler"); } + public String getCollectionsHandlerClass() { + return get(CfgProp.SOLR_COLLECTIONSHANDLER, "org.apache.solr.handler.admin.CollectionsHandler"); + } + + public String getInfoHandlerClass() { + return get(CfgProp.SOLR_INFOHANDLER, "org.apache.solr.handler.admin.InfoHandler"); + } + public boolean hasSchemaCache() { return getBool(ConfigSolr.CfgProp.SOLR_SHARESCHEMA, false); } @@ -203,13 +219,17 @@ public abstract class ConfigSolr { // Ugly for now, but we'll at least be able to centralize all of the differences between 4x and 5x. 
protected static enum CfgProp { SOLR_ADMINHANDLER, + SOLR_COLLECTIONSHANDLER, SOLR_CORELOADTHREADS, SOLR_COREROOTDIRECTORY, SOLR_DISTRIBUPDATECONNTIMEOUT, SOLR_DISTRIBUPDATESOTIMEOUT, + SOLR_MAXUPDATECONNECTIONS, + SOLR_MAXUPDATECONNECTIONSPERHOST, SOLR_HOST, SOLR_HOSTCONTEXT, SOLR_HOSTPORT, + SOLR_INFOHANDLER, SOLR_LEADERVOTEWAIT, SOLR_LOGGING_CLASS, SOLR_LOGGING_ENABLED, diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java b/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java index 961803b647e..35b7e5e8b6f 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java @@ -103,10 +103,14 @@ public class ConfigSolrXml extends ConfigSolr { private void fillPropMap() { propMap.put(CfgProp.SOLR_ADMINHANDLER, doSub("solr/str[@name='adminHandler']")); + propMap.put(CfgProp.SOLR_COLLECTIONSHANDLER, doSub("solr/str[@name='collectionsHandler']")); + propMap.put(CfgProp.SOLR_INFOHANDLER, doSub("solr/str[@name='infoHandler']")); propMap.put(CfgProp.SOLR_CORELOADTHREADS, doSub("solr/int[@name='coreLoadThreads']")); propMap.put(CfgProp.SOLR_COREROOTDIRECTORY, doSub("solr/str[@name='coreRootDirectory']")); propMap.put(CfgProp.SOLR_DISTRIBUPDATECONNTIMEOUT, doSub("solr/solrcloud/int[@name='distribUpdateConnTimeout']")); propMap.put(CfgProp.SOLR_DISTRIBUPDATESOTIMEOUT, doSub("solr/solrcloud/int[@name='distribUpdateSoTimeout']")); + propMap.put(CfgProp.SOLR_MAXUPDATECONNECTIONS, doSub("solr/solrcloud/int[@name='maxUpdateConnections']")); + propMap.put(CfgProp.SOLR_MAXUPDATECONNECTIONSPERHOST, doSub("solr/solrcloud/int[@name='maxUpdateConnectionsPerHost']")); propMap.put(CfgProp.SOLR_HOST, doSub("solr/solrcloud/str[@name='host']")); propMap.put(CfgProp.SOLR_HOSTCONTEXT, doSub("solr/solrcloud/str[@name='hostContext']")); propMap.put(CfgProp.SOLR_HOSTPORT, doSub("solr/solrcloud/int[@name='hostPort']")); diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSolrXmlOld.java 
b/solr/core/src/java/org/apache/solr/core/ConfigSolrXmlOld.java index 1ee2ff385ef..f33bf620327 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigSolrXmlOld.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigSolrXmlOld.java @@ -141,10 +141,16 @@ public class ConfigSolrXmlOld extends ConfigSolr { propMap.put(CfgProp.SOLR_ADMINHANDLER, config.getVal("solr/cores/@adminHandler", false)); + propMap.put(CfgProp.SOLR_COLLECTIONSHANDLER, config.getVal("solr/cores/@collectionsHandler", false)); + propMap.put(CfgProp.SOLR_INFOHANDLER, config.getVal("solr/cores/@infoHandler", false)); propMap.put(CfgProp.SOLR_DISTRIBUPDATECONNTIMEOUT, config.getVal("solr/cores/@distribUpdateConnTimeout", false)); propMap.put(CfgProp.SOLR_DISTRIBUPDATESOTIMEOUT, config.getVal("solr/cores/@distribUpdateSoTimeout", false)); + propMap.put(CfgProp.SOLR_MAXUPDATECONNECTIONS, + config.getVal("solr/cores/@maxUpdateConnections", false)); + propMap.put(CfgProp.SOLR_MAXUPDATECONNECTIONSPERHOST, + config.getVal("solr/cores/@maxUpdateConnectionsPerHost", false)); propMap.put(CfgProp.SOLR_HOST, config.getVal("solr/cores/@host", false)); propMap.put(CfgProp.SOLR_HOSTCONTEXT, config.getVal("solr/cores/@hostContext", false)); diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index fe3c455be23..c088a5214ee 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -17,7 +17,26 @@ package org.apache.solr.core; -import static com.google.common.base.Preconditions.checkNotNull; +import com.google.common.collect.Maps; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.cloud.ZkSolrResourceLoader; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.cloud.ZooKeeperException; +import org.apache.solr.common.util.ExecutorUtil; +import 
org.apache.solr.handler.admin.CollectionsHandler; +import org.apache.solr.handler.admin.CoreAdminHandler; +import org.apache.solr.handler.admin.InfoHandler; +import org.apache.solr.handler.component.ShardHandlerFactory; +import org.apache.solr.logging.LogWatcher; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.IndexSchemaFactory; +import org.apache.solr.update.UpdateShardHandler; +import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.util.FileUtils; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.text.SimpleDateFormat; @@ -40,27 +59,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import org.apache.solr.cloud.ZkController; -import org.apache.solr.cloud.ZkSolrResourceLoader; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.cloud.ZooKeeperException; -import org.apache.solr.common.util.ExecutorUtil; -import org.apache.solr.common.util.SolrjNamedThreadFactory; -import org.apache.solr.handler.admin.CollectionsHandler; -import org.apache.solr.handler.admin.CoreAdminHandler; -import org.apache.solr.handler.admin.InfoHandler; -import org.apache.solr.handler.component.ShardHandlerFactory; -import org.apache.solr.logging.LogWatcher; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.schema.IndexSchemaFactory; -import org.apache.solr.util.DefaultSolrThreadFactory; -import org.apache.solr.util.FileUtils; -import org.apache.zookeeper.KeeperException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Maps; +import static com.google.common.base.Preconditions.checkNotNull; /** @@ -88,8 +87,7 @@ public class CoreContainer { protected ZkContainer zkSys = new ZkContainer(); private ShardHandlerFactory shardHandlerFactory; - 
private ExecutorService updateExecutor = Executors.newCachedThreadPool( - new SolrjNamedThreadFactory("updateExecutor")); + private UpdateShardHandler updateShardHandler; protected LogWatcher logging = null; @@ -102,6 +100,8 @@ public class CoreContainer { protected final CoresLocator coresLocator; private String hostName; + + // private ClientConnectionManager clientConnectionManager = new PoolingClientConnectionManager(); { log.info("New CoreContainer " + System.identityHashCode(this)); @@ -196,6 +196,8 @@ public class CoreContainer { } shardHandlerFactory = ShardHandlerFactory.newInstance(cfg.getShardHandlerFactoryPluginInfo(), loader); + + updateShardHandler = new UpdateShardHandler(cfg); solrCores.allocateLazyCores(cfg.getTransientCacheSize(), loader); @@ -212,9 +214,9 @@ public class CoreContainer { zkSys.initZooKeeper(this, solrHome, cfg); - collectionsHandler = new CollectionsHandler(this); - infoHandler = new InfoHandler(this); - coreAdminHandler = createMultiCoreHandler(cfg.getCoreAdminHandlerClass()); + collectionsHandler = createHandler(cfg.getCollectionsHandlerClass(), CollectionsHandler.class); + infoHandler = createHandler(cfg.getInfoHandlerClass(), InfoHandler.class); + coreAdminHandler = createHandler(cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class); containerProperties = cfg.getSolrProperties("solr"); @@ -358,7 +360,6 @@ public class CoreContainer { cancelCoreRecoveries(); } - try { // First wake up the closer thread, it'll terminate almost immediately since it checks isShutDown. 
synchronized (solrCores.getModifyLock()) { @@ -384,16 +385,20 @@ public class CoreContainer { } } finally { - if (shardHandlerFactory != null) { - shardHandlerFactory.close(); + try { + if (shardHandlerFactory != null) { + shardHandlerFactory.close(); + } + } finally { + try { + if (updateShardHandler != null) { + updateShardHandler.close(); + } + } finally { + // we want to close zk stuff last + zkSys.close(); + } } - - ExecutorUtil.shutdownAndAwaitTermination(updateExecutor); - - // we want to close zk stuff last - - zkSys.close(); - } org.apache.lucene.util.IOUtils.closeWhileHandlingException(loader); // best effort } @@ -676,7 +681,7 @@ public class CoreContainer { String collection = cd.getCloudDescriptor().getCollectionName(); zkSys.getZkController().createCollectionZkNode(cd.getCloudDescriptor()); - String zkConfigName = zkSys.getZkController().readConfigName(collection); + String zkConfigName = zkSys.getZkController().getZkStateReader().readConfigName(collection); if (zkConfigName == null) { log.error("Could not find config name for collection:" + collection); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, @@ -726,7 +731,7 @@ public class CoreContainer { n1 = checkDefault(n1); solrCores.swap(n0, n1); - coresLocator.persist(this, solrCores.getCoreDescriptor(n0), solrCores.getCoreDescriptor(n1)); + coresLocator.swap(this, solrCores.getCoreDescriptor(n0), solrCores.getCoreDescriptor(n1)); log.info("swapped: "+n0 + " with " + n1); } @@ -773,10 +778,10 @@ public class CoreContainer { return null; } - /** + /** * Gets a core by name and increase its refcount. * - * @see SolrCore#close() + * @see SolrCore#close() * @param name the core name * @return the core if found, null if a SolrCore by this name does not exist * @exception SolrException if a SolrCore with this name failed to be initialized @@ -795,7 +800,7 @@ public class CoreContainer { // OK, it's not presently in any list, is it in the list of dynamic cores but not loaded yet? 
If so, load it. CoreDescriptor desc = solrCores.getDynamicDescriptor(name); if (desc == null) { //Nope, no transient core with this name - + // if there was an error initalizing this core, throw a 500 // error with the details for clients attempting to access it. Exception e = getCoreInitFailures().get(name); @@ -826,7 +831,7 @@ public class CoreContainer { } } catch(Exception ex){ // remains to be seen how transient cores and such - // will work in SolrCloud mode, but just to be future + // will work in SolrCloud mode, but just to be future // proof... /*if (isZooKeeperAware()) { try { @@ -846,34 +851,33 @@ public class CoreContainer { return core; } - // ---------------- Multicore self related methods --------------- - /** - * Creates a CoreAdminHandler for this MultiCore. - * @return a CoreAdminHandler - */ - protected CoreAdminHandler createMultiCoreHandler(final String adminHandlerClass) { - return loader.newAdminHandlerInstance(CoreContainer.this, adminHandlerClass); + // ---------------- CoreContainer request handlers -------------- + + protected T createHandler(String handlerClass, Class clazz) { + return loader.newInstance(handlerClass, clazz, null, new Class[] { CoreContainer.class }, new Object[] { this }); } public CoreAdminHandler getMultiCoreHandler() { return coreAdminHandler; } - + public CollectionsHandler getCollectionsHandler() { return collectionsHandler; } - + public InfoHandler getInfoHandler() { return infoHandler; } - + + // ---------------- Multicore self related methods --------------- + /** * the default core name, or null if there is no default core name */ public String getDefaultCoreName() { return cfg.getDefaultCoreName(); } - + // all of the following properties aren't synchronized // but this should be OK since they normally won't be changed rapidly @Deprecated @@ -952,8 +956,8 @@ public class CoreContainer { return shardHandlerFactory; } - public ExecutorService getUpdateExecutor() { - return updateExecutor; + public 
UpdateShardHandler getUpdateShardHandler() { + return updateShardHandler; } // Just to tidy up the code where it did this in-line. @@ -969,8 +973,6 @@ public class CoreContainer { String getCoreToOrigName(SolrCore core) { return solrCores.getCoreToOrigName(core); } - - } class CloserThread extends Thread { diff --git a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java index 087a37b3f6f..d167cb55895 100644 --- a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java +++ b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java @@ -91,7 +91,11 @@ public class CorePropertiesLocator implements CoresLocator { @Override public void delete(CoreContainer cc, CoreDescriptor... coreDescriptors) { + if (coreDescriptors == null) { + return; + } for (CoreDescriptor cd : coreDescriptors) { + if (cd == null) continue; File instanceDir = new File(cd.getInstanceDir()); File propertiesFile = new File(instanceDir, PROPERTIES_FILENAME); propertiesFile.renameTo(new File(instanceDir, PROPERTIES_FILENAME + ".unloaded")); @@ -106,6 +110,11 @@ public class CorePropertiesLocator implements CoresLocator { persist(cc, newCD); } + @Override + public void swap(CoreContainer cc, CoreDescriptor cd1, CoreDescriptor cd2) { + persist(cc, cd1, cd2); + } + @Override public List discover(CoreContainer cc) { logger.info("Looking for core definitions underneath {}", rootDirectory.getAbsolutePath()); diff --git a/solr/core/src/java/org/apache/solr/core/CoresLocator.java b/solr/core/src/java/org/apache/solr/core/CoresLocator.java index 6195ef92c4a..daeef04b88b 100644 --- a/solr/core/src/java/org/apache/solr/core/CoresLocator.java +++ b/solr/core/src/java/org/apache/solr/core/CoresLocator.java @@ -55,6 +55,14 @@ public interface CoresLocator { */ public void rename(CoreContainer cc, CoreDescriptor oldCD, CoreDescriptor newCD); + /** + * Swap two core definitions + * @param cc the CoreContainer + * 
@param cd1 the core descriptor of the first core, after swapping + * @param cd2 the core descriptor of the second core, after swapping + */ + public void swap(CoreContainer cc, CoreDescriptor cd1, CoreDescriptor cd2); + /** * Load all the CoreDescriptors from persistence store * @param cc the CoreContainer diff --git a/solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java b/solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java index 45b1e358750..384fe0896da 100644 --- a/solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java +++ b/solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java @@ -320,7 +320,7 @@ public class JmxMonitoredMap extends try { list.add(new Attribute(attribute, getAttribute(attribute))); } catch (Exception e) { - LOG.warn("Could not get attibute " + attribute); + LOG.warn("Could not get attribute " + attribute); } } diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 1b8f433b432..ddc1723f766 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -17,9 +17,42 @@ package org.apache.solr.core; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Writer; +import java.lang.reflect.Constructor; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.StringTokenizer; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import 
java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; + +import javax.xml.parsers.ParserConfigurationException; + import org.apache.commons.io.IOUtils; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexDeletionPolicy; import org.apache.lucene.index.IndexWriter; @@ -40,6 +73,7 @@ import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.handler.SnapPuller; import org.apache.solr.handler.admin.ShowFileRequestHandler; +import org.apache.solr.handler.component.AnalyticsComponent; import org.apache.solr.handler.component.DebugComponent; import org.apache.solr.handler.component.FacetComponent; import org.apache.solr.handler.component.HighlightComponent; @@ -93,39 +127,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; -import javax.xml.parsers.ParserConfigurationException; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Writer; -import java.lang.reflect.Constructor; -import java.net.URL; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.IdentityHashMap; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.StringTokenizer; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.CountDownLatch; -import 
java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantLock; - /** * @@ -769,7 +770,7 @@ public final class SolrCore implements SolrInfoMBean { updateProcessorChains = loadUpdateProcessorChains(); reqHandlers = new RequestHandlers(this); reqHandlers.initHandlersFromConfig(solrConfig); - + // Handle things that should eventually go away initDeprecatedSupport(); @@ -853,13 +854,21 @@ public final class SolrCore implements SolrInfoMBean { CoreContainer cc = cd.getCoreContainer(); - if (cc != null && cc.isZooKeeperAware() && Slice.CONSTRUCTION.equals(cd.getCloudDescriptor().getShardState())) { - // set update log to buffer before publishing the core - getUpdateHandler().getUpdateLog().bufferUpdates(); - - cd.getCloudDescriptor().setShardState(null); - cd.getCloudDescriptor().setShardRange(null); - cd.getCloudDescriptor().setShardParent(null); + if (cc != null && cc.isZooKeeperAware()) { + SolrRequestHandler realtimeGetHandler = reqHandlers.get("/get"); + if (realtimeGetHandler == null) { + log.warn("WARNING: RealTimeGetHandler is not registered at /get. 
" + + "SolrCloud will always use full index replication instead of the more efficient PeerSync method."); + } + + if (Slice.CONSTRUCTION.equals(cd.getCloudDescriptor().getShardState())) { + // set update log to buffer before publishing the core + getUpdateHandler().getUpdateLog().bufferUpdates(); + + cd.getCloudDescriptor().setShardState(null); + cd.getCloudDescriptor().setShardRange(null); + cd.getCloudDescriptor().setShardParent(null); + } } // For debugging // numOpens.incrementAndGet(); @@ -1197,6 +1206,7 @@ public final class SolrCore implements SolrInfoMBean { addIfNotPresent(components,StatsComponent.COMPONENT_NAME,StatsComponent.class); addIfNotPresent(components,DebugComponent.COMPONENT_NAME,DebugComponent.class); addIfNotPresent(components,RealTimeGetComponent.COMPONENT_NAME,RealTimeGetComponent.class); + addIfNotPresent(components,AnalyticsComponent.COMPONENT_NAME,AnalyticsComponent.class); return components; } private void addIfNotPresent(Map registry, String name, Class c){ diff --git a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java index 0e8e6d6faf8..fe86a0b6e8d 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java +++ b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java @@ -79,7 +79,7 @@ public class SolrResourceLoader implements ResourceLoader,Closeable static final String project = "solr"; static final String base = "org.apache" + "." + project; - static final String[] packages = {"","analysis.","schema.","handler.","search.","update.","core.","response.","request.","update.processor.","util.", "spelling.", "handler.component.", "handler.dataimport." }; + static final String[] packages = {"","analysis.","schema.","handler.","search.","update.","core.","response.","request.","update.processor.","util.", "spelling.", "handler.component.", "handler.dataimport.", "spelling.suggest.", "spelling.suggest.fst." 
}; protected URLClassLoader classLoader; private final String instanceDir; diff --git a/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java b/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java index 7c1d85844c2..632b4d6fef6 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java +++ b/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java @@ -19,7 +19,6 @@ package org.apache.solr.core; import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; - import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; @@ -32,11 +31,9 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Properties; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -190,6 +187,11 @@ public class SolrXMLCoresLocator implements CoresLocator { this.persist(cc); } + @Override + public void swap(CoreContainer cc, CoreDescriptor cd1, CoreDescriptor cd2) { + this.persist(cc); + } + @Override public List discover(CoreContainer cc) { diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java b/solr/core/src/java/org/apache/solr/core/ZkContainer.java index ab349612045..6b281d8fa47 100644 --- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java +++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java @@ -51,8 +51,6 @@ public class ZkContainer { private String host; private int leaderVoteWait; private Boolean genericCoreNodeNames; - private int distribUpdateConnTimeout; - private int distribUpdateSoTimeout; public ZkContainer() { @@ -67,13 +65,11 @@ public class ZkContainer { initZooKeeper(cc, solrHome, config.getZkHost(), config.getZkClientTimeout(), config.getZkHostPort(), config.getZkHostContext(), - config.getHost(), config.getLeaderVoteWait(), 
config.getGenericCoreNodeNames(), - config.getDistributedConnectionTimeout(), config.getDistributedSocketTimeout()); + config.getHost(), config.getLeaderVoteWait(), config.getGenericCoreNodeNames()); } public void initZooKeeper(final CoreContainer cc, String solrHome, String zkHost, int zkClientTimeout, String hostPort, - String hostContext, String host, int leaderVoteWait, boolean genericCoreNodeNames, - int distribUpdateConnTimeout, int distribUpdateSoTimeout) { + String hostContext, String host, int leaderVoteWait, boolean genericCoreNodeNames) { ZkController zkController = null; // if zkHost sys property is not set, we are not using ZooKeeper @@ -92,8 +88,6 @@ public class ZkContainer { this.host = host; this.leaderVoteWait = leaderVoteWait; this.genericCoreNodeNames = genericCoreNodeNames; - this.distribUpdateConnTimeout = distribUpdateConnTimeout; - this.distribUpdateSoTimeout = distribUpdateSoTimeout; if (zkRun == null && zookeeperHost == null) return; // not in zk mode @@ -125,7 +119,7 @@ public class ZkContainer { } } - int zkClientConnectTimeout = 15000; + int zkClientConnectTimeout = 30000; if (zookeeperHost != null) { @@ -147,7 +141,7 @@ public class ZkContainer { } zkController = new ZkController(cc, zookeeperHost, zkClientTimeout, zkClientConnectTimeout, host, hostPort, hostContext, - leaderVoteWait, genericCoreNodeNames, distribUpdateConnTimeout, distribUpdateSoTimeout, + leaderVoteWait, genericCoreNodeNames, new CurrentCoreDescriptorProvider() { @Override @@ -220,7 +214,7 @@ public class ZkContainer { String collection = dcore.getCloudDescriptor().getCollectionName(); zkController.createCollectionZkNode(dcore.getCloudDescriptor()); - zkConfigName = zkController.readConfigName(collection); + zkConfigName = zkController.getZkStateReader().readConfigName(collection); if (zkConfigName == null) { log.error("Could not find config name for collection:" + collection); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, diff --git 
a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java index 2ed2ff481ac..12ca0ddca19 100644 --- a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java +++ b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java @@ -16,6 +16,7 @@ */ package org.apache.solr.handler; +import static org.apache.lucene.util.IOUtils.CHARSET_UTF_8; import static org.apache.solr.handler.ReplicationHandler.ALIAS; import static org.apache.solr.handler.ReplicationHandler.CHECKSUM; import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS; @@ -47,6 +48,7 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Date; @@ -55,6 +57,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Properties; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -75,9 +78,6 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; - -import static org.apache.lucene.util.IOUtils.CHARSET_UTF_8; - import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.client.solrj.impl.HttpSolrServer; @@ -104,6 +104,7 @@ import org.apache.solr.util.FileUtils; import org.apache.solr.util.PropertiesInputStream; import org.apache.solr.util.PropertiesOutputStream; import org.apache.solr.util.RefCounted; +import org.eclipse.jetty.util.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -162,25 +163,18 @@ public class SnapPuller { */ private AtomicBoolean pollDisabled = new AtomicBoolean(false); - // 
HttpClient shared by all cores (used if timeout is not specified for a core) - private static HttpClient client; - // HttpClient for this instance if connectionTimeout or readTimeout has been specified private final HttpClient myHttpClient; - private static synchronized HttpClient createHttpClient(String connTimeout, String readTimeout, String httpBasicAuthUser, String httpBasicAuthPassword, boolean useCompression) { - if (connTimeout == null && readTimeout == null && client != null) return client; + private static HttpClient createHttpClient(SolrCore core, String connTimeout, String readTimeout, String httpBasicAuthUser, String httpBasicAuthPassword, boolean useCompression) { final ModifiableSolrParams httpClientParams = new ModifiableSolrParams(); httpClientParams.set(HttpClientUtil.PROP_CONNECTION_TIMEOUT, connTimeout != null ? connTimeout : "5000"); httpClientParams.set(HttpClientUtil.PROP_SO_TIMEOUT, readTimeout != null ? readTimeout : "20000"); httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_USER, httpBasicAuthUser); httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_PASS, httpBasicAuthPassword); httpClientParams.set(HttpClientUtil.PROP_ALLOW_COMPRESSION, useCompression); - // Keeping a very high number so that if you have a large number of cores - // no requests are kept waiting for an idle connection. 
- httpClientParams.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 10000); - httpClientParams.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 10000); - HttpClient httpClient = HttpClientUtil.createClient(httpClientParams); - if (client == null && connTimeout == null && readTimeout == null) client = httpClient; + + HttpClient httpClient = HttpClientUtil.createClient(httpClientParams, core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getConnectionManager()); + return httpClient; } @@ -207,7 +201,7 @@ public class SnapPuller { String readTimeout = (String) initArgs.get(HttpClientUtil.PROP_SO_TIMEOUT); String httpBasicAuthUser = (String) initArgs.get(HttpClientUtil.PROP_BASIC_AUTH_USER); String httpBasicAuthPassword = (String) initArgs.get(HttpClientUtil.PROP_BASIC_AUTH_PASS); - myHttpClient = createHttpClient(connTimeout, readTimeout, httpBasicAuthUser, httpBasicAuthPassword, useExternal); + myHttpClient = createHttpClient(solrCore, connTimeout, readTimeout, httpBasicAuthUser, httpBasicAuthPassword, useExternal); if (pollInterval != null && pollInterval > 0) { startExecutorService(); } else { @@ -388,8 +382,8 @@ public class SnapPuller { fsyncService = Executors.newSingleThreadExecutor(new DefaultSolrThreadFactory("fsyncService")); // use a synchronized list because the list is read by other threads (to show details) filesDownloaded = Collections.synchronizedList(new ArrayList>()); - // if the generateion of master is older than that of the slave , it means they are not compatible to be copied - // then a new index direcory to be created and all the files need to be copied + // if the generation of master is older than that of the slave , it means they are not compatible to be copied + // then a new index directory to be created and all the files need to be copied boolean isFullCopyNeeded = IndexDeletionPolicyWrapper .getCommitTimestamp(commit) >= latestVersion || commit.getGeneration() >= latestGeneration || forceReplication; @@ -408,59 +402,76 @@ public 
class SnapPuller { if (isIndexStale(indexDir)) { isFullCopyNeeded = true; } - LOG.info("Starting download to " + tmpIndexDir + " fullCopy=" + isFullCopyNeeded); - successfulInstall = false; - downloadIndexFiles(isFullCopyNeeded, tmpIndexDir, latestGeneration); - LOG.info("Total time taken for download : " + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs"); - Collection> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload); - if (!modifiedConfFiles.isEmpty()) { - downloadConfFiles(confFilesToDownload, latestGeneration); - if (isFullCopyNeeded) { - successfulInstall = modifyIndexProps(tmpIdxDirName); - deleteTmpIdxDir = false; - } else { - solrCore.getUpdateHandler().getSolrCoreState() - .closeIndexWriter(core, true); - try { - successfulInstall = moveIndexFiles(tmpIndexDir, indexDir); - } finally { - solrCore.getUpdateHandler().getSolrCoreState() - .openIndexWriter(core); - } - } - if (successfulInstall) { + if (!isFullCopyNeeded) { + // rollback - and do it before we download any files + // so we don't remove files we thought we didn't need + // to download later + solrCore.getUpdateHandler().getSolrCoreState() + .closeIndexWriter(core, true); + } + + boolean reloadCore = false; + + try { + LOG.info("Starting download to " + tmpIndexDir + " fullCopy=" + + isFullCopyNeeded); + successfulInstall = false; + + downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, + latestGeneration); + LOG.info("Total time taken for download : " + + ((System.currentTimeMillis() - replicationStartTime) / 1000) + + " secs"); + Collection> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload); + if (!modifiedConfFiles.isEmpty()) { + downloadConfFiles(confFilesToDownload, latestGeneration); if (isFullCopyNeeded) { - // let the system know we are changing dir's and the old one - // may be closed - if (indexDir != null) { - LOG.info("removing old index directory " + indexDir); - core.getDirectoryFactory().doneWithDirectory(indexDir); - 
core.getDirectoryFactory().remove(indexDir); - } - } - - LOG.info("Configuration files are modified, core will be reloaded"); - logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall);//write to a file time of replication and conf files. - reloadCore(); - } - } else { - terminateAndWaitFsyncService(); - if (isFullCopyNeeded) { - successfulInstall = modifyIndexProps(tmpIdxDirName); - deleteTmpIdxDir = false; - } else { - solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true); - try { + successfulInstall = modifyIndexProps(tmpIdxDirName); + deleteTmpIdxDir = false; + } else { successfulInstall = moveIndexFiles(tmpIndexDir, indexDir); - } finally { - solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core); + } + if (successfulInstall) { + if (isFullCopyNeeded) { + // let the system know we are changing dir's and the old one + // may be closed + if (indexDir != null) { + LOG.info("removing old index directory " + indexDir); + core.getDirectoryFactory().doneWithDirectory(indexDir); + core.getDirectoryFactory().remove(indexDir); + } + } + + LOG.info("Configuration files are modified, core will be reloaded"); + logReplicationTimeAndConfFiles(modifiedConfFiles, + successfulInstall);// write to a file time of replication and + // conf files. 
+ reloadCore = true; + } + } else { + terminateAndWaitFsyncService(); + if (isFullCopyNeeded) { + successfulInstall = modifyIndexProps(tmpIdxDirName); + deleteTmpIdxDir = false; + } else { + successfulInstall = moveIndexFiles(tmpIndexDir, indexDir); + } + if (successfulInstall) { + logReplicationTimeAndConfFiles(modifiedConfFiles, + successfulInstall); } } - if (successfulInstall) { - logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall); + } finally { + if (!isFullCopyNeeded) { + solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core); } } + + // we must reload the core after we open the IW back up + if (reloadCore) { + reloadCore(); + } if (successfulInstall) { if (isFullCopyNeeded) { @@ -690,6 +701,7 @@ public class SnapPuller { } private void reloadCore() { + final CountDownLatch latch = new CountDownLatch(1); new Thread() { @Override public void run() { @@ -697,9 +709,17 @@ public class SnapPuller { solrCore.getCoreDescriptor().getCoreContainer().reload(solrCore.getName()); } catch (Exception e) { LOG.error("Could not reload core ", e); + } finally { + latch.countDown(); } } }.start(); + try { + latch.await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted while waiting for core reload to finish", e); + } } private void downloadConfFiles(List> confFilesToDownload, long latestGeneration) throws Exception { @@ -732,29 +752,28 @@ public class SnapPuller { * Download the index files. If a new index is needed, download all the files. 
* * @param downloadCompleteIndex is it a fresh index copy - * @param tmpIndexDir the directory to which files need to be downloadeed to + * @param tmpIndexDir the directory to which files need to be downloadeed to + * @param indexDir the indexDir to be merged to * @param latestGeneration the version number */ private void downloadIndexFiles(boolean downloadCompleteIndex, - Directory tmpIndexDir, long latestGeneration) throws Exception { - String indexDir = solrCore.getIndexDir(); - - // it's okay to use null for lock factory since we know this dir will exist - Directory dir = solrCore.getDirectoryFactory().get(indexDir, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType); - try { - for (Map file : filesToDownload) { - if (!dir.fileExists((String) file.get(NAME)) || downloadCompleteIndex) { - dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file, - (String) file.get(NAME), false, latestGeneration); - currentFile = file; - dirFileFetcher.fetchFile(); - filesDownloaded.add(new HashMap(file)); - } else { - LOG.info("Skipping download for " + file.get(NAME) + " because it already exists"); - } + Directory indexDir, Directory tmpIndexDir, long latestGeneration) + throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll())); + } + for (Map file : filesToDownload) { + if (!indexDir.fileExists((String) file.get(NAME)) + || downloadCompleteIndex) { + dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file, + (String) file.get(NAME), false, latestGeneration); + currentFile = file; + dirFileFetcher.fetchFile(); + filesDownloaded.add(new HashMap(file)); + } else { + LOG.info("Skipping download for " + file.get(NAME) + + " because it already exists"); } - } finally { - solrCore.getDirectoryFactory().release(dir); } } @@ -769,6 +788,8 @@ public class SnapPuller { for (Map file : filesToDownload) { if (dir.fileExists((String) file.get(NAME)) && dir.fileLength((String) file.get(NAME)) != (Long) 
file.get(SIZE)) { + LOG.warn("File " + file.get(NAME) + " expected to be " + file.get(SIZE) + + " while it is " + dir.fileLength((String) file.get(NAME))); // file exists and size is different, therefore we must assume // corrupted index return true; @@ -782,6 +803,7 @@ public class SnapPuller { *

    */ private boolean moveAFile(Directory tmpIdxDir, Directory indexDir, String fname, List copiedfiles) { + LOG.debug("Moving file: {}", fname); boolean success = false; try { if (indexDir.fileExists(fname)) { @@ -805,6 +827,14 @@ public class SnapPuller { * Copy all index files from the temp index dir to the actual index. The segments_N file is copied last. */ private boolean moveIndexFiles(Directory tmpIdxDir, Directory indexDir) { + if (LOG.isDebugEnabled()) { + try { + LOG.info("From dir files:" + Arrays.asList(tmpIdxDir.listAll())); + LOG.info("To dir files:" + Arrays.asList(indexDir.listAll())); + } catch (IOException e) { + throw new RuntimeException(e); + } + } String segmentsFile = null; List movedfiles = new ArrayList(); for (Map f : filesDownloaded) { diff --git a/solr/core/src/java/org/apache/solr/handler/UpdateRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/UpdateRequestHandler.java index 3d136ebb28d..aa46b2ef0b8 100644 --- a/solr/core/src/java/org/apache/solr/handler/UpdateRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/UpdateRequestHandler.java @@ -75,7 +75,7 @@ public class UpdateRequestHandler extends ContentStreamHandlerBase { type = stream.getContentType(); } if( type == null ) { // Normal requests will not get here. 
- throw new SolrException(ErrorCode.BAD_REQUEST, "Missing ContentType"); + throw new SolrException(ErrorCode.UNSUPPORTED_MEDIA_TYPE, "Missing ContentType"); } int idx = type.indexOf(';'); if(idx>0) { @@ -83,7 +83,7 @@ public class UpdateRequestHandler extends ContentStreamHandlerBase { } ContentStreamLoader loader = loaders.get(type); if(loader==null) { - throw new SolrException(ErrorCode.BAD_REQUEST, "Unsupported ContentType: " + throw new SolrException(ErrorCode.UNSUPPORTED_MEDIA_TYPE, "Unsupported ContentType: " +type+ " Not in: "+loaders.keySet()); } if(loader.getDefaultWT()!=null) { diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 87bcf14ccfa..83b773424b2 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -318,6 +318,7 @@ public class CollectionsHandler extends RequestHandlerBase { SHARDS_PROP, "router."); + copyPropertiesIfNotNull(req.getParams(), props); ZkNodeProps m = new ZkNodeProps(props); handleResponse(OverseerCollectionProcessor.CREATECOLLECTION, m, rsp); @@ -342,6 +343,7 @@ public class CollectionsHandler extends RequestHandlerBase { Map map = makeMap(QUEUE_OPERATION, CREATESHARD); copyIfNotNull(req.getParams(),map,COLLECTION_PROP, SHARD_ID_PROP, REPLICATION_FACTOR,CREATE_NODE_SET); + copyPropertiesIfNotNull(req.getParams(), map); ZkNodeProps m = new ZkNodeProps(map); handleResponse(CREATESHARD, m, rsp); } @@ -372,7 +374,18 @@ public class CollectionsHandler extends RequestHandlerBase { } } - + + private void copyPropertiesIfNotNull(SolrParams params, Map props) { + Iterator iter = params.getParameterNamesIterator(); + while (iter.hasNext()) { + String param = iter.next(); + if (param.startsWith(OverseerCollectionProcessor.COLL_PROP_PREFIX)) { + props.put(param, params.get(param)); + } + } + } + + private void 
handleDeleteShardAction(SolrQueryRequest req, SolrQueryResponse rsp) throws InterruptedException, KeeperException { log.info("Deleting Shard : " + req.getParamString()); @@ -420,6 +433,7 @@ public class CollectionsHandler extends RequestHandlerBase { if (rangesStr != null) { props.put(CoreAdminParams.RANGES, rangesStr); } + copyPropertiesIfNotNull(req.getParams(), props); ZkNodeProps m = new ZkNodeProps(props); diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java index 23c4c4794e6..3b4cb48fa06 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java @@ -596,6 +596,15 @@ public class CoreAdminHandler extends RequestHandlerBase { "No such core exists '" + cname + "'"); } else { if (coreContainer.getZkController() != null) { + // we are unloading, cancel any ongoing recovery + // so there are no races to publish state + // we will try to cancel again later before close + if (core != null) { + if (coreContainer.getZkController() != null) { + core.getSolrCoreState().cancelRecovery(); + } + } + log.info("Unregistering core " + core.getName() + " from cloudstate."); try { coreContainer.getZkController().unregister(cname, @@ -796,7 +805,7 @@ public class CoreAdminHandler extends RequestHandlerBase { try { core = coreContainer.getCore(cname); if (core != null) { - syncStrategy = new SyncStrategy(); + syncStrategy = new SyncStrategy(core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler()); Map props = new HashMap(); props.put(ZkStateReader.BASE_URL_PROP, zkController.getBaseUrl()); diff --git a/solr/core/src/java/org/apache/solr/handler/admin/EditFileRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/EditFileRequestHandler.java new file mode 100644 index 00000000000..fbc4c1b3873 --- /dev/null +++ 
b/solr/core/src/java/org/apache/solr/handler/admin/EditFileRequestHandler.java @@ -0,0 +1,357 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.handler.admin; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.cloud.ZkSolrResourceLoader; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.util.ContentStream; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.Config; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrConfig; +import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.RequestHandlerBase; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.RawResponseWriter; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.InputSource; + +import java.io.ByteArrayInputStream; +import java.io.File; +import 
java.io.IOException; +import java.io.InputStreamReader; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Locale; +import java.util.Set; + +/** + * This handler uses the RawResponseWriter to give client access to + * files inside ${solr.home}/conf + *

    + * If you want to selectively restrict access some configuration files, you can list + * these files in the hidden invariants. For example to hide + * synonyms.txt and anotherfile.txt, you would register: + *

    + *

    + * <requestHandler name="/admin/fileupdate" class="org.apache.solr.handler.admin.EditFileRequestHandler" >
    + *   <lst name="defaults">
    + *    <str name="echoParams">explicit</str>
    + *   </lst>
    + *   <lst name="invariants">
    + *    <str name="hidden">synonyms.txt</str>
    + *    <str name="hidden">anotherfile.txt</str>
    + *    <str name="hidden">*</str>
    + *   </lst>
    + * </requestHandler>
    + * 
    + *

    + * At present, only explicit file names (including path) or the glob '*' are supported. Variants like '*.xml' + * are NOT supported. + *

    + *

    + * The EditFileRequestHandler uses the {@link RawResponseWriter} (wt=raw) to return + * file contents. If you need to use a different writer, you will need to change + * the registered invariant param for wt. + *

    + * If you want to override the contentType header returned for a given file, you can + * set it directly using: CONTENT_TYPE. For example, to get a plain text + * version of schema.xml, try: + *

    + *   http://localhost:8983/solr/admin/fileedit?file=schema.xml&contentType=text/plain
    + * 
    + * + * @since solr 4.7 + *

    + *

    + * You can use this handler to modify any files in the conf directory, e.g. solrconfig.xml + * or schema.xml, or even in sub-directories (e.g. velocity/error.vm) by POSTing a file. Here's an example cURL command + *

    + *                                            curl -X POST --form "fileupload=@schema.new" 'http://localhost:8983/solr/collection1/admin/fileedit?op=write&file=schema.xml'
    + *                                           
    + * + * or + *
    + *                                            curl -X POST --form "fileupload=@error.new" 'http://localhost:8983/solr/collection1/admin/fileedit?op=write&file=velocity/error.vm'
    + *                                           
    + * + * For the first iteration, this is probably going to be used from the Solr admin screen. + * + * NOTE: Specifying a directory or simply leaving the any "file=XXX" parameters will list the contents of a directory. + * + * NOTE: You must reload the core/collection for any changes made via this handler to take effect! + * + * NOTE: If the core does not load (say schema.xml is not well formed for instance) you may be unable to replace + * the files with this interface. + * + * NOTE: Leaving this handler enabled is a security risk! This handler should be disabled in all but trusted + * (probably development only) environments! + * + * Configuration files in ZooKeeper are supported. + */ +public class EditFileRequestHandler extends RequestHandlerBase { + + protected static final Logger log = LoggerFactory.getLogger(EditFileRequestHandler.class); + + private final static String OP_PARAM = "op"; + private final static String OP_WRITE = "write"; + private final static String OP_TEST = "test"; + + ContentStream stream; + private byte[] data = null; + Set hiddenFiles; + + public EditFileRequestHandler() { + super(); + } + + @Override + public void init(NamedList args) { + super.init(args); + hiddenFiles = ShowFileRequestHandler.initHidden(invariants); + } + + @Override + public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) + throws InterruptedException, KeeperException, IOException { + + CoreContainer coreContainer = req.getCore().getCoreDescriptor().getCoreContainer(); + String op = req.getParams().get(OP_PARAM); + if (OP_WRITE.equalsIgnoreCase(op) || OP_TEST.equalsIgnoreCase(op)) { + String fname = req.getParams().get("file", null); + if (fname == null) { + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "No file name specified for write operation.")); + } else { + fname = fname.replace('\\', '/'); + stream = getOneInputStream(req, rsp); + if (stream == null) { + return; // Error already in rsp. 
+ } + + data = IOUtils.toByteArray(new InputStreamReader(stream.getStream(), "UTF-8"), "UTF-8"); + + // If it's "solrconfig.xml", try parsing it as that object. Otherwise, if it ends in '.xml', + // see if it at least parses. + if ("solrconfig.xml".equals(fname)) { + try { + new SolrConfig("unused", new InputSource(new ByteArrayInputStream(data))); + } catch (Exception e) { + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "Invalid solr config file: " + e.getMessage())); + return; + } + } else if (fname.endsWith(".xml")) { // At least do a rudimentary test, see if the thing parses. + try { + new Config(null, null, new InputSource(new ByteArrayInputStream(data)), null, false); + } catch (Exception e) { + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "Invalid XML file: " + e.getMessage())); + return; + } + } + if (ShowFileRequestHandler.isHiddenFile(req, rsp, fname, true, hiddenFiles) == false) { + if (coreContainer.isZooKeeperAware()) { + writeToZooKeeper(req, rsp); + } else { + writeToFileSystem(req, rsp); + } + } + } + } + } + + // write the file contained in the parameter "file=XXX" to ZooKeeper. The file may be a path, e.g. + // file=velocity/error.vm or file=schema.xml + // + // Important: Assumes that the file already exists in ZK, so far we aren't creating files there. 
+ private void writeToZooKeeper(SolrQueryRequest req, SolrQueryResponse rsp) + throws KeeperException, InterruptedException, IOException { + + CoreContainer coreContainer = req.getCore().getCoreDescriptor().getCoreContainer(); + SolrZkClient zkClient = coreContainer.getZkController().getZkClient(); + + String adminFile = ShowFileRequestHandler.getAdminFileFromZooKeeper(req, rsp, zkClient, hiddenFiles); + String fname = req.getParams().get("file", null); + if (OP_TEST.equals(req.getParams().get(OP_PARAM))) { + testReloadSuccess(req, rsp); + return; + } + // Persist the managed schema + try { + // Assumption: the path exists + zkClient.setData(adminFile, data, true); + log.info("Saved " + fname + " to ZooKeeper successfully."); + } catch (KeeperException.BadVersionException e) { + log.error("Cannot save file: " + fname + " to Zookeeper, " + + "ZooKeeper error: " + e.getMessage()); + rsp.setException(new SolrException(ErrorCode.SERVER_ERROR, "Cannot save file: " + fname + " to Zookeeper, " + + "ZooKeeper error: " + e.getMessage())); + } + } + + // Used when POSTing the configuration files to Solr (either ZooKeeper or locally). + // + // It takes some effort to insure that there is one (and only one) stream provided, there's no provision for + // more than one stream at present. 
+ private ContentStream getOneInputStream(SolrQueryRequest req, SolrQueryResponse rsp) { + String file = req.getParams().get("file"); + if (file == null) { + log.error("You must specify a file for the write operation."); + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "You must specify a file for the write operation.")); + return null; + } + + // Now, this is truly clumsy + Iterable streams = req.getContentStreams(); + if (streams == null) { + log.error("Input stream list was null for admin file write operation."); + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "Input stream list was null for admin file write operation.")); + return null; + } + Iterator iter = streams.iterator(); + if (!iter.hasNext()) { + log.error("No input streams were in the list for admin file write operation."); + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "No input streams were in the list for admin file write operation.")); + return null; + } + ContentStream stream = iter.next(); + if (iter.hasNext()) { + log.error("More than one input stream was found for admin file write operation."); + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "More than one input stream was found for admin file write operation.")); + return null; + } + return stream; + } + + // Write the data passed in from the stream to the file indicated by the file=XXX parameter on the local file system + private void writeToFileSystem(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException { + + File adminFile = ShowFileRequestHandler.getAdminFileFromFileSystem(req, rsp, hiddenFiles); + if (adminFile == null || adminFile.isDirectory()) { + String fname = req.getParams().get("file", null); + + if (adminFile == null) { + log.error("File " + fname + " was not found."); + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, "File " + fname + " was not found.")); + return; + } + log.error("File " + fname + " is a directory."); + rsp.setException(new 
SolrException(ErrorCode.BAD_REQUEST, "File " + fname + " is a directory.")); + return; + } + if (OP_TEST.equals(req.getParams().get(OP_PARAM))) { + testReloadSuccess(req, rsp); + return; + } + + FileUtils.copyInputStreamToFile(stream.getStream(), adminFile); + log.info("Successfully saved file " + adminFile.getAbsolutePath() + " locally"); + } + + private boolean testReloadSuccess(SolrQueryRequest req, SolrQueryResponse rsp) { + // Try writing the config to a temporary core and reloading to see that we don't allow people to shoot themselves + // in the foot. + File home = null; + try { + home = new File(FileUtils.getTempDirectory(), "SOLR_5459"); // Unlikely to name a core or collection this! + FileUtils.writeStringToFile(new File(home, "solr.xml"), "", "UTF-8"); // Use auto-discovery + File coll = new File(home, "SOLR_5459"); + + SolrCore core = req.getCore(); + CoreDescriptor desc = core.getCoreDescriptor(); + CoreContainer coreContainer = desc.getCoreContainer(); + + if (coreContainer.isZooKeeperAware()) { + try { + String confPath = ((ZkSolrResourceLoader) core.getResourceLoader()).getCollectionZkPath(); + + ZkController.downloadConfigDir(coreContainer.getZkController().getZkClient(), confPath, + new File(coll, "conf")); + } catch (Exception ex) { + log.error("Error when attempting to download conf from ZooKeeper: " + ex.getMessage()); + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, + "Error when attempting to download conf from ZooKeeper" + ex.getMessage())); + return false; + } + } else { + FileUtils.copyDirectory(new File(desc.getInstanceDir(), "conf"), + new File(coll, "conf")); + } + + FileUtils.writeStringToFile(new File(coll, "core.properties"), "name=SOLR_5459", "UTF-8"); + + FileUtils.writeByteArrayToFile(new File(new File(coll, "conf"), req.getParams().get("file", null)), data); + + return tryReloading(rsp, home); + + } catch (IOException ex) { + log.warn("Caught IO exception when trying to verify configs. 
" + ex.getMessage()); + rsp.setException(new SolrException(ErrorCode.SERVER_ERROR, + "Caught IO exception when trying to verify configs. " + ex.getMessage())); + return false; + } finally { + if (home != null) { + try { + FileUtils.deleteDirectory(home); + } catch (IOException e) { + log.warn("Caught IO exception trying to delete temporary directory " + home + e.getMessage()); + return true; // Don't fail for this reason! + } + } + } + } + + private boolean tryReloading(SolrQueryResponse rsp, File home) { + CoreContainer cc = null; + try { + cc = CoreContainer.createAndLoad(home.getAbsolutePath(), new File(home, "solr.xml")); + if (cc.getCoreInitFailures().size() > 0) { + for (Exception ex : cc.getCoreInitFailures().values()) { + log.error("Error when attempting to reload core: " + ex.getMessage()); + rsp.setException(new SolrException(ErrorCode.BAD_REQUEST, + "Error when attempting to reload core after writing config" + ex.getMessage())); + } + return false; + } + return true; + } finally { + if (cc != null) { + cc.shutdown(); + } + } + } + + //////////////////////// SolrInfoMBeans methods ////////////////////// + + @Override + public String getDescription() { + return "Admin Config File -- update config files directly"; + } + + @Override + public String getSource() { + return "$URL: https://svn.apache.org/repos/asf/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java $"; + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java index c971c85313c..d8314258be2 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java @@ -23,6 +23,7 @@ import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.params.CommonParams; import 
org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -33,7 +34,11 @@ import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.RawResponseWriter; import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.ManagedIndexSchema; import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; @@ -60,10 +65,15 @@ import java.util.Set; * </lst> * <lst name="invariants"> * <str name="hidden">synonyms.txt</str> - * <str name="hidden">anotherfile.txt</str> + * <str name="hidden">anotherfile.txt</str> + * <str name="hidden">*</str> * </lst> * </requestHandler> * + * + * At present, there is only explicit file names (including path) or the glob '*' are supported. Variants like '*.xml' + * are NOT supported.ere + * *

    * The ShowFileRequestHandler uses the {@link RawResponseWriter} (wt=raw) to return * file contents. If you need to use a different writer, you will need to change @@ -75,7 +85,7 @@ import java.util.Set; *

      *   http://localhost:8983/solr/admin/file?file=schema.xml&contentType=text/plain
      * 
    - * + * * * @since solr 1.3 */ @@ -83,9 +93,13 @@ public class ShowFileRequestHandler extends RequestHandlerBase { public static final String HIDDEN = "hidden"; public static final String USE_CONTENT_TYPE = "contentType"; - + protected Set hiddenFiles; - + + protected static final Logger log = LoggerFactory + .getLogger(ShowFileRequestHandler.class); + + public ShowFileRequestHandler() { super(); @@ -94,27 +108,28 @@ public class ShowFileRequestHandler extends RequestHandlerBase @Override public void init(NamedList args) { super.init( args ); + hiddenFiles = initHidden(invariants); + } + public static Set initHidden(SolrParams invariants) { + + Set hiddenRet = new HashSet(); // Build a list of hidden files - hiddenFiles = new HashSet(); - if( invariants != null ) { - String[] hidden = invariants.getParams( HIDDEN ); - if( hidden != null ) { - for( String s : hidden ) { - hiddenFiles.add( s.toUpperCase(Locale.ROOT) ); + if (invariants != null) { + String[] hidden = invariants.getParams(HIDDEN); + if (hidden != null) { + for (String s : hidden) { + hiddenRet.add(s.toUpperCase(Locale.ROOT)); } } } + return hiddenRet; } - - public Set getHiddenFiles() - { - return hiddenFiles; - } - + @Override - public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, KeeperException, InterruptedException - { + public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) + throws InterruptedException, KeeperException, IOException { + CoreContainer coreContainer = req.getCore().getCoreDescriptor().getCoreContainer(); if (coreContainer.isZooKeeperAware()) { showFromZooKeeper(req, rsp, coreContainer); @@ -123,58 +138,32 @@ public class ShowFileRequestHandler extends RequestHandlerBase } } + // Get a list of files from ZooKeeper for from the path in the file= parameter. 
private void showFromZooKeeper(SolrQueryRequest req, SolrQueryResponse rsp, CoreContainer coreContainer) throws KeeperException, InterruptedException, UnsupportedEncodingException { - String adminFile = null; - SolrCore core = req.getCore(); + SolrZkClient zkClient = coreContainer.getZkController().getZkClient(); - final ZkSolrResourceLoader loader = (ZkSolrResourceLoader) core - .getResourceLoader(); - String confPath = loader.getCollectionZkPath(); - - String fname = req.getParams().get("file", null); - if (fname == null) { - adminFile = confPath; - } else { - fname = fname.replace('\\', '/'); // normalize slashes - if (hiddenFiles.contains(fname.toUpperCase(Locale.ROOT))) { - rsp.setException(new SolrException(ErrorCode.FORBIDDEN, "Can not access: " + fname)); - return; - } - if (fname.indexOf("..") >= 0) { - rsp.setException(new SolrException(ErrorCode.FORBIDDEN, "Invalid path: " + fname)); - return; - } - if (fname.startsWith("/")) { // Only files relative to conf are valid - fname = fname.substring(1); - } - adminFile = confPath + "/" + fname; - } - - // Make sure the file exists, is readable and is not a hidden file - if (!zkClient.exists(adminFile, true)) { - rsp.setException(new SolrException(ErrorCode.NOT_FOUND, "Can not find: " - + adminFile)); + + String adminFile = getAdminFileFromZooKeeper(req, rsp, zkClient, hiddenFiles); + + if (adminFile == null) { return; } - + // Show a directory listing List children = zkClient.getChildren(adminFile, null, true); if (children.size() > 0) { NamedList> files = new SimpleOrderedMap>(); for (String f : children) { - if (hiddenFiles.contains(f.toUpperCase(Locale.ROOT))) { - continue; // don't show 'hidden' files + if (isHiddenFile(req, rsp, f, false, hiddenFiles)) { + continue; } - if (f.startsWith(".")) { - continue; // skip hidden system files... 
- } - + SimpleOrderedMap fileInfo = new SimpleOrderedMap(); files.add(f, fileInfo); - List fchildren = zkClient.getChildren(adminFile, null, true); + List fchildren = zkClient.getChildren(adminFile + "/" + f, null, true); if (fchildren.size() > 0) { fileInfo.add("directory", true); } else { @@ -199,45 +188,24 @@ public class ShowFileRequestHandler extends RequestHandlerBase rsp.setHttpCaching(false); } + // Return the file indicated (or the directory listing) from the local file system. private void showFromFileSystem(SolrQueryRequest req, SolrQueryResponse rsp) { - File adminFile = null; - - final SolrResourceLoader loader = req.getCore().getResourceLoader(); - File configdir = new File( loader.getConfigDir() ); - if (!configdir.exists()) { - // TODO: maybe we should just open it this way to start with? - try { - configdir = new File( loader.getClassLoader().getResource(loader.getConfigDir()).toURI() ); - } catch (URISyntaxException e) { - rsp.setException(new SolrException( ErrorCode.FORBIDDEN, "Can not access configuration directory!", e)); - return; - } + File adminFile = getAdminFileFromFileSystem(req, rsp, hiddenFiles); + + if (adminFile == null) { // exception already recorded + return; } - String fname = req.getParams().get("file", null); - if( fname == null ) { - adminFile = configdir; - } - else { - fname = fname.replace( '\\', '/' ); // normalize slashes - if( hiddenFiles.contains( fname.toUpperCase(Locale.ROOT) ) ) { - rsp.setException(new SolrException( ErrorCode.FORBIDDEN, "Can not access: "+fname )); - return; - } - if( fname.indexOf( ".." 
) >= 0 ) { - rsp.setException(new SolrException( ErrorCode.FORBIDDEN, "Invalid path: "+fname )); - return; - } - adminFile = new File( configdir, fname ); - } - + // Make sure the file exists, is readable and is not a hidden file if( !adminFile.exists() ) { + log.error("Can not find: "+adminFile.getName() + " ["+adminFile.getAbsolutePath()+"]"); rsp.setException(new SolrException ( ErrorCode.NOT_FOUND, "Can not find: "+adminFile.getName() + " ["+adminFile.getAbsolutePath()+"]" )); return; } if( !adminFile.canRead() || adminFile.isHidden() ) { + log.error("Can not show: "+adminFile.getName() + " ["+adminFile.getAbsolutePath()+"]"); rsp.setException(new SolrException ( ErrorCode.NOT_FOUND, "Can not show: "+adminFile.getName() + " ["+adminFile.getAbsolutePath()+"]" )); @@ -246,19 +214,17 @@ public class ShowFileRequestHandler extends RequestHandlerBase // Show a directory listing if( adminFile.isDirectory() ) { - - int basePath = configdir.getAbsolutePath().length() + 1; + // it's really a directory, just go for it. + int basePath = adminFile.getAbsolutePath().length() + 1; NamedList> files = new SimpleOrderedMap>(); for( File f : adminFile.listFiles() ) { String path = f.getAbsolutePath().substring( basePath ); path = path.replace( '\\', '/' ); // normalize slashes - if( hiddenFiles.contains( path.toUpperCase(Locale.ROOT) ) ) { - continue; // don't show 'hidden' files + + if (isHiddenFile(req, rsp, f.getName().replace('\\', '/'), false, hiddenFiles)) { + continue; } - if( f.isHidden() || f.getName().startsWith( "." ) ) { - continue; // skip hidden system files... 
- } - + SimpleOrderedMap fileInfo = new SimpleOrderedMap(); files.add( path, fileInfo ); if( f.isDirectory() ) { @@ -270,7 +236,7 @@ public class ShowFileRequestHandler extends RequestHandlerBase } fileInfo.add( "modified", new Date( f.lastModified() ) ); } - rsp.add( "files", files ); + rsp.add("files", files); } else { // Include the file contents @@ -280,19 +246,139 @@ public class ShowFileRequestHandler extends RequestHandlerBase req.setParams(params); ContentStreamBase content = new ContentStreamBase.FileStream( adminFile ); - content.setContentType( req.getParams().get( USE_CONTENT_TYPE ) ); + content.setContentType(req.getParams().get(USE_CONTENT_TYPE)); rsp.add(RawResponseWriter.CONTENT, content); } rsp.setHttpCaching(false); } - - + + //////////////////////// Static methods ////////////////////////////// + + public static boolean isHiddenFile(SolrQueryRequest req, SolrQueryResponse rsp, String fnameIn, boolean reportError, + Set hiddenFiles) { + String fname = fnameIn.toUpperCase(Locale.ROOT); + if (hiddenFiles.contains(fname) || hiddenFiles.contains("*")) { + if (reportError) { + log.error("Cannot access " + fname); + rsp.setException(new SolrException(SolrException.ErrorCode.FORBIDDEN, "Can not access: " + fnameIn)); + } + return true; + } + + // This is slightly off, a valid path is something like ./schema.xml. I don't think it's worth the effort though + // to fix it to handle all possibilities though. + if (fname.indexOf("..") >= 0 || fname.startsWith(".")) { + if (reportError) { + log.error("Invalid path: " + fname); + rsp.setException(new SolrException(SolrException.ErrorCode.FORBIDDEN, "Invalid path: " + fnameIn)); + } + return true; + } + + // Make sure that if the schema is managed, we don't allow editing. Don't really want to put + // this in the init since we're not entirely sure when the managed schema will get initialized relative to this + // handler. 
+ SolrCore core = req.getCore(); + IndexSchema schema = core.getLatestSchema(); + if (schema instanceof ManagedIndexSchema) { + String managed = schema.getResourceName(); + + if (fname.equalsIgnoreCase(managed)) { + return true; + } + } + return false; + } + + // Refactored to be usable from multiple methods. Gets the path of the requested file from ZK. + // Returns null if the file is not found. + // + // Assumes that the file is in a parameter called "file". + + public static String getAdminFileFromZooKeeper(SolrQueryRequest req, SolrQueryResponse rsp, SolrZkClient zkClient, + Set hiddenFiles) + throws KeeperException, InterruptedException { + String adminFile = null; + SolrCore core = req.getCore(); + + final ZkSolrResourceLoader loader = (ZkSolrResourceLoader) core + .getResourceLoader(); + String confPath = loader.getCollectionZkPath(); + + String fname = req.getParams().get("file", null); + if (fname == null) { + adminFile = confPath; + } else { + fname = fname.replace('\\', '/'); // normalize slashes + if (isHiddenFile(req, rsp, fname, true, hiddenFiles)) { + return null; + } + if (fname.startsWith("/")) { // Only files relative to conf are valid + fname = fname.substring(1); + } + adminFile = confPath + "/" + fname; + } + + // Make sure the file exists, is readable and is not a hidden file + if (!zkClient.exists(adminFile, true)) { + log.error("Can not find: " + adminFile); + rsp.setException(new SolrException(SolrException.ErrorCode.NOT_FOUND, "Can not find: " + + adminFile)); + return null; + } + + return adminFile; + } + + + // Find the file indicated by the "file=XXX" parameter or the root of the conf directory on the local + // file system. Respects all the "interesting" stuff around what the resource loader does to find files. 
+ public static File getAdminFileFromFileSystem(SolrQueryRequest req, SolrQueryResponse rsp, + Set hiddenFiles) { + File adminFile = null; + final SolrResourceLoader loader = req.getCore().getResourceLoader(); + File configdir = new File( loader.getConfigDir() ); + if (!configdir.exists()) { + // TODO: maybe we should just open it this way to start with? + try { + configdir = new File( loader.getClassLoader().getResource(loader.getConfigDir()).toURI() ); + } catch (URISyntaxException e) { + log.error("Can not access configuration directory!"); + rsp.setException(new SolrException( SolrException.ErrorCode.FORBIDDEN, "Can not access configuration directory!", e)); + return null; + } + } + String fname = req.getParams().get("file", null); + if( fname == null ) { + adminFile = configdir; + } + else { + fname = fname.replace( '\\', '/' ); // normalize slashes + if( hiddenFiles.contains( fname.toUpperCase(Locale.ROOT) ) ) { + log.error("Can not access: "+ fname); + rsp.setException(new SolrException( SolrException.ErrorCode.FORBIDDEN, "Can not access: "+fname )); + return null; + } + if( fname.indexOf( ".." 
) >= 0 ) { + log.error("Invalid path: "+ fname); + rsp.setException(new SolrException( SolrException.ErrorCode.FORBIDDEN, "Invalid path: "+fname )); + return null; + } + adminFile = new File( configdir, fname ); + } + return adminFile; + } + + public final Set getHiddenFiles() { + return hiddenFiles; + } + //////////////////////// SolrInfoMBeans methods ////////////////////// @Override public String getDescription() { - return "Admin Get File -- view config files directly"; + return "Admin Config File -- view or update config files directly"; } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/component/AnalyticsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/AnalyticsComponent.java new file mode 100644 index 00000000000..ed51a6f7c41 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/AnalyticsComponent.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.handler.component; + +import java.io.IOException; + +import org.apache.solr.analytics.plugin.AnalyticsStatisticsCollector; +import org.apache.solr.analytics.request.AnalyticsStats; +import org.apache.solr.analytics.util.AnalyticsParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; + +public class AnalyticsComponent extends SearchComponent { + public static final String COMPONENT_NAME = "analytics"; + private final AnalyticsStatisticsCollector analyticsCollector = new AnalyticsStatisticsCollector();; + + @Override + public void prepare(ResponseBuilder rb) throws IOException { + if (rb.req.getParams().getBool(AnalyticsParams.ANALYTICS,false)) { + rb.setNeedDocSet( true ); + } + } + + @Override + public void process(ResponseBuilder rb) throws IOException { + if (rb.req.getParams().getBool(AnalyticsParams.ANALYTICS,false)) { + SolrParams params = rb.req.getParams(); + AnalyticsStats s = new AnalyticsStats(rb.req, rb.getResults().docSet, params, analyticsCollector); + rb.rsp.add( "stats", s.execute() ); + } + } + + /* + @Override + public int distributedProcess(ResponseBuilder rb) throws IOException { + return ResponseBuilder.STAGE_DONE; + } + + @Override + public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) { + // TODO Auto-generated method stub + super.modifyRequest(rb, who, sreq); + } + + @Override + public void handleResponses(ResponseBuilder rb, ShardRequest sreq) { + // TODO Auto-generated method stub + super.handleResponses(rb, sreq); + } + + @Override + public void finishStage(ResponseBuilder rb) { + // TODO Auto-generated method stub + super.finishStage(rb); + } + */ + + @Override + public String getName() { + return COMPONENT_NAME; + } + + @Override + public String getDescription() { + return "Perform analytics"; + } + + @Override + public String getSource() { + return "$URL$"; + } + + @Override + public String getVersion() { + return 
getClass().getPackage().getSpecificationVersion(); + } + + @Override + public NamedList getStatistics() { + return analyticsCollector.getStatistics(); + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java b/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java index 6cd9a190573..0a8703ebfae 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java +++ b/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java @@ -48,6 +48,7 @@ public class FieldFacetStats { public final String name; final SchemaField facet_sf; final SchemaField field_sf; + final boolean calcDistinct; public final Map facetStatsValues; @@ -63,10 +64,11 @@ public class FieldFacetStats { private final BytesRef tempBR = new BytesRef(); - public FieldFacetStats(SolrIndexSearcher searcher, String name, SchemaField field_sf, SchemaField facet_sf) { + public FieldFacetStats(SolrIndexSearcher searcher, String name, SchemaField field_sf, SchemaField facet_sf, boolean calcDistinct) { this.name = name; this.field_sf = field_sf; this.facet_sf = facet_sf; + this.calcDistinct = calcDistinct; topLevelReader = searcher.getAtomicReader(); valueSource = facet_sf.getType().getValueSource(facet_sf, null); @@ -78,7 +80,7 @@ public class FieldFacetStats { private StatsValues getStatsValues(String key) throws IOException { StatsValues stats = facetStatsValues.get(key); if (stats == null) { - stats = StatsValuesFactory.createStatsValues(field_sf); + stats = StatsValuesFactory.createStatsValues(field_sf, calcDistinct); facetStatsValues.put(key, stats); stats.setNextReader(context); } @@ -139,7 +141,7 @@ public class FieldFacetStats { String key = (String) pairs.getKey(); StatsValues facetStats = facetStatsValues.get(key); if (facetStats == null) { - facetStats = StatsValuesFactory.createStatsValues(field_sf); + facetStats = StatsValuesFactory.createStatsValues(field_sf, calcDistinct); facetStatsValues.put(key, 
facetStats); } Integer count = (Integer) pairs.getValue(); diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java index d55e01f0701..a2432cd994f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java @@ -16,19 +16,6 @@ package org.apache.solr.handler.component; * limitations under the License. */ -import java.io.IOException; -import java.net.MalformedURLException; -import java.util.Collections; -import java.util.List; -import java.util.Random; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - import org.apache.http.client.HttpClient; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpClientUtil; @@ -44,6 +31,18 @@ import org.apache.solr.util.DefaultSolrThreadFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.apache.solr.util.plugin.PluginInfoInitialized { protected static Logger log = LoggerFactory.getLogger(HttpShardHandlerFactory.class); @@ -158,12 +157,7 @@ 
public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. } protected LBHttpSolrServer createLoadbalancer(HttpClient httpClient){ - try { - return new LBHttpSolrServer(httpClient); - } catch (MalformedURLException e) { - // should be impossible since we're not passing any URLs here - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); - } + return new LBHttpSolrServer(httpClient); } protected T getParameter(NamedList initArgs, String configKey, T defaultValue) { diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index cdd5b2d3c6b..55ff8ad913f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -34,7 +34,6 @@ import org.apache.lucene.search.grouping.SearchGroup; import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; @@ -47,6 +46,7 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.ResultContext; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; @@ -190,7 +190,7 @@ public class QueryComponent extends SearchComponent // groupSort defaults to sort String groupSortStr = params.get(GroupParams.GROUP_SORT); //TODO: move weighting of sort - Sort sortWithinGroup = groupSortStr == null ? groupSort : searcher.weightSort(QueryParsing.parseSort(groupSortStr, req)); + Sort sortWithinGroup = groupSortStr == null ? 
groupSort : searcher.weightSort(QueryParsing.parseSortSpec(groupSortStr, req).getSort()); if (sortWithinGroup == null) { sortWithinGroup = Sort.RELEVANCE; } @@ -449,16 +449,12 @@ public class QueryComponent extends SearchComponent { SolrQueryRequest req = rb.req; SolrQueryResponse rsp = rb.rsp; - final CharsRef spare = new CharsRef(); // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't // currently have an option to return sort field values. Because of this, we // take the documents given and re-derive the sort values. boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false); if(fsv){ - Sort sort = searcher.weightSort(rb.getSortSpec().getSort()); - SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort(); NamedList sortVals = new NamedList(); // order is important for the sort fields - Field field = new StringField("dummy", "", Field.Store.NO); // a dummy Field IndexReaderContext topReaderContext = searcher.getTopReaderContext(); List leaves = topReaderContext.leaves(); AtomicReaderContext currentLeaf = null; @@ -479,18 +475,22 @@ public class QueryComponent extends SearchComponent } Arrays.sort(sortedIds); + SortSpec sortSpec = rb.getSortSpec(); + Sort sort = searcher.weightSort(sortSpec.getSort()); + SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort(); + List schemaFields = sortSpec.getSchemaFields(); + + for (int fld = 0; fld < schemaFields.size(); fld++) { + SchemaField schemaField = schemaFields.get(fld); + FieldType ft = null == schemaField? 
null : schemaField.getType(); + SortField sortField = sortFields[fld]; - for (SortField sortField: sortFields) { SortField.Type type = sortField.getType(); + // :TODO: would be simpler to always serialize every position of SortField[] if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue; FieldComparator comparator = null; - - String fieldname = sortField.getField(); - FieldType ft = fieldname==null ? null : searcher.getSchema().getFieldTypeNoEx(fieldname); - Object[] vals = new Object[nDocs]; - int lastIdx = -1; int idx = 0; @@ -516,31 +516,11 @@ public class QueryComponent extends SearchComponent doc -= currentLeaf.docBase; // adjust for what segment this is in comparator.copy(0, doc); Object val = comparator.value(0); - - // Sortable float, double, int, long types all just use a string - // comparator. For these, we need to put the type into a readable - // format. One reason for this is that XML can't represent all - // string values (or even all unicode code points). 
- // indexedToReadable() should be a no-op and should - // thus be harmless anyway (for all current ways anyway) - if (val instanceof String) { - field.setStringValue((String)val); - val = ft.toObject(field); - } - - // Must do the same conversion when sorting by a - // String field in Lucene, which returns the terms - // data as BytesRef: - if (val instanceof BytesRef) { - UnicodeUtil.UTF8toUTF16((BytesRef)val, spare); - field.setStringValue(spare.toString()); - val = ft.toObject(field); - } - + if (null != ft) val = ft.marshalSortValue(val); vals[position] = val; } - sortVals.add(fieldname, vals); + sortVals.add(sortField.getField(), vals); } rsp.add("sort_values", sortVals); @@ -778,7 +758,8 @@ public class QueryComponent extends SearchComponent sortFields = new SortField[]{SortField.FIELD_SCORE}; } - SchemaField uniqueKeyField = rb.req.getSchema().getUniqueKeyField(); + IndexSchema schema = rb.req.getSchema(); + SchemaField uniqueKeyField = schema.getUniqueKeyField(); // id to shard mapping, to eliminate any accidental dups @@ -787,7 +768,7 @@ public class QueryComponent extends SearchComponent // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... 
we only need to order the top (rows+start) ShardFieldSortedHitQueue queue; - queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount()); + queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); NamedList shardInfo = null; if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { @@ -813,11 +794,15 @@ public class QueryComponent extends SearchComponent StringWriter trace = new StringWriter(); t.printStackTrace(new PrintWriter(trace)); nl.add("trace", trace.toString() ); + if (srsp.getShardAddress() != null) { + nl.add("shardAddress", srsp.getShardAddress()); + } } else { docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response"); nl.add("numFound", docs.getNumFound()); nl.add("maxScore", docs.getMaxScore()); + nl.add("shardAddress", srsp.getShardAddress()); } if(srsp.getSolrResponse()!=null) { nl.add("time", srsp.getSolrResponse().getElapsedTime()); @@ -882,7 +867,7 @@ public class QueryComponent extends SearchComponent } } - shardDoc.sortFieldValues = sortFieldValues; + shardDoc.sortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema); queue.insertWithOverflow(shardDoc); } // end for-each-doc-in-response @@ -924,6 +909,47 @@ public class QueryComponent extends SearchComponent } } + private NamedList unmarshalSortValues(SortSpec sortSpec, + NamedList sortFieldValues, + IndexSchema schema) { + NamedList unmarshalledSortValsPerField = new NamedList(); + + if (0 == sortFieldValues.size()) return unmarshalledSortValsPerField; + + List schemaFields = sortSpec.getSchemaFields(); + SortField[] sortFields = sortSpec.getSort().getSort(); + + int marshalledFieldNum = 0; + for (int sortFieldNum = 0; sortFieldNum < sortFields.length; sortFieldNum++) { + final SortField sortField = sortFields[sortFieldNum]; + final SortField.Type type = sortField.getType(); + + // :TODO: would be simpler to always serialize every position of SortField[] + if (type==SortField.Type.SCORE || 
type==SortField.Type.DOC) continue; + + final String sortFieldName = sortField.getField(); + final String valueFieldName = sortFieldValues.getName(marshalledFieldNum); + assert sortFieldName.equals(valueFieldName) + : "sortFieldValues name key does not match expected SortField.getField"; + + List sortVals = (List)sortFieldValues.getVal(marshalledFieldNum); + + final SchemaField schemaField = schemaFields.get(sortFieldNum); + if (null == schemaField) { + unmarshalledSortValsPerField.add(sortField.getField(), sortVals); + } else { + FieldType fieldType = schemaField.getType(); + List unmarshalledSortVals = new ArrayList(); + for (Object sortVal : sortVals) { + unmarshalledSortVals.add(fieldType.unmarshalSortValue(sortVal)); + } + unmarshalledSortValsPerField.add(sortField.getField(), unmarshalledSortVals); + } + marshalledFieldNum++; + } + return unmarshalledSortValsPerField; + } + private void createRetrieveDocs(ResponseBuilder rb) { // TODO: in a system with nTiers > 2, we could be passed "ids" here diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index 476e53d66b9..15ca4e695fa 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -208,7 +208,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore ZkController zkController = core.getCoreDescriptor().getCoreContainer().getZkController(); if (zkController != null) { // TODO : shouldn't have to keep reading the config name when it has been read before - exists = zkController.configFileExists(zkController.readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), f); + exists = 
zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), f); } else { File fC = new File(core.getResourceLoader().getConfigDir(), f); File fD = new File(core.getDataDir(), f); @@ -419,16 +419,16 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore // insert documents in their proper place SortSpec sortSpec = rb.getSortSpec(); if (sortSpec.getSort() == null) { - sortSpec.setSort(new Sort(new SortField[]{ - new SortField("_elevate_", comparator, true), - new SortField(null, SortField.Type.SCORE, false) - })); + sortSpec.setSortAndFields(new Sort(new SortField[]{ + new SortField("_elevate_", comparator, true), + new SortField(null, SortField.Type.SCORE, false) + }), + Arrays.asList(new SchemaField[2])); } else { // Check if the sort is based on score - SortField[] current = sortSpec.getSort().getSort(); - Sort modified = this.modifySort(current, force, comparator); - if(modified != null) { - sortSpec.setSort(modified); + SortSpec modSortSpec = this.modifySortSpec(sortSpec, force, comparator); + if (null != modSortSpec) { + rb.setSortSpec(modSortSpec); } } @@ -470,22 +470,43 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore } private Sort modifySort(SortField[] current, boolean force, ElevationComparatorSource comparator) { + SortSpec tmp = new SortSpec(new Sort(current), Arrays.asList(new SchemaField[current.length])); + tmp = modifySortSpec(tmp, force, comparator); + return null == tmp ? 
null : tmp.getSort(); + } + + private SortSpec modifySortSpec(SortSpec current, boolean force, ElevationComparatorSource comparator) { boolean modify = false; - ArrayList sorts = new ArrayList(current.length + 1); + SortField[] currentSorts = current.getSort().getSort(); + List currentFields = current.getSchemaFields(); + + ArrayList sorts = new ArrayList(currentSorts.length + 1); + List fields = new ArrayList(currentFields.size() + 1); + // Perhaps force it to always sort by score - if (force && current[0].getType() != SortField.Type.SCORE) { + if (force && currentSorts[0].getType() != SortField.Type.SCORE) { sorts.add(new SortField("_elevate_", comparator, true)); + fields.add(null); modify = true; } - for (SortField sf : current) { + for (int i = 0; i < currentSorts.length; i++) { + SortField sf = currentSorts[i]; if (sf.getType() == SortField.Type.SCORE) { sorts.add(new SortField("_elevate_", comparator, !sf.getReverse())); + fields.add(null); modify = true; } sorts.add(sf); + fields.add(currentFields.get(i)); } - - return modify ? 
new Sort(sorts.toArray(new SortField[sorts.size()])) : null; + if (modify) { + SortSpec newSpec = new SortSpec(new Sort(sorts.toArray(new SortField[sorts.size()])), + fields); + newSpec.setOffset(current.getOffset()); + newSpec.setCount(current.getCount()); + return newSpec; + } + return null; } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index 713574450cb..05c766441ad 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -17,27 +17,29 @@ package org.apache.solr.handler.component; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ShardParams; -import org.apache.solr.util.RTimer; +import org.apache.solr.common.util.ContentStream; import org.apache.solr.core.CloseHook; import org.apache.solr.core.PluginInfo; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.util.RTimer; import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.plugin.PluginInfoInitialized; import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - /** * @@ -69,6 +71,7 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware , names.add( HighlightComponent.COMPONENT_NAME ); names.add( StatsComponent.COMPONENT_NAME ); names.add( DebugComponent.COMPONENT_NAME ); + 
names.add( AnalyticsComponent.COMPONENT_NAME ); return names; } @@ -164,6 +167,10 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware , { // int sleep = req.getParams().getInt("sleep",0); // if (sleep > 0) {log.error("SLEEPING for " + sleep); Thread.sleep(sleep);} + if (req.getContentStreams() != null && req.getContentStreams().iterator().hasNext()) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Search requests cannot accept content streams"); + } + ResponseBuilder rb = new ResponseBuilder(req, rsp, components); if (rb.requestInfo != null) { rb.requestInfo.setResponseBuilder(rb); diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java b/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java index 3ffb0a97ecc..603f262d925 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardDoc.java @@ -16,18 +16,21 @@ */ package org.apache.solr.handler.component; -import org.apache.lucene.search.FieldComparatorSource; +import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.SortField; import org.apache.lucene.util.PriorityQueue; +import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; -import org.apache.solr.search.MissingStringLastComparatorSource; +import org.apache.solr.search.SolrIndexSearcher; -import java.text.Collator; +import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.List; -import java.util.Locale; + +import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR; public class ShardDoc extends FieldDoc { public String shard; @@ -101,7 +104,7 @@ public class ShardDoc extends FieldDoc { class ShardFieldSortedHitQueue extends PriorityQueue { /** Stores a comparator corresponding to each field being sorted by 
*/ - protected Comparator[] comparators; + protected Comparator[] comparators; /** Stores the sort criteria being used. */ protected SortField[] fields; @@ -109,9 +112,10 @@ class ShardFieldSortedHitQueue extends PriorityQueue { /** The order of these fieldNames should correspond to the order of sort field values retrieved from the shard */ protected List fieldNames = new ArrayList(); - public ShardFieldSortedHitQueue(SortField[] fields, int size) { + public ShardFieldSortedHitQueue(SortField[] fields, int size, IndexSearcher searcher) { super(size); final int n = fields.length; + //noinspection unchecked comparators = new Comparator[n]; this.fields = new SortField[n]; for (int i = 0; i < n; ++i) { @@ -123,8 +127,7 @@ class ShardFieldSortedHitQueue extends PriorityQueue { } String fieldname = fields[i].getField(); - comparators[i] = getCachedComparator(fieldname, fields[i] - .getType(), fields[i].getComparatorSource()); + comparators[i] = getCachedComparator(fields[i], searcher); if (fields[i].getType() == SortField.Type.STRING) { this.fields[i] = new SortField(fieldname, SortField.Type.STRING, @@ -169,47 +172,36 @@ class ShardFieldSortedHitQueue extends PriorityQueue { return c < 0; } - Comparator getCachedComparator(String fieldname, SortField.Type type, FieldComparatorSource factory) { - Comparator comparator = null; - switch (type) { - case SCORE: - comparator = comparatorScore(fieldname); - break; - case STRING: - comparator = comparatorNatural(fieldname); - break; - case CUSTOM: - if (factory instanceof MissingStringLastComparatorSource){ - comparator = comparatorMissingStringLast(fieldname); - } else { - // TODO: support other types such as random... is there a way to - // support generically? 
Perhaps just comparing Object - comparator = comparatorNatural(fieldname); - // throw new RuntimeException("Custom sort not supported factory is "+factory.getClass()); + Comparator getCachedComparator(SortField sortField, IndexSearcher searcher) { + SortField.Type type = sortField.getType(); + if (type == SortField.Type.SCORE) { + return comparatorScore(); + } else if (type == SortField.Type.REWRITEABLE) { + try { + sortField = sortField.rewrite(searcher); + } catch (IOException e) { + throw new SolrException(SERVER_ERROR, "Exception rewriting sort field " + sortField, e); } - break; - case DOC: - // TODO: we can support this! - throw new RuntimeException("Doc sort not supported"); - default: - comparator = comparatorNatural(fieldname); - break; } - return comparator; + return comparatorFieldComparator(sortField); } - class ShardComparator implements Comparator { - String fieldName; - int fieldNum; - public ShardComparator(String fieldName) { - this.fieldName = fieldName; - this.fieldNum=0; + abstract class ShardComparator implements Comparator { + final SortField sortField; + final String fieldName; + final int fieldNum; + + public ShardComparator(SortField sortField) { + this.sortField = sortField; + this.fieldName = sortField.getField(); + int fieldNum = 0; for (int i=0; i { List lst = (List)shardDoc.sortFieldValues.getVal(fieldNum); return lst.get(shardDoc.orderInShard); } - - @Override - public int compare(Object o1, Object o2) { - return 0; - } } - static Comparator comparatorScore(final String fieldName) { - return new Comparator() { + static Comparator comparatorScore() { + return new Comparator() { @Override - public final int compare(final Object o1, final Object o2) { - ShardDoc e1 = (ShardDoc) o1; - ShardDoc e2 = (ShardDoc) o2; - - final float f1 = e1.score; - final float f2 = e2.score; + public final int compare(final ShardDoc o1, final ShardDoc o2) { + final float f1 = o1.score; + final float f2 = o2.score; if (f1 < f2) return -1; if (f1 > f2) @@ 
-242,71 +226,24 @@ class ShardFieldSortedHitQueue extends PriorityQueue { }; } - // The lucene natural sort ordering corresponds to numeric - // and string natural sort orderings (ascending). Since - // the PriorityQueue keeps the biggest elements by default, - // we need to reverse the natural compare ordering so that the - // smallest elements are kept instead of the largest... hence - // the negative sign on the final compareTo(). - Comparator comparatorNatural(String fieldName) { - return new ShardComparator(fieldName) { + Comparator comparatorFieldComparator(SortField sortField) { + final FieldComparator fieldComparator; + try { + fieldComparator = sortField.getComparator(0, 0); + } catch (IOException e) { + throw new RuntimeException("Unable to get FieldComparator for sortField " + sortField); + } + + return new ShardComparator(sortField) { + // Since the PriorityQueue keeps the biggest elements by default, + // we need to reverse the field compare ordering so that the + // smallest elements are kept instead of the largest... hence + // the negative sign. 
@Override - public final int compare(final Object o1, final Object o2) { - ShardDoc sd1 = (ShardDoc) o1; - ShardDoc sd2 = (ShardDoc) o2; - Comparable v1 = (Comparable)sortVal(sd1); - Comparable v2 = (Comparable)sortVal(sd2); - if (v1==v2) - return 0; - if (v1==null) - return 1; - if(v2==null) - return -1; - return -v1.compareTo(v2); + public int compare(final ShardDoc o1, final ShardDoc o2) { + //noinspection unchecked + return -fieldComparator.compareValues(sortVal(o1), sortVal(o2)); } }; } - - - Comparator comparatorStringLocale(final String fieldName, - Locale locale) { - final Collator collator = Collator.getInstance(locale); - return new ShardComparator(fieldName) { - @Override - public final int compare(final Object o1, final Object o2) { - ShardDoc sd1 = (ShardDoc) o1; - ShardDoc sd2 = (ShardDoc) o2; - Comparable v1 = (Comparable)sortVal(sd1); - Comparable v2 = (Comparable)sortVal(sd2); - if (v1==v2) - return 0; - if (v1==null) - return 1; - if(v2==null) - return -1; - return -collator.compare(v1,v2); - } - }; - } - - - Comparator comparatorMissingStringLast(final String fieldName) { - return new ShardComparator(fieldName) { - @Override - public final int compare(final Object o1, final Object o2) { - ShardDoc sd1 = (ShardDoc) o1; - ShardDoc sd2 = (ShardDoc) o2; - Comparable v1 = (Comparable)sortVal(sd1); - Comparable v2 = (Comparable)sortVal(sd2); - if (v1==v2) - return 0; - if (v1==null) - return -1; - if(v2==null) - return 1; - return -v1.compareTo(v2); - } - }; - } - } diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 35a6c66592f..a2c609e3a9f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -320,7 +320,16 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar if 
(maxResultsForSuggest==null || !isCorrectlySpelled) { for (ShardRequest sreq : rb.finished) { for (ShardResponse srsp : sreq.responses) { - NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck"); + NamedList nl = null; + try { + nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck"); + } catch (Exception e) { + if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) { + continue; // looks like a shard did not return anything + } + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Unable to read spelling info for shard: " + srsp.getShard(), e); + } LOG.info(srsp.getShard() + " " + nl); if (nl != null) { mergeData.totalNumberShardResponses++; diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java index 11a4c824e99..34601804370 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java @@ -102,7 +102,16 @@ public class StatsComponent extends SearchComponent { StatsInfo si = rb._statsInfo; for (ShardResponse srsp : sreq.responses) { - NamedList stats = (NamedList) srsp.getSolrResponse().getResponse().get("stats"); + NamedList stats = null; + try { + stats = (NamedList) srsp.getSolrResponse().getResponse().get("stats"); + } catch (Exception e) { + if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false)) { + continue; // looks like a shard did not return anything + } + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Unable to read stats info for shard: " + srsp.getShard(), e); + } NamedList stats_fields = (NamedList) stats.get("stats_fields"); if (stats_fields != null) { @@ -167,8 +176,9 @@ class StatsInfo { String[] statsFs = params.getParams(StatsParams.STATS_FIELD); if (statsFs != null) { for (String field : statsFs) { + boolean calcDistinct = 
params.getFieldBool(field, StatsParams.STATS_CALC_DISTINCT, false); SchemaField sf = rb.req.getSchema().getField(field); - statsFields.put(field, StatsValuesFactory.createStatsValues(sf)); + statsFields.put(field, StatsValuesFactory.createStatsValues(sf, calcDistinct)); } } } @@ -207,6 +217,7 @@ class SimpleStats { if (null != statsFs) { final IndexSchema schema = searcher.getSchema(); for (String f : statsFs) { + boolean calcDistinct = params.getFieldBool(f, StatsParams.STATS_CALC_DISTINCT, false); String[] facets = params.getFieldParams(f, StatsParams.STATS_FACET); if (facets == null) { facets = new String[0]; // make sure it is something... @@ -218,9 +229,9 @@ class SimpleStats { if (sf.multiValued() || ft.multiValuedFieldCache()) { //use UnInvertedField for multivalued fields UnInvertedField uif = UnInvertedField.getUnInvertedField(f, searcher); - stv = uif.getStats(searcher, docs, facets).getStatsValues(); + stv = uif.getStats(searcher, docs, calcDistinct, facets).getStatsValues(); } else { - stv = getFieldCacheStats(f, facets); + stv = getFieldCacheStats(f, calcDistinct, facets); } if (isShard == true || (Long) stv.get("count") > 0) { res.add(f, stv); @@ -232,11 +243,11 @@ class SimpleStats { return res; } - public NamedList getFieldCacheStats(String fieldName, String[] facet) throws IOException { + public NamedList getFieldCacheStats(String fieldName, boolean calcDistinct, String[] facet) throws IOException { IndexSchema schema = searcher.getSchema(); final SchemaField sf = schema.getField(fieldName); - final StatsValues allstats = StatsValuesFactory.createStatsValues(sf); + final StatsValues allstats = StatsValuesFactory.createStatsValues(sf, calcDistinct); List facetStats = new ArrayList(); for( String facetField : facet ) { @@ -247,7 +258,7 @@ class SimpleStats { "Stats can only facet on single-valued fields, not: " + facetField ); } - facetStats.add(new FieldFacetStats(searcher, facetField, sf, fsf)); + facetStats.add(new FieldFacetStats(searcher, 
facetField, sf, fsf, calcDistinct)); } final Iterator ctxIt = searcher.getIndexReader().leaves().iterator(); diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java index 2777a48a01e..b59283cfc78 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java @@ -18,10 +18,7 @@ package org.apache.solr.handler.component; import java.io.IOException; -import java.util.Collections; -import java.util.Date; -import java.util.Map; -import java.util.HashMap; +import java.util.*; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.queries.function.FunctionValues; @@ -44,7 +41,7 @@ public class StatsValuesFactory { * @param sf SchemaField for the field whose statistics will be created by the resulting StatsValues * @return Instance of StatsValues that will create statistics from values from a field of the given type */ - public static StatsValues createStatsValues(SchemaField sf) { + public static StatsValues createStatsValues(SchemaField sf, boolean calcDistinct) { // TODO: allow for custom field types FieldType fieldType = sf.getType(); if (DoubleField.class.isInstance(fieldType) || @@ -56,13 +53,13 @@ public class StatsValuesFactory { SortableIntField.class.isInstance(fieldType) || SortableLongField.class.isInstance(fieldType) || SortableFloatField.class.isInstance(fieldType)) { - return new NumericStatsValues(sf); + return new NumericStatsValues(sf, calcDistinct); } else if (DateField.class.isInstance(fieldType)) { - return new DateStatsValues(sf); + return new DateStatsValues(sf, calcDistinct); } else if (StrField.class.isInstance(fieldType)) { - return new StringStatsValues(sf); + return new StringStatsValues(sf, calcDistinct); } else if (sf.getType().getClass().equals(EnumField.class)) { - return new EnumStatsValues(sf); + 
return new EnumStatsValues(sf, calcDistinct); } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported"); } @@ -84,15 +81,20 @@ abstract class AbstractStatsValues implements StatsValues { protected T min; protected long missing; protected long count; + protected long countDistinct; + protected Set distinctValues; private ValueSource valueSource; protected FunctionValues values; + protected boolean calcDistinct = false; // facetField facetValue protected Map> facets = new HashMap>(); - protected AbstractStatsValues(SchemaField sf) { + protected AbstractStatsValues(SchemaField sf, boolean calcDistinct) { this.sf = sf; this.ft = sf.getType(); + this.distinctValues = new TreeSet(); + this.calcDistinct = calcDistinct; } /** @@ -102,6 +104,10 @@ abstract class AbstractStatsValues implements StatsValues { public void accumulate(NamedList stv) { count += (Long) stv.get("count"); missing += (Long) stv.get("missing"); + if (calcDistinct) { + distinctValues.addAll((Collection) stv.get("distinctValues")); + countDistinct = distinctValues.size(); + } updateMinMax((T) stv.get("min"), (T) stv.get("max")); updateTypeSpecificStats(stv); @@ -123,7 +129,7 @@ abstract class AbstractStatsValues implements StatsValues { String val = vals.getName(j); StatsValues vvals = addTo.get(val); if (vvals == null) { - vvals = StatsValuesFactory.createStatsValues(sf); + vvals = StatsValuesFactory.createStatsValues(sf, calcDistinct); addTo.put(val, vvals); } vvals.accumulate((NamedList) vals.getVal(j)); @@ -142,6 +148,10 @@ abstract class AbstractStatsValues implements StatsValues { public void accumulate(T value, int count) { this.count += count; + if (calcDistinct) { + distinctValues.add(value); + countDistinct = distinctValues.size(); + } updateMinMax(value, value); updateTypeSpecificStats(value, count); } @@ -181,6 +191,11 @@ abstract class AbstractStatsValues implements StatsValues { res.add("max", max); res.add("count", 
count); res.add("missing", missing); + if (calcDistinct) { + res.add("distinctValues", distinctValues); + res.add("countDistinct", countDistinct); + } + addTypeSpecificStats(res); // add the facet stats @@ -242,8 +257,8 @@ class NumericStatsValues extends AbstractStatsValues { double sum; double sumOfSquares; - public NumericStatsValues(SchemaField sf) { - super(sf); + public NumericStatsValues(SchemaField sf, boolean calcDistinct) { + super(sf, calcDistinct); min = Double.POSITIVE_INFINITY; max = Double.NEGATIVE_INFINITY; } @@ -317,8 +332,8 @@ class NumericStatsValues extends AbstractStatsValues { */ class EnumStatsValues extends AbstractStatsValues { - public EnumStatsValues(SchemaField sf) { - super(sf); + public EnumStatsValues(SchemaField sf, boolean calcDistinct) { + super(sf, calcDistinct); } /** @@ -386,8 +401,8 @@ class DateStatsValues extends AbstractStatsValues { private long sum = -1; double sumOfSquares = 0; - public DateStatsValues(SchemaField sf) { - super(sf); + public DateStatsValues(SchemaField sf, boolean calcDistinct) { + super(sf, calcDistinct); } @Override @@ -404,8 +419,11 @@ class DateStatsValues extends AbstractStatsValues { */ @Override protected void updateTypeSpecificStats(NamedList stv) { - sum += ((Date) stv.get("sum")).getTime(); - sumOfSquares += ((Number)stv.get("sumOfSquares")).doubleValue(); + Date date = (Date) stv.get("sum"); + if (date != null) { + sum += date.getTime(); + sumOfSquares += ((Number)stv.get("sumOfSquares")).doubleValue(); + } } /** @@ -469,8 +487,8 @@ class DateStatsValues extends AbstractStatsValues { */ class StringStatsValues extends AbstractStatsValues { - public StringStatsValues(SchemaField sf) { - super(sf); + public StringStatsValues(SchemaField sf, boolean calcDistinct) { + super(sf, calcDistinct); } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/component/SuggestComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SuggestComponent.java new file mode 100644 index 
00000000000..c8aea7d93aa --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/SuggestComponent.java @@ -0,0 +1,449 @@ +package org.apache.solr.handler.component; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.Lookup.LookupResult; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrEventListener; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.spelling.suggest.SolrSuggester; +import 
org.apache.solr.spelling.suggest.SuggesterOptions; +import org.apache.solr.spelling.suggest.SuggesterParams; +import org.apache.solr.spelling.suggest.SuggesterResult; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * SuggestComponent: interacts with multiple {@link SolrSuggester} to serve up suggestions + * Responsible for routing commands and queries to the appropriate {@link SolrSuggester} + * and for initializing them as specified by SolrConfig + */ +public class SuggestComponent extends SearchComponent implements SolrCoreAware, SuggesterParams { + private static final Logger LOG = LoggerFactory.getLogger(SuggestComponent.class); + + /** Name used to identify whether the user query concerns this component */ + public static final String COMPONENT_NAME = "suggest"; + + /** Name assigned to an unnamed suggester (at most one suggester) can be unnamed */ + private static final String DEFAULT_DICT_NAME = SolrSuggester.DEFAULT_DICT_NAME; + + /** SolrConfig label to identify Config time settings */ + private static final String CONFIG_PARAM_LABEL = "suggester"; + + /** SolrConfig label to identify boolean value to build suggesters on commit */ + private static final String BUILD_ON_COMMIT_LABEL = "buildOnCommit"; + + /** SolrConfig label to identify boolean value to build suggesters on optimize */ + private static final String BUILD_ON_OPTIMIZE_LABEL = "buildOnOptimize"; + + @SuppressWarnings("unchecked") + protected NamedList initParams; + + /** + * Key is the dictionary name used in SolrConfig, value is the corrosponding {@link SolrSuggester} + */ + protected Map suggesters = new ConcurrentHashMap(); + + /** Container for various labels used in the responses generated by this component */ + private static class SuggesterResultLabels { + static final String SUGGEST = "suggest"; + static final String SUGGESTIONS = "suggestions"; + static final String SUGGESTION_NUM_FOUND = "numFound"; + static final 
String SUGGESTION_TERM = "term"; + static final String SUGGESTION_WEIGHT = "weight"; + static final String SUGGESTION_PAYLOAD = "payload"; + } + + @Override + @SuppressWarnings("unchecked") + public void init(NamedList args) { + super.init(args); + this.initParams = args; + } + + @Override + public void inform(SolrCore core) { + if (initParams != null) { + LOG.info("Initializing SuggestComponent"); + boolean hasDefault = false; + for (int i = 0; i < initParams.size(); i++) { + if (initParams.getName(i).equals(CONFIG_PARAM_LABEL)) { + NamedList suggesterParams = (NamedList) initParams.getVal(i); + SolrSuggester suggester = new SolrSuggester(); + String dictionary = suggester.init(suggesterParams, core); + if (dictionary != null) { + boolean isDefault = dictionary.equals(DEFAULT_DICT_NAME); + if (isDefault && !hasDefault) { + hasDefault = true; + } else if (isDefault){ + throw new RuntimeException("More than one dictionary is missing name."); + } + suggesters.put(dictionary, suggester); + } else { + if (!hasDefault){ + suggesters.put(DEFAULT_DICT_NAME, suggester); + hasDefault = true; + } else { + throw new RuntimeException("More than one dictionary is missing name."); + } + } + + // Register event listeners for this Suggester + core.registerFirstSearcherListener(new SuggesterListener(core, suggester, false, false)); + boolean buildOnCommit = Boolean.parseBoolean((String) suggesterParams.get(BUILD_ON_COMMIT_LABEL)); + boolean buildOnOptimize = Boolean.parseBoolean((String) suggesterParams.get(BUILD_ON_OPTIMIZE_LABEL)); + if (buildOnCommit || buildOnOptimize) { + LOG.info("Registering newSearcher listener for suggester: " + suggester.getName()); + core.registerNewSearcherListener(new SuggesterListener(core, suggester, buildOnCommit, buildOnOptimize)); + } + } + } + } + } + + /** Responsible for issuing build and rebload command to the specified {@link SolrSuggester} */ + @Override + public void prepare(ResponseBuilder rb) throws IOException { + SolrParams params = 
rb.req.getParams(); + LOG.info("SuggestComponent prepare with : " + params); + if (!params.getBool(COMPONENT_NAME, false)) { + return; + } + + SolrSuggester suggester = getSuggester(params); + if (suggester == null) { + if (params.get(SUGGEST_DICT) != null) { + throw new IllegalArgumentException("No suggester named " + params.get(SUGGEST_DICT) +" was configured"); + } else { + throw new IllegalArgumentException("No default suggester was configured"); + } + } + if (params.getBool(SUGGEST_BUILD, false)) { + suggester.build(rb.req.getCore(), rb.req.getSearcher()); + rb.rsp.add("command", "build"); + } else if (params.getBool(SUGGEST_RELOAD, false)) { + suggester.reload(rb.req.getCore(), rb.req.getSearcher()); + rb.rsp.add("command", "reload"); + } + } + + /** Dispatch shard request in STAGE_EXECUTE_QUERY stage */ + @Override + public int distributedProcess(ResponseBuilder rb) { + SolrParams params = rb.req.getParams(); + LOG.info("SuggestComponent distributedProcess with : " + params); + if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) + return ResponseBuilder.STAGE_EXECUTE_QUERY; + if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) { + ShardRequest sreq = new ShardRequest(); + sreq.purpose = ShardRequest.PURPOSE_GET_TOP_IDS; + sreq.params = new ModifiableSolrParams(rb.req.getParams()); + sreq.params.remove(ShardParams.SHARDS); + rb.addRequest(this, sreq); + return ResponseBuilder.STAGE_GET_FIELDS; + } + + return ResponseBuilder.STAGE_DONE; + } + + /** + * Responsible for using the specified suggester to get the suggestions + * for the query and write the results + * */ + @Override + public void process(ResponseBuilder rb) throws IOException { + SolrParams params = rb.req.getParams(); + LOG.info("SuggestComponent process with : " + params); + if (!params.getBool(COMPONENT_NAME, false) || suggesters.isEmpty()) { + return; + } + + SolrSuggester suggester = getSuggester(params); + String query = params.get(SUGGEST_Q); + if (query == null) { + query = 
rb.getQueryString(); + if (query == null) { + query = params.get(CommonParams.Q); + } + } + + if (query != null) { + int count = params.getInt(SUGGEST_COUNT, 1); + SuggesterOptions options = new SuggesterOptions(new CharsRef(query), count); + SuggesterResult suggesterResult = suggester.getSuggestions(options); + + NamedList> namedListResult = toNamedList(suggesterResult); + rb.rsp.add(SuggesterResultLabels.SUGGEST, namedListResult); + } + } + + /** + * Used in Distributed Search, merges the suggestion results from every shard + * */ + @Override + public void finishStage(ResponseBuilder rb) { + SolrParams params = rb.req.getParams(); + LOG.info("SuggestComponent finishStage with : " + params); + if (!params.getBool(COMPONENT_NAME, false) || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) + return; + int count = params.getInt(SUGGEST_COUNT, 1); + + List suggesterResults = new ArrayList(); + NamedList> namedListResult = null; + + // Collect Shard responses + for (ShardRequest sreq : rb.finished) { + for (ShardResponse srsp : sreq.responses) { + NamedList resp; + if((resp = srsp.getSolrResponse().getResponse()) != null) { + @SuppressWarnings("unchecked") + NamedList> namedList = + (NamedList>) resp.get(SuggesterResultLabels.SUGGEST); + LOG.info(srsp.getShard() + " : " + namedList); + suggesterResults.add(toSuggesterResult(namedList)); + } + } + } + + // Merge Shard responses + SuggesterResult suggesterResult = merge(suggesterResults, count); + namedListResult = toNamedList(suggesterResult); + + rb.rsp.add(SuggesterResultLabels.SUGGEST, namedListResult); + } + + /** + * Given a list of {@link SuggesterResult} and count + * returns a {@link SuggesterResult} containing count + * number of {@link LookupResult}, sorted by their associated + * weights + * */ + private static SuggesterResult merge(List suggesterResults, int count) { + if (suggesterResults.size() == 1) { + return suggesterResults.get(0); + } + SuggesterResult result = new SuggesterResult(); + Set allTokens = new 
HashSet(); + + // collect all tokens + for (SuggesterResult shardResult : suggesterResults) { + allTokens.addAll(shardResult.getTokens()); + } + + // Get Top N for every token in every shard (using weights) + for (String token : allTokens) { + Lookup.LookupPriorityQueue resultQueue = new Lookup.LookupPriorityQueue( + count); + for (SuggesterResult shardResult : suggesterResults) { + List suggests = shardResult.getLookupResult(token); + if (suggests == null) { + continue; + } + for (LookupResult res : suggests) { + resultQueue.insertWithOverflow(res); + } + } + List sortedSuggests = new LinkedList(); + Collections.addAll(sortedSuggests, resultQueue.getResults()); + result.add(token, sortedSuggests); + } + return result; + } + + @Override + public String getDescription() { + return "Suggester component"; + } + + @Override + public String getSource() { + return "$URL$"; + } + + @Override + public NamedList getStatistics() { + NamedList stats = new SimpleOrderedMap(); + stats.add("totalSizeInBytes", String.valueOf(sizeInBytes())); + for (Map.Entry entry : suggesters.entrySet()) { + SolrSuggester suggester = entry.getValue(); + stats.add(entry.getKey(), suggester.toString()); + } + return stats; + } + + /** Returns the total size of all the suggester */ + public long sizeInBytes() { + long sizeInBytes = 0; + for (SolrSuggester suggester : suggesters.values()) { + sizeInBytes += suggester.sizeInBytes(); + } + return sizeInBytes; + } + + private SolrSuggester getSuggester(SolrParams params) { + return suggesters.get(getSuggesterName(params)); + + } + + private String getSuggesterName(SolrParams params){ + return (params.get(SUGGEST_DICT) != null) ? 
+ (String)params.get(SUGGEST_DICT) + : DEFAULT_DICT_NAME; + + } + + /** Convert {@link SuggesterResult} to NamedList for constructing responses */ + private NamedList> toNamedList(SuggesterResult suggesterResult) { + NamedList> results = new SimpleOrderedMap>(); + for (String token : suggesterResult.getTokens()) { + SimpleOrderedMap suggestionBody = new SimpleOrderedMap(); + List lookupResults = suggesterResult.getLookupResult(token); + suggestionBody.add(SuggesterResultLabels.SUGGESTION_NUM_FOUND, lookupResults.size()); + List> suggestEntriesNamedList = new ArrayList>(); + for (LookupResult lookupResult : lookupResults) { + String suggestionString = lookupResult.key.toString(); + long weight = lookupResult.value; + String payload = (lookupResult.payload != null) ? + lookupResult.payload.utf8ToString() + : ""; + + SimpleOrderedMap suggestEntryNamedList = new SimpleOrderedMap(); + suggestEntryNamedList.add(SuggesterResultLabels.SUGGESTION_TERM, suggestionString); + suggestEntryNamedList.add(SuggesterResultLabels.SUGGESTION_WEIGHT, weight); + suggestEntryNamedList.add(SuggesterResultLabels.SUGGESTION_PAYLOAD, payload); + suggestEntriesNamedList.add(suggestEntryNamedList); + + } + suggestionBody.add(SuggesterResultLabels.SUGGESTIONS, suggestEntriesNamedList); + results.add(token, suggestionBody); + } + return results; + } + + /** Convert NamedList (suggester response) to {@link SuggesterResult} */ + private SuggesterResult toSuggesterResult(NamedList> suggestions) { + SuggesterResult result = new SuggesterResult(); + if (suggestions == null) { + return result; + } + // for each token + for(int i = 0; i < suggestions.size() ; i++) { + String tokenString = suggestions.getName(i); + List lookupResults = new ArrayList(); + NamedList suggestion = (NamedList) suggestions.getVal(i); + // for each suggestion + for (int j = 0; j < suggestion.size(); j++) { + String property = suggestion.getName(j); + if (property.equals(SuggesterResultLabels.SUGGESTIONS)) { + 
@SuppressWarnings("unchecked") + List> suggestionEntries = (List>) suggestion.getVal(j); + for(NamedList suggestionEntry : suggestionEntries) { + String term = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_TERM); + Long weight = (Long) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_WEIGHT); + String payload = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_PAYLOAD); + LookupResult res = new LookupResult(new CharsRef(term), weight, new BytesRef(payload)); + lookupResults.add(res); + } + } + result.add(tokenString, lookupResults); + } + } + return result; + } + + /** Listener to build or reload the maintained {@link SolrSuggester} by this component */ + private static class SuggesterListener implements SolrEventListener { + private final SolrCore core; + private final SolrSuggester suggester; + private final boolean buildOnCommit; + private final boolean buildOnOptimize; + + public SuggesterListener(SolrCore core, SolrSuggester checker, boolean buildOnCommit, boolean buildOnOptimize) { + this.core = core; + this.suggester = checker; + this.buildOnCommit = buildOnCommit; + this.buildOnOptimize = buildOnOptimize; + } + + @Override + public void init(NamedList args) {} + + @Override + public void newSearcher(SolrIndexSearcher newSearcher, + SolrIndexSearcher currentSearcher) { + if (currentSearcher == null) { + // firstSearcher event + try { + LOG.info("Loading suggester index for: " + suggester.getName()); + suggester.reload(core, newSearcher); + } catch (IOException e) { + log.error("Exception in reloading suggester index for: " + suggester.getName(), e); + } + } else { + // newSearcher event + if (buildOnCommit) { + buildSuggesterIndex(newSearcher); + } else if (buildOnOptimize) { + if (newSearcher.getIndexReader().leaves().size() == 1) { + buildSuggesterIndex(newSearcher); + } else { + LOG.info("Index is not optimized therefore skipping building suggester index for: " + + suggester.getName()); + } + } + } + + } + + private void 
buildSuggesterIndex(SolrIndexSearcher newSearcher) { + try { + LOG.info("Building suggester index for: " + suggester.getName()); + suggester.build(core, newSearcher); + } catch (Exception e) { + log.error("Exception in building suggester index for: " + suggester.getName(), e); + } + } + + @Override + public void postCommit() {} + + @Override + public void postSoftCommit() {} + + } +} diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index 98a2140759c..81c04f87cde 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -232,9 +232,20 @@ public class DocValuesFacets { return res; } - /** accumulates per-segment single-valued facet counts, mapping to global ordinal space */ - // specialized since the single-valued case is different + /** accumulates per-segment single-valued facet counts */ static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) { + // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): + // collect separately per-segment, then map to global ords + accumSingleSeg(counts, si, disi, subIndex, map); + } else { + // otherwise: do collect+map on the fly + accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map); + } + } + + /** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */ + static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); @@ -246,8 +257,41 @@ public class DocValuesFacets { } } - /** 
accumulates per-segment multi-valued facet counts, mapping to global ordinal space */ + /** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ + static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + // First count in seg-ord space: + final int segCounts[]; + if (map == null) { + segCounts = counts; + } else { + segCounts = new int[1+si.getValueCount()]; + } + + int doc; + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + segCounts[1+si.getOrd(doc)]++; + } + + // migrate to global ords (if necessary) + if (map != null) { + migrateGlobal(counts, segCounts, subIndex, map); + } + } + + /** accumulates per-segment multi-valued facet counts */ static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) { + // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): + // collect separately per-segment, then map to global ords + accumMultiSeg(counts, si, disi, subIndex, map); + } else { + // otherwise: do collect+map on the fly + accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map); + } + } + + /** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */ + static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); @@ -269,4 +313,47 @@ public class DocValuesFacets { } while ((term = (int) si.nextOrd()) >= 0); } } + + /** "typical" multi-valued faceting: not too many unique values, no prefixing. 
maps to global ordinals as a separate step */ + static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + // First count in seg-ord space: + final int segCounts[]; + if (map == null) { + segCounts = counts; + } else { + segCounts = new int[1+(int)si.getValueCount()]; + } + + int doc; + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + si.setDocument(doc); + int term = (int) si.nextOrd(); + if (term < 0) { + counts[0]++; // missing + } else { + do { + segCounts[1+term]++; + } while ((term = (int)si.nextOrd()) >= 0); + } + } + + // migrate to global ords (if necessary) + if (map != null) { + migrateGlobal(counts, segCounts, subIndex, map); + } + } + + /** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) */ + static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) { + // missing count + counts[0] += segCounts[0]; + + // migrate actual ordinals + for (int ord = 1; ord < segCounts.length; ord++) { + int count = segCounts[ord]; + if (count != 0) { + counts[1+(int) map.getGlobalOrd(subIndex, ord-1)] += count; + } + } + } } diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java index f1d2965eafa..1fdeec23198 100644 --- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java +++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java @@ -566,7 +566,7 @@ public class SimpleFacets { throw se; } catch (Exception e) { throw new SolrException(ErrorCode.SERVER_ERROR, - "Exception during facet.field: " + workerFacetValue, e.getCause()); + "Exception during facet.field: " + workerFacetValue, e); } finally { semaphore.release(); } diff --git a/solr/core/src/java/org/apache/solr/request/UnInvertedField.java b/solr/core/src/java/org/apache/solr/request/UnInvertedField.java index 1939b63a684..1a33b8a51ca 100644 --- 
a/solr/core/src/java/org/apache/solr/request/UnInvertedField.java +++ b/solr/core/src/java/org/apache/solr/request/UnInvertedField.java @@ -464,11 +464,12 @@ public class UnInvertedField extends DocTermOrds { * * @param searcher The Searcher to use to gather the statistics * @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on + * @param calcDistinct whether distinct values should be collected and counted * @param facet One or more fields to facet on. * @return The {@link org.apache.solr.handler.component.StatsValues} collected * @throws IOException If there is a low-level I/O error. */ - public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, String[] facet) throws IOException { + public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, boolean calcDistinct, String[] facet) throws IOException { //this function is ripped off nearly wholesale from the getCounts function to use //for multiValued fields within the StatsComponent. 
may be useful to find common //functionality between the two and refactor code somewhat @@ -477,7 +478,7 @@ public class UnInvertedField extends DocTermOrds { SchemaField sf = searcher.getSchema().getField(field); // FieldType ft = sf.getType(); - StatsValues allstats = StatsValuesFactory.createStatsValues(sf); + StatsValues allstats = StatsValuesFactory.createStatsValues(sf, calcDistinct); DocSet docs = baseDocs; @@ -494,7 +495,7 @@ public class UnInvertedField extends DocTermOrds { SortedDocValues si; for (String f : facet) { SchemaField facet_sf = searcher.getSchema().getField(f); - finfo[i] = new FieldFacetStats(searcher, f, sf, facet_sf); + finfo[i] = new FieldFacetStats(searcher, f, sf, facet_sf, calcDistinct); i++; } diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index 2c47097e596..891e6736139 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -22,6 +22,9 @@ import java.io.InputStream; import java.text.Collator; import java.text.ParseException; import java.text.RuleBasedCollator; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -30,7 +33,12 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.collation.CollationKeyAnalyzer; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.StorableField; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocTermOrdsRangeFilter; +import org.apache.lucene.search.FieldCacheRangeFilter; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import 
org.apache.lucene.search.TermRangeQuery; @@ -39,6 +47,7 @@ import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.Base64; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -138,8 +147,7 @@ public class CollationField extends FieldType { else throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid decomposition: " + decomposition); } - // we use 4.0 because it ensures we just encode the pure byte[] keys. - analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator); + analyzer = new CollationKeyAnalyzer(Version.LUCENE_CURRENT, collator); } /** @@ -209,30 +217,81 @@ public class CollationField extends FieldType { * its just that all methods are synced), this keeps things * simple (we already have a threadlocal clone in the reused TS) */ - private BytesRef analyzeRangePart(String field, String part) { - try (TokenStream source = analyzer.tokenStream(field, part)) { + private BytesRef getCollationKey(String field, String text) { + try (TokenStream source = analyzer.tokenStream(field, text)) { source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); // we control the analyzer here: most errors are impossible if (!source.incrementToken()) - throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); + throw new IllegalArgumentException("analyzer returned no terms for text: " + text); termAtt.fillBytesRef(); assert !source.incrementToken(); source.end(); return BytesRef.deepCopyOf(bytes); } catch (IOException e) { - throw new RuntimeException("Unable to analyze range part: " + part, e); + throw new RuntimeException("Unable to analyze text: " + text, e); } } @Override public Query getRangeQuery(QParser parser, SchemaField field, String 
part1, String part2, boolean minInclusive, boolean maxInclusive) { String f = field.getName(); - BytesRef low = part1 == null ? null : analyzeRangePart(f, part1); - BytesRef high = part2 == null ? null : analyzeRangePart(f, part2); - return new TermRangeQuery(field.getName(), low, high, minInclusive, maxInclusive); + BytesRef low = part1 == null ? null : getCollationKey(f, part1); + BytesRef high = part2 == null ? null : getCollationKey(f, part2); + if (!field.indexed() && field.hasDocValues()) { + if (field.multiValued()) { + return new ConstantScoreQuery(DocTermOrdsRangeFilter.newBytesRefRange( + field.getName(), low, high, minInclusive, maxInclusive)); + } else { + return new ConstantScoreQuery(FieldCacheRangeFilter.newBytesRefRange( + field.getName(), low, high, minInclusive, maxInclusive)); + } + } else { + return new TermRangeQuery(field.getName(), low, high, minInclusive, maxInclusive); + } + } + + @Override + public void checkSchemaField(SchemaField field) { + // no-op + } + + @Override + public List createFields(SchemaField field, Object value, float boost) { + if (field.hasDocValues()) { + List fields = new ArrayList(); + fields.add(createField(field, value, boost)); + final BytesRef bytes = getCollationKey(field.getName(), value.toString()); + if (field.multiValued()) { + fields.add(new SortedSetDocValuesField(field.getName(), bytes)); + } else { + fields.add(new SortedDocValuesField(field.getName(), bytes)); + } + return fields; + } else { + return Collections.singletonList(createField(field, value, boost)); + } + } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + final BytesRef val = (BytesRef)value; + return Base64.byteArrayToBase64(val.bytes, val.offset, val.length); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; + } + final String val = (String)value; + final byte[] bytes = Base64.base64ToByteArray(val); + return new BytesRef(bytes); 
} } diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 5d03d383060..8d2a1d3aded 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -932,4 +932,20 @@ public abstract class FieldType extends FieldProperties { } return analyzerProps; } + + /** + * Convert a value used by the FieldComparator for this FieldType's SortField + * into a marshalable value for distributed sorting. + */ + public Object marshalSortValue(Object value) { + return value; + } + + /** + * Convert a value marshaled via {@link #marshalSortValue} back + * into a value usable by the FieldComparator for this FieldType's SortField + */ + public Object unmarshalSortValue(Object value) { + return value; + } } diff --git a/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java b/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java index 2f4b0a65dda..382dfd430d4 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java @@ -100,6 +100,27 @@ public class SortableDoubleField extends PrimitiveFieldType implements DoubleVal String sval = f.stringValue(); writer.writeDouble(name, NumberUtils.SortableStr2double(sval)); } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + CharsRef chars = new CharsRef(); + UnicodeUtil.UTF8toUTF16((BytesRef)value, chars); + return NumberUtils.SortableStr2double(chars.toString()); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; + } + String sortableString = NumberUtils.double2sortableStr(value.toString()); + BytesRef bytes = new BytesRef(); + UnicodeUtil.UTF16toUTF8(sortableString, 0, sortableString.length(), bytes); + return bytes; + } } class SortableDoubleFieldSource extends 
FieldCacheSource { diff --git a/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java b/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java index e66e25563e7..aa7a075c867 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java @@ -101,6 +101,27 @@ public class SortableFloatField extends PrimitiveFieldType implements FloatValue String sval = f.stringValue(); writer.writeFloat(name, NumberUtils.SortableStr2float(sval)); } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + CharsRef chars = new CharsRef(); + UnicodeUtil.UTF8toUTF16((BytesRef)value, chars); + return NumberUtils.SortableStr2float(chars.toString()); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; + } + String sortableString = NumberUtils.float2sortableStr(value.toString()); + BytesRef bytes = new BytesRef(); + UnicodeUtil.UTF16toUTF8(sortableString, 0, sortableString.length(), bytes); + return bytes; + } } diff --git a/solr/core/src/java/org/apache/solr/schema/SortableIntField.java b/solr/core/src/java/org/apache/solr/schema/SortableIntField.java index 955857370f9..97cbfe2b134 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableIntField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableIntField.java @@ -104,6 +104,27 @@ public class SortableIntField extends PrimitiveFieldType implements IntValueFiel String sval = f.stringValue(); writer.writeInt(name, NumberUtils.SortableStr2int(sval,0,sval.length())); } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + CharsRef chars = new CharsRef(); + UnicodeUtil.UTF8toUTF16((BytesRef)value, chars); + return NumberUtils.SortableStr2int(chars.toString()); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; 
+ } + String sortableString = NumberUtils.int2sortableStr(value.toString()); + BytesRef bytes = new BytesRef(); + UnicodeUtil.UTF16toUTF8(sortableString, 0, sortableString.length(), bytes); + return bytes; + } } diff --git a/solr/core/src/java/org/apache/solr/schema/SortableLongField.java b/solr/core/src/java/org/apache/solr/schema/SortableLongField.java index 0e61eef6f91..3c48af14bad 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableLongField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableLongField.java @@ -100,6 +100,27 @@ public class SortableLongField extends PrimitiveFieldType { String sval = f.stringValue(); writer.writeLong(name, NumberUtils.SortableStr2long(sval,0,sval.length())); } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + CharsRef chars = new CharsRef(); + UnicodeUtil.UTF8toUTF16((BytesRef)value, chars); + return NumberUtils.SortableStr2long(chars.toString()); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; + } + String sortableString = NumberUtils.long2sortableStr(value.toString()); + BytesRef bytes = new BytesRef(); + UnicodeUtil.UTF16toUTF8(sortableString, 0, sortableString.length(), bytes); + return bytes; + } } diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index 2c9600c67de..15060b9b74d 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -29,6 +29,8 @@ import org.apache.lucene.index.StorableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -81,6 +83,27 @@ public class 
StrField extends PrimitiveFieldType { @Override public void checkSchemaField(SchemaField field) { } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + CharsRef spare = new CharsRef(); + UnicodeUtil.UTF8toUTF16((BytesRef)value, spare); + return spare.toString(); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; + } + BytesRef spare = new BytesRef(); + String stringVal = (String)value; + UnicodeUtil.UTF16toUTF8(stringVal, 0, stringVal.length(), spare); + return spare; + } } diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index b7fd860f11a..f0741f51445 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -23,7 +23,9 @@ import org.apache.lucene.index.StorableField; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.QueryBuilder; +import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -165,4 +167,25 @@ public class TextField extends FieldType { public boolean isExplicitMultiTermAnalyzer() { return isExplicitMultiTermAnalyzer; } + + @Override + public Object marshalSortValue(Object value) { + if (null == value) { + return null; + } + CharsRef spare = new CharsRef(); + UnicodeUtil.UTF8toUTF16((BytesRef)value, spare); + return spare.toString(); + } + + @Override + public Object unmarshalSortValue(Object value) { + if (null == value) { + return null; + } + BytesRef spare = new BytesRef(); + String stringVal = (String)value; + UnicodeUtil.UTF16toUTF8(stringVal, 0, stringVal.length(), spare); + return spare; + } } diff --git 
a/solr/core/src/java/org/apache/solr/search/BitDocSet.java b/solr/core/src/java/org/apache/solr/search/BitDocSet.java index 0346ab71a57..f793982a9ff 100644 --- a/solr/core/src/java/org/apache/solr/search/BitDocSet.java +++ b/solr/core/src/java/org/apache/solr/search/BitDocSet.java @@ -296,11 +296,11 @@ public class BitDocSet extends DocSetBase { @Override public long cost() { // we don't want to actually compute cardinality, but - // if its already been computed, we use it + // if its already been computed, we use it (pro-rated for the segment) if (size != -1) { - return size; + return (long)(size * ((OpenBitSet.bits2words(maxDoc)<<6) / (float)bs.capacity())); } else { - return bs.capacity(); + return maxDoc; } } }; diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index e0ac59ef276..4195115e7a8 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -118,21 +118,15 @@ public class CollapsingQParserPlugin extends QParserPlugin { } } - private class CollapsingPostFilter extends ExtendedQueryBase implements PostFilter { + public class CollapsingPostFilter extends ExtendedQueryBase implements PostFilter, ScoreFilter { private Object cacheId; private String field; - private int leafCount; - private SortedDocValues docValues; - private int maxDoc; private String max; private String min; - private FieldType fieldType; + private boolean needsScores = true; private int nullPolicy; - private SolrIndexSearcher searcher; - private SolrParams solrParams; private Map context; - private IndexSchema schema; public static final int NULL_POLICY_IGNORE = 0; public static final int NULL_POLICY_COLLAPSE = 1; public static final int NULL_POLICY_EXPAND = 2; @@ -180,7 +174,13 @@ public class CollapsingQParserPlugin extends QParserPlugin { public CollapsingPostFilter(SolrParams 
localParams, SolrParams params, SolrQueryRequest request) throws IOException { this.cacheId = new Object(); this.field = localParams.get("field"); - this.solrParams = params; + this.max = localParams.get("max"); + this.min = localParams.get("min"); + this.context = request.getContext(); + if(this.min != null || this.max != null) { + this.needsScores = needsScores(params); + } + String nPolicy = localParams.get("nullPolicy", NULL_IGNORE); if(nPolicy.equals(NULL_IGNORE)) { this.nullPolicy = NULL_POLICY_IGNORE; @@ -191,34 +191,12 @@ public class CollapsingQParserPlugin extends QParserPlugin { } else { throw new IOException("Invalid nullPolicy:"+nPolicy); } - this.searcher = request.getSearcher(); - this.leafCount = searcher.getTopReaderContext().leaves().size(); - this.maxDoc = searcher.maxDoc(); - this.schema = searcher.getSchema(); - SchemaField schemaField = schema.getField(this.field); - if(schemaField.hasDocValues()) { - this.docValues = searcher.getAtomicReader().getSortedDocValues(this.field); - } else { - this.docValues = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), this.field); - } - - this.max = localParams.get("max"); - if(this.max != null) { - this.fieldType = searcher.getSchema().getField(this.max).getType(); - } - - this.min = localParams.get("min"); - if(this.min != null) { - this.fieldType = searcher.getSchema().getField(this.min).getType(); - } - - this.context = request.getContext(); } - private IntOpenHashSet getBoostDocs(IndexSearcher indexSearcher, Set boosted) throws IOException { + private IntOpenHashSet getBoostDocs(SolrIndexSearcher indexSearcher, Set boosted) throws IOException { IntOpenHashSet boostDocs = null; if(boosted != null) { - SchemaField idField = this.schema.getUniqueKeyField(); + SchemaField idField = indexSearcher.getSchema().getUniqueKeyField(); String fieldName = idField.getName(); HashSet localBoosts = new HashSet(boosted.size()*2); Iterator boostedIt = boosted.iterator(); @@ -258,22 +236,47 @@ public class 
CollapsingQParserPlugin extends QParserPlugin { public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) { try { - IntOpenHashSet boostDocs = getBoostDocs(indexSearcher, (Set) (this.context.get(QueryElevationComponent.BOOSTED))); + + SolrIndexSearcher searcher = (SolrIndexSearcher)indexSearcher; + IndexSchema schema = searcher.getSchema(); + SchemaField schemaField = schema.getField(this.field); + + SortedDocValues docValues = null; + + if(schemaField.hasDocValues()) { + docValues = searcher.getAtomicReader().getSortedDocValues(this.field); + } else { + docValues = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), this.field); + } + + FieldType fieldType = null; + + if(this.max != null) { + fieldType = searcher.getSchema().getField(this.max).getType(); + } + + if(this.min != null) { + fieldType = searcher.getSchema().getField(this.min).getType(); + } + + int maxDoc = searcher.maxDoc(); + int leafCount = searcher.getTopReaderContext().leaves().size(); + + IntOpenHashSet boostDocs = getBoostDocs(searcher, (Set) (this.context.get(QueryElevationComponent.BOOSTED))); if(this.min != null || this.max != null) { - return new CollapsingFieldValueCollector(this.maxDoc, - this.leafCount, - this.docValues, - this.searcher, - this.nullPolicy, - max != null ? this.max : this.min, - max != null, - needsScores(this.solrParams), - this.fieldType, - boostDocs); + return new CollapsingFieldValueCollector(maxDoc, + leafCount, + docValues, + this.nullPolicy, + max != null ? 
this.max : this.min, + max != null, + this.needsScores, + fieldType, + boostDocs); } else { - return new CollapsingScoreCollector(this.maxDoc, this.leafCount, this.docValues, this.nullPolicy, boostDocs); + return new CollapsingScoreCollector(maxDoc, leafCount, docValues, this.nullPolicy, boostDocs); } } catch (Exception e) { throw new RuntimeException(e); @@ -283,7 +286,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { private boolean needsScores(SolrParams params) { String sortSpec = params.get("sort"); - if(sortSpec != null) { + if(sortSpec != null && sortSpec.length()!=0) { String[] sorts = sortSpec.split(","); for(String s: sorts) { String parts[] = s.split(" "); @@ -500,7 +503,6 @@ public class CollapsingQParserPlugin extends QParserPlugin { public CollapsingFieldValueCollector(int maxDoc, int segments, SortedDocValues values, - SolrIndexSearcher searcher, int nullPolicy, String field, boolean max, @@ -516,11 +518,11 @@ public class CollapsingQParserPlugin extends QParserPlugin { this.needsScores = needsScores; this.boostDocs = boostDocs; if(fieldType instanceof TrieIntField) { - this.fieldValueCollapse = new IntValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + this.fieldValueCollapse = new IntValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); } else if(fieldType instanceof TrieLongField) { - this.fieldValueCollapse = new LongValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + this.fieldValueCollapse = new LongValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); } else if(fieldType instanceof TrieFloatField) { - this.fieldValueCollapse = new FloatValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + this.fieldValueCollapse = new FloatValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, 
this.needsScores, boostDocs); } else { throw new IOException("min/max must be either TrieInt, TrieLong or TrieFloat."); } @@ -616,7 +618,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { public abstract void collapse(int ord, int contextDoc, int globalDoc) throws IOException; public abstract void setNextReader(AtomicReaderContext context) throws IOException; - public FieldValueCollapse(SolrIndexSearcher searcher, + public FieldValueCollapse(int maxDoc, String field, int nullPolicy, boolean max, @@ -626,7 +628,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { this.nullPolicy = nullPolicy; this.max = max; this.needsScores = needsScores; - this.collapsedSet = new OpenBitSet(searcher.maxDoc()); + this.collapsedSet = new OpenBitSet(maxDoc); this.boostDocs = boostDocs; if(this.boostDocs != null) { Iterator it = boostDocs.iterator(); @@ -676,14 +678,14 @@ public class CollapsingQParserPlugin extends QParserPlugin { private int nullVal; private int[] ordVals; - public IntValueCollapse(SolrIndexSearcher searcher, + public IntValueCollapse(int maxDoc, String field, int nullPolicy, int[] ords, boolean max, boolean needsScores, IntOpenHashSet boostDocs) throws IOException { - super(searcher, field, nullPolicy, max, needsScores, boostDocs); + super(maxDoc, field, nullPolicy, max, needsScores, boostDocs); this.ords = ords; this.ordVals = new int[ords.length]; Arrays.fill(ords, -1); @@ -745,14 +747,13 @@ public class CollapsingQParserPlugin extends QParserPlugin { private long nullVal; private long[] ordVals; - public LongValueCollapse(SolrIndexSearcher searcher, - String field, + public LongValueCollapse(int maxDoc, String field, int nullPolicy, int[] ords, boolean max, boolean needsScores, IntOpenHashSet boostDocs) throws IOException { - super(searcher, field, nullPolicy, max, needsScores, boostDocs); + super(maxDoc, field, nullPolicy, max, needsScores, boostDocs); this.ords = ords; this.ordVals = new long[ords.length]; Arrays.fill(ords, -1); @@ 
-814,14 +815,14 @@ public class CollapsingQParserPlugin extends QParserPlugin { private float nullVal; private float[] ordVals; - public FloatValueCollapse(SolrIndexSearcher searcher, + public FloatValueCollapse(int maxDoc, String field, int nullPolicy, int[] ords, boolean max, boolean needsScores, IntOpenHashSet boostDocs) throws IOException { - super(searcher, field, nullPolicy, max, needsScores, boostDocs); + super(maxDoc, field, nullPolicy, max, needsScores, boostDocs); this.ords = ords; this.ordVals = new float[ords.length]; Arrays.fill(ords, -1); diff --git a/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java index 00c4424b2f1..8ea19187328 100644 --- a/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java @@ -86,9 +86,9 @@ class OldLuceneQParser extends LuceneQParser { public SortSpec getSort(boolean useGlobal) throws SyntaxError { SortSpec sort = super.getSort(useGlobal); if (sortStr != null && sortStr.length()>0 && sort.getSort()==null) { - Sort oldSort = QueryParsing.parseSort(sortStr, getReq()); - if( oldSort != null ) { - sort.sort = oldSort; + SortSpec oldSort = QueryParsing.parseSortSpec(sortStr, getReq()); + if( oldSort.getSort() != null ) { + sort.setSortAndFields(oldSort.getSort(), oldSort.getSchemaFields()); } } return sort; diff --git a/solr/core/src/java/org/apache/solr/search/QParser.java b/solr/core/src/java/org/apache/solr/search/QParser.java index 16db2d36b5f..e39d424bd9b 100644 --- a/solr/core/src/java/org/apache/solr/search/QParser.java +++ b/solr/core/src/java/org/apache/solr/search/QParser.java @@ -276,11 +276,11 @@ public abstract class QParser { int start = startS != null ? Integer.parseInt(startS) : 0; int rows = rowsS != null ? 
Integer.parseInt(rowsS) : 10; - Sort sort = null; - if( sortStr != null ) { - sort = QueryParsing.parseSort(sortStr, req); - } - return new SortSpec( sort, start, rows ); + SortSpec sort = QueryParsing.parseSortSpec(sortStr, req); + + sort.setOffset(start); + sort.setCount(rows); + return sort; } public String[] getDefaultHighlightFields() { diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java index 4cbe8b38dfc..a0f572d2a86 100644 --- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java @@ -52,7 +52,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI MaxScoreQParserPlugin.NAME, MaxScoreQParserPlugin.class, BlockJoinParentQParserPlugin.NAME, BlockJoinParentQParserPlugin.class, BlockJoinChildQParserPlugin.NAME, BlockJoinChildQParserPlugin.class, - CollapsingQParserPlugin.NAME, CollapsingQParserPlugin.class + CollapsingQParserPlugin.NAME, CollapsingQParserPlugin.class, + SimpleQParserPlugin.NAME, SimpleQParserPlugin.class }; /** return a {@link QParser} */ diff --git a/solr/core/src/java/org/apache/solr/search/QueryParsing.java b/solr/core/src/java/org/apache/solr/search/QueryParsing.java index 1cdb4c4e3f7..75d5b527d06 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryParsing.java +++ b/solr/core/src/java/org/apache/solr/search/QueryParsing.java @@ -43,6 +43,7 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import java.io.IOException; +import java.util.Collections; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -219,16 +220,24 @@ public class QueryParsing { return new MapSolrParams(localParams); } + /** + * Returns the Sort object represented by the string, or null if default sort + * by score descending should be used. 
+ * @see #parseSortSpec + * @deprecated use {@link #parseSortSpec} + */ + @Deprecated + public static Sort parseSort(String sortSpec, SolrQueryRequest req) { + return parseSortSpec(sortSpec, req).getSort(); + } /** - * Returns null if the sortSpec is the standard sort desc. - *

    *

    * The form of the sort specification string currently parsed is: *

    *
        * SortSpec ::= SingleSort [, SingleSort]*
    -   * SingleSort ::= <fieldname> SortDirection
    +   * SingleSort ::= <fieldname|function> SortDirection
        * SortDirection ::= top | desc | bottom | asc
        * 
    * Examples: @@ -239,10 +248,15 @@ public class QueryParsing { * height desc,weight desc #sort by height descending, and use weight descending to break any ties * height desc,weight asc #sort by height descending, using weight ascending as a tiebreaker * + * @return a SortSpec object populated with the appropriate Sort (which may be null if + * default score sort is used) and SchemaFields (where applicable) using + * hardcoded default count & offset values. */ - public static Sort parseSort(String sortSpec, SolrQueryRequest req) { - if (sortSpec == null || sortSpec.length() == 0) return null; - List lst = new ArrayList(4); + public static SortSpec parseSortSpec(String sortSpec, SolrQueryRequest req) { + if (sortSpec == null || sortSpec.length() == 0) return newEmptySortSpec(); + + List sorts = new ArrayList(4); + List fields = new ArrayList(4); try { @@ -299,10 +313,11 @@ public class QueryParsing { if (null != top) { // we have a Query and a valid direction if (q instanceof FunctionQuery) { - lst.add(((FunctionQuery)q).getValueSource().getSortField(top)); + sorts.add(((FunctionQuery)q).getValueSource().getSortField(top)); } else { - lst.add((new QueryValueSource(q, 0.0f)).getSortField(top)); + sorts.add((new QueryValueSource(q, 0.0f)).getSortField(top)); } + fields.add(null); continue; } } catch (Exception e) { @@ -327,12 +342,14 @@ public class QueryParsing { if (SCORE.equals(field)) { if (top) { - lst.add(SortField.FIELD_SCORE); + sorts.add(SortField.FIELD_SCORE); } else { - lst.add(new SortField(null, SortField.Type.SCORE, true)); + sorts.add(new SortField(null, SortField.Type.SCORE, true)); } + fields.add(null); } else if (DOCID.equals(field)) { - lst.add(new SortField(null, SortField.Type.DOC, top)); + sorts.add(new SortField(null, SortField.Type.DOC, top)); + fields.add(null); } else { // try to find the field SchemaField sf = req.getSchema().getFieldOrNull(field); @@ -348,7 +365,8 @@ public class QueryParsing { (SolrException.ErrorCode.BAD_REQUEST, "sort 
param field can't be found: " + field); } - lst.add(sf.getSortField(top)); + sorts.add(sf.getSortField(top)); + fields.add(sf); } } @@ -358,13 +376,17 @@ public class QueryParsing { // normalize a sort on score desc to null - if (lst.size()==1 && lst.get(0) == SortField.FIELD_SCORE) { - return null; + if (sorts.size()==1 && sorts.get(0) == SortField.FIELD_SCORE) { + return newEmptySortSpec(); } - return new Sort(lst.toArray(new SortField[lst.size()])); + Sort s = new Sort(sorts.toArray(new SortField[sorts.size()])); + return new SortSpec(s, fields); } + private static SortSpec newEmptySortSpec() { + return new SortSpec(null, Collections.emptyList()); + } /////////////////////////// diff --git a/solr/core/src/java/org/apache/solr/search/ScoreFilter.java b/solr/core/src/java/org/apache/solr/search/ScoreFilter.java new file mode 100644 index 00000000000..dfe51b5997a --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/ScoreFilter.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.search; + +public interface ScoreFilter { + +} \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java new file mode 100644 index 00000000000..3962d0d2a83 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java @@ -0,0 +1,208 @@ +package org.apache.solr.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.simple.SimpleQueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.SimpleParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.parser.QueryParser; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.TextField; +import org.apache.solr.util.SolrPluginUtils; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +/** + * Create a query from the input value that will be parsed by Lucene's SimpleQueryParser. + * See {@link org.apache.lucene.queryparser.simple.SimpleQueryParser} for details on the exact syntax allowed + * to be used for queries. + *
    + * The following options may be applied for parsing the query. + *
      + *
    • + * q.operations - Used to enable specific operations for parsing. The operations that can be enabled are + * and, not, or, prefix, phrase, precedence, escape, and whitespace. By default all operations + * are enabled. All operations can be disabled by passing in an empty string to this parameter. + *
    • + *
    • + * q.op - Used to specify the operator to be used if whitespace is a delimiter. Either 'AND' or 'OR' + * can be specified for this parameter. Any other string will cause an exception to be thrown. + * If this parameter is not specified 'OR' will be used by default. + *
    • + *
    • + * qf - The list of query fields and boosts to use when building the simple query. The format is the following: + * fieldA^1.0 fieldB^2.2. A field can also be specified without a boost by simply listing the + * field as fieldA fieldB. Any field without a boost will default to use a boost of 1.0. + *
    • + *
    • + * df - An override for the default field specified in the schema or a default field if one is not specified + * in the schema. If qf is not specified the default field will be used as the field to run the query + * against. + *
    • + *
    + */ +public class SimpleQParserPlugin extends QParserPlugin { + /** The name that can be used to specify this plugin should be used to parse the query. */ + public static String NAME = "simple"; + + /** Enables {@code AND} operator (+) */ + private static final String AND_OPERATOR = "AND"; + /** Enables {@code NOT} operator (-) */ + private static final String NOT_OPERATOR = "NOT"; + /** Enables {@code OR} operator (|) */ + private static final String OR_OPERATOR = "OR"; + /** Enables {@code PREFIX} operator (*) */ + private static final String PREFIX_OPERATOR = "PREFIX"; + /** Enables {@code PHRASE} operator (") */ + private static final String PHRASE_OPERATOR = "PHRASE"; + /** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */ + private static final String PRECEDENCE_OPERATORS = "PRECEDENCE"; + /** Enables {@code ESCAPE} operator (\) */ + private static final String ESCAPE_OPERATOR = "ESCAPE"; + /** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */ + private static final String WHITESPACE_OPERATOR = "WHITESPACE"; + + /** Map of string operators to their int counterparts in SimpleQueryParser. */ + private static final Map OPERATORS = new HashMap(); + + /* Setup the map of possible operators. */ + static { + OPERATORS.put(AND_OPERATOR, SimpleQueryParser.AND_OPERATOR); + OPERATORS.put(NOT_OPERATOR, SimpleQueryParser.NOT_OPERATOR); + OPERATORS.put(OR_OPERATOR, SimpleQueryParser.OR_OPERATOR); + OPERATORS.put(PREFIX_OPERATOR, SimpleQueryParser.PREFIX_OPERATOR); + OPERATORS.put(PHRASE_OPERATOR, SimpleQueryParser.PHRASE_OPERATOR); + OPERATORS.put(PRECEDENCE_OPERATORS, SimpleQueryParser.PRECEDENCE_OPERATORS); + OPERATORS.put(ESCAPE_OPERATOR, SimpleQueryParser.ESCAPE_OPERATOR); + OPERATORS.put(WHITESPACE_OPERATOR, SimpleQueryParser.WHITESPACE_OPERATOR); + } + + /** No initialization is necessary so this method is empty. 
*/ + @Override + public void init(NamedList args) { + } + + /** Returns a QParser that will create a query by using Lucene's SimpleQueryParser. */ + @Override + public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + // Some of the parameters may come in through localParams, so combine them with params. + SolrParams defaultParams = SolrParams.wrapDefaults(localParams, params); + + // This will be used to specify what fields and boosts will be used by SimpleQueryParser. + Map queryFields = SolrPluginUtils.parseFieldBoosts(defaultParams.get(SimpleParams.QF)); + + if (queryFields.isEmpty()) { + // If qf is not specified, set up the queryFields map to use the defaultField. + String defaultField = QueryParsing.getDefaultField(req.getSchema(), defaultParams.get(CommonParams.DF)); + + if (defaultField == null) { + // A query cannot be run without having a field or set of fields to run against. + throw new IllegalStateException("Neither " + SimpleParams.QF + ", " + CommonParams.DF + + ", nor the default search field are present."); + } + + queryFields.put(defaultField, 1.0F); + } + else { + for (Map.Entry queryField : queryFields.entrySet()) { + if (queryField.getValue() == null) { + // Some fields may be specified without a boost, so default the boost to 1.0 since a null value + // will not be accepted by SimpleQueryParser. + queryField.setValue(1.0F); + } + } + } + + // Set up the operations that are enabled for the query. + int enabledOps = 0; + String opParam = defaultParams.get(SimpleParams.QO); + + if (opParam == null) { + // All operations will be enabled. + enabledOps = -1; + } else { + // Parse the specified enabled operations to be used by the query. 
+ String[] operations = opParam.split(","); + + for (String operation : operations) { + Integer enabledOp = OPERATORS.get(operation.trim().toUpperCase(Locale.getDefault())); + + if (enabledOp != null) { + enabledOps |= enabledOp; + } + } + } + + // Create a SimpleQueryParser using the analyzer from the schema. + final IndexSchema schema = req.getSchema(); + final SimpleQueryParser parser = new SimpleQueryParser(req.getSchema().getAnalyzer(), queryFields, enabledOps) { + // Override newPrefixQuery to provide a multi term analyzer for prefix queries run against TextFields. + @Override + protected Query newPrefixQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + + for (Map.Entry entry : weights.entrySet()) { + String field = entry.getKey(); + FieldType type = schema.getFieldType(field); + Query prefix; + + if (type instanceof TextField) { + // If the field type is a TextField then use the multi term analyzer. + Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer(); + String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString(); + prefix = new PrefixQuery(new Term(field, term)); + } else { + // If the type is *not* a TextField don't do any analysis. + prefix = new PrefixQuery(new Term(entry.getKey(), text)); + } + + prefix.setBoost(entry.getValue()); + bq.add(prefix, BooleanClause.Occur.SHOULD); + } + + return simplify(bq); + } + }; + + // Set the default operator to be either 'AND' or 'OR' for the query. + QueryParser.Operator defaultOp = QueryParsing.getQueryParserDefaultOperator(req.getSchema(), defaultParams.get(QueryParsing.OP)); + + if (defaultOp == QueryParser.Operator.AND) { + parser.setDefaultOperator(BooleanClause.Occur.MUST); + } + + // Return a QParser that wraps a SimpleQueryParser. 
+ return new QParser(qstr, localParams, params, req) { + public Query parse() throws SyntaxError { + return parser.parse(qstr); + } + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index bad1d954625..f2e6297390f 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -863,6 +863,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn } }; + private DocSet getDocSetScore(List queries) throws IOException { + Query main = queries.remove(0); + ProcessedFilter pf = getProcessedFilter(null, queries); + DocSetCollector setCollector = new DocSetCollector(maxDoc()>>6, maxDoc()); + Collector collector = setCollector; + if (pf.postFilter != null) { + pf.postFilter.setLastDelegate(collector); + collector = pf.postFilter; + } + + search(main, pf.filter, collector); + + if(collector instanceof DelegatingCollector) { + ((DelegatingCollector) collector).finish(); + } + + DocSet docSet = setCollector.getDocSet(); + return docSet; + } /** * Returns the set of document ids matching all queries. @@ -873,6 +892,15 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn * The DocSet returned should not be modified. 
*/ public DocSet getDocSet(List queries) throws IOException { + + if(queries != null) { + for(Query q : queries) { + if(q instanceof ScoreFilter) { + return getDocSetScore(queries); + } + } + } + ProcessedFilter pf = getProcessedFilter(null, queries); if (pf.answer != null) return pf.answer; diff --git a/solr/core/src/java/org/apache/solr/search/SortSpec.java b/solr/core/src/java/org/apache/solr/search/SortSpec.java index 4573bf691bd..6655aa67a79 100644 --- a/solr/core/src/java/org/apache/solr/search/SortSpec.java +++ b/solr/core/src/java/org/apache/solr/search/SortSpec.java @@ -20,29 +20,63 @@ package org.apache.solr.search; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.solr.schema.SchemaField; + +import java.util.Arrays; +import java.util.List; +import java.util.ArrayList; +import java.util.Collections; /*** * SortSpec encapsulates a Lucene Sort and a count of the number of documents * to return. */ public class SortSpec { - Sort sort; - int num; - int offset; + private Sort sort; + private List fields; + private int num = 10; + private int offset = 0; + public SortSpec(Sort sort, List fields) { + setSortAndFields(sort, fields); + } + public SortSpec(Sort sort, SchemaField[] fields) { + setSortAndFields(sort, Arrays.asList(fields)); + } + + /** @deprecated Specify both Sort and SchemaField[] when constructing */ + @Deprecated public SortSpec(Sort sort, int num) { this(sort,0,num); } + /** @deprecated Specify both Sort and SchemaField[] when constructing */ + @Deprecated public SortSpec(Sort sort, int offset, int num) { - this.sort=sort; + setSort(sort); this.offset=offset; this.num=num; } + /** @deprecated use {@link #setSortAndFields} */ + @Deprecated public void setSort( Sort s ) { sort = s; + fields = Collections.unmodifiableList(Arrays.asList(new SchemaField[s.getSort().length])); + } + + /** + * the specified SchemaFields must correspond one to one with the Sort's SortFields, + * using null where 
appropriate. + */ + public void setSortAndFields( Sort s, List fields ) + { + + assert null == s || s.getSort().length == fields.size() + : "SortFields and SchemaFields do not have equal lengths"; + this.sort = s; + this.fields = Collections.unmodifiableList(fields); } public static boolean includesScore(Sort sort) { @@ -63,10 +97,20 @@ public class SortSpec */ public Sort getSort() { return sort; } + /** + * Gets the Solr SchemaFields that correspond to each of the SortFields used + * in this sort. The array may contain null if a SortField doesn't correspond directly + * to a SchemaField (ie: score, lucene docid, custom function sorting, etc...) + * + * @return an immutable list, may be empty if getSort is null + */ + public List getSchemaFields() { return fields; } + /** * Offset into the list of results. */ public int getOffset() { return offset; } + public void setOffset(int offset) { this.offset = offset; } /** * Gets the number of documents to return after sorting. @@ -74,6 +118,7 @@ public class SortSpec * @return number of docs to return, or -1 for no cut off (just sort) */ public int getCount() { return num; } + public void setCount(int count) { this.num = count; } @Override public String toString() { diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index e426b8b8fac..4c6cb941a3b 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -198,8 +198,8 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin { ValueSource source = fp.parseValueSource(); float min = fp.parseFloat(); float max = fp.parseFloat(); - float target = fp.parseFloat(); - Float def = fp.hasMoreArguments() ? fp.parseFloat() : null; + ValueSource target = fp.parseValueSource(); + ValueSource def = fp.hasMoreArguments() ? 
fp.parseValueSource() : null; return new RangeMapFloatFunction(source, min, max, target, def); } }); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java index 8dea1d318ed..ce06e4f8155 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java @@ -92,7 +92,9 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor if (srsp.getSolrResponse() != null) { nl.add("time", srsp.getSolrResponse().getElapsedTime()); } - + if (srsp.getShardAddress() != null) { + nl.add("shardAddress", srsp.getShardAddress()); + } shardInfo.add(srsp.getShard(), nl); } if (rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false) && srsp.getException() != null) { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java index 1f41b073979..dd9bdbb62cd 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java @@ -108,7 +108,9 @@ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor { if (srsp.getSolrResponse() != null) { individualShardInfo.add("time", srsp.getSolrResponse().getElapsedTime()); } - + if (srsp.getShardAddress() != null) { + individualShardInfo.add("shardAddress", srsp.getShardAddress()); + } shardInfo.add(srsp.getShard(), individualShardInfo); } if 
(rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false) && srsp.getException() != null) { diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java index f11cc6adab5..01b23eae3c0 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java @@ -52,6 +52,7 @@ import org.apache.solr.response.QueryResponseWriter; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.servlet.cache.HttpCacheHeaderUtil; import org.apache.solr.servlet.cache.Method; +import org.apache.solr.update.processor.DistributingUpdateProcessorFactory; import org.apache.solr.util.FastWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,6 +65,7 @@ import javax.servlet.ServletRequest; import javax.servlet.ServletResponse; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -92,6 +94,9 @@ import java.util.WeakHashMap; */ public class SolrDispatchFilter implements Filter { + private static final String CONNECTION_HEADER = "Connection"; + private static final String TRANSFER_ENCODING_HEADER = "Transfer-Encoding"; + final Logger log; protected volatile CoreContainer cores; @@ -309,8 +314,12 @@ public class SolrDispatchFilter implements Filter // if we couldn't find it locally, look on other nodes if (core == null && idx > 0) { String coreUrl = getRemotCoreUrl(cores, corename, origCorename); - if (coreUrl != null) { - path = path.substring( idx ); + // don't proxy for internal update requests + SolrParams queryParams = SolrRequestParsers.parseQueryString(req.getQueryString()); + if (coreUrl != null + && queryParams + .get(DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM) == null) { + path = path.substring(idx); remoteQuery(coreUrl + path, 
req, solrReq, resp); return; } else { @@ -493,10 +502,14 @@ public class SolrDispatchFilter implements Filter con.setRequestMethod(req.getMethod()); con.setUseCaches(false); - con.setDoOutput(true); + boolean isPostOrPutRequest = "POST".equals(req.getMethod()) || "PUT".equals(req.getMethod()); + + if (isPostOrPutRequest) { + con.setDoOutput(true); + } con.setDoInput(true); - for (Enumeration e = req.getHeaderNames(); e.hasMoreElements();) { - String headerName = e.nextElement().toString(); + for (Enumeration e = req.getHeaderNames(); e.hasMoreElements();) { + String headerName = e.nextElement(); con.setRequestProperty(headerName, req.getHeader(headerName)); } try { @@ -504,7 +517,7 @@ public class SolrDispatchFilter implements Filter InputStream is; OutputStream os; - if ("POST".equals(req.getMethod())) { + if (isPostOrPutRequest) { is = req.getInputStream(); os = con.getOutputStream(); // side effect: method is switched to POST try { @@ -518,11 +531,18 @@ public class SolrDispatchFilter implements Filter resp.setStatus(con.getResponseCode()); - for (Iterator i = con.getHeaderFields().entrySet().iterator(); i - .hasNext();) { - Map.Entry mapEntry = (Map.Entry) i.next(); - if (mapEntry.getKey() != null) resp.setHeader(mapEntry.getKey() - .toString(), ((List) mapEntry.getValue()).get(0).toString()); + for (Iterator>> i = con.getHeaderFields().entrySet().iterator(); i.hasNext();) { + Map.Entry> mapEntry = i.next(); + String header = mapEntry.getKey(); + + // We pull out these two headers below because they can cause chunked + // encoding issues with Tomcat and certain clients + if (header != null && !header.equals(TRANSFER_ENCODING_HEADER) + && !header.equals(CONNECTION_HEADER)) { + for (String value : mapEntry.getValue()) { + resp.addHeader(mapEntry.getKey(), value); + } + } } resp.setCharacterEncoding(con.getContentEncoding()); @@ -552,13 +572,14 @@ public class SolrDispatchFilter implements Filter ClusterState clusterState = 
cores.getZkController().getClusterState(); Collection slices = clusterState.getActiveSlices(collectionName); boolean byCoreName = false; + if (slices == null) { + slices = new ArrayList(); // look by core name byCoreName = true; - Set collections = clusterState.getCollections(); - for (String collection : collections) { - slices = new ArrayList(); - slices.addAll(clusterState.getActiveSlices(collection)); + slices = getSlicesForCollections(clusterState, slices, true); + if (slices == null || slices.size() == 0) { + slices = getSlicesForCollections(clusterState, slices, false); } } @@ -566,6 +587,21 @@ public class SolrDispatchFilter implements Filter return null; } + String coreUrl = getCoreUrl(cores, collectionName, origCorename, clusterState, + slices, byCoreName, true); + + if (coreUrl == null) { + coreUrl = getCoreUrl(cores, collectionName, origCorename, clusterState, + slices, byCoreName, false); + } + + return coreUrl; + } + + private String getCoreUrl(CoreContainer cores, String collectionName, + String origCorename, ClusterState clusterState, Collection slices, + boolean byCoreName, boolean activeReplicas) { + String coreUrl; Set liveNodes = clusterState.getLiveNodes(); Iterator it = slices.iterator(); while (it.hasNext()) { @@ -573,8 +609,9 @@ public class SolrDispatchFilter implements Filter Map sliceShards = slice.getReplicasMap(); for (ZkNodeProps nodeProps : sliceShards.values()) { ZkCoreNodeProps coreNodeProps = new ZkCoreNodeProps(nodeProps); - if (liveNodes.contains(coreNodeProps.getNodeName()) - && coreNodeProps.getState().equals(ZkStateReader.ACTIVE)) { + if (!activeReplicas || (liveNodes.contains(coreNodeProps.getNodeName()) + && coreNodeProps.getState().equals(ZkStateReader.ACTIVE))) { + if (byCoreName && !collectionName.equals(coreNodeProps.getCoreName())) { // if it's by core name, make sure they match continue; @@ -583,7 +620,7 @@ public class SolrDispatchFilter implements Filter // don't count a local core continue; } - String coreUrl; + if 
(origCorename != null) { coreUrl = coreNodeProps.getBaseUrl() + "/" + origCorename; } else { @@ -599,6 +636,19 @@ public class SolrDispatchFilter implements Filter } return null; } + + private Collection getSlicesForCollections(ClusterState clusterState, + Collection slices, boolean activeSlices) { + Set collections = clusterState.getCollections(); + for (String collection : collections) { + if (activeSlices) { + slices.addAll(clusterState.getActiveSlices(collection)); + } else { + slices.addAll(clusterState.getSlices(collection)); + } + } + return slices; + } private SolrCore getCoreByCollection(CoreContainer cores, String corename, String path) { String collection = corename; @@ -708,6 +758,7 @@ public class SolrDispatchFilter implements Filter ServletRequest request, HttpServletResponse response, Throwable ex) throws IOException { + SolrCore localCore = null; try { SolrQueryResponse solrResp = new SolrQueryResponse(); if(ex instanceof Exception) { @@ -717,7 +768,9 @@ public class SolrDispatchFilter implements Filter solrResp.setException(new RuntimeException(ex)); } if(core==null) { - core = cores.getCore(""); // default core + localCore = cores.getCore(""); // default core + } else { + localCore = core; } if(req==null) { final SolrParams solrParams; @@ -737,6 +790,10 @@ public class SolrDispatchFilter implements Filter SimpleOrderedMap info = new SimpleOrderedMap(); int code = ResponseUtils.getErrorInfo(ex, info, log); response.sendError( code, info.toString() ); + } finally { + if (core == null && localCore != null) { + localCore.close(); + } } } diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java index 4e76eac2611..ccba0d65ed1 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java @@ -584,7 +584,7 @@ public class SolrRequestParsers if (!isFormData(req)) { throw new 
SolrException( ErrorCode.BAD_REQUEST, "Not application/x-www-form-urlencoded content: "+req.getContentType() ); } - + final Map map = new HashMap(); // also add possible URL parameters and include into the map (parsed using UTF-8): @@ -600,7 +600,7 @@ public class SolrRequestParsers throw new SolrException(ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content length (" + totalLength + " bytes) exceeds upload limit of " + uploadLimitKB + " KB"); } - + // get query String from request body, using the charset given in content-type: final String cs = ContentStreamBase.getCharsetFromContentType(req.getContentType()); final Charset charset = (cs == null) ? IOUtils.CHARSET_UTF_8 : Charset.forName(cs); @@ -680,7 +680,10 @@ public class SolrRequestParsers if (ServletFileUpload.isMultipartContent(req)) { return multipart.parseParamsAndFillStreams(req, streams); } - return raw.parseParamsAndFillStreams(req, streams); + if (req.getContentType() != null) { + return raw.parseParamsAndFillStreams(req, streams); + } + throw new SolrException(ErrorCode.UNSUPPORTED_MEDIA_TYPE, "Must specify a Content-Type header with POST requests"); } throw new SolrException(ErrorCode.BAD_REQUEST, "Unsupported method: " + method + " for request " + req); } diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/DictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/DictionaryFactory.java new file mode 100644 index 00000000000..831ddaaad33 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/DictionaryFactory.java @@ -0,0 +1,48 @@ +package org.apache.solr.spelling.suggest; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.spell.Dictionary; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; + +/** + * Encapsulates shared fields for all types of dictionaryFactory classes + */ +public abstract class DictionaryFactory { + + /** Default dictionary implementation to use for FileBasedDictionaries */ + public static String DEFAULT_FILE_BASED_DICT = FileDictionaryFactory.class.getName(); + + /** Default dictionary implementation to use for IndexBasedDictionaries */ + public static String DEFAULT_INDEX_BASED_DICT = HighFrequencyDictionaryFactory.class.getName(); + + protected NamedList params; + + /** Sets the parameters available to SolrSuggester for use in Dictionary creation */ + public void setParams(NamedList params) { + this.params = params; + } + + /** + * Create a Dictionary using options in core and optionally + * uses searcher, in case of index based dictionaries + */ + public abstract Dictionary create(SolrCore core, SolrIndexSearcher searcher); + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentDictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentDictionaryFactory.java new file mode 100644 index 00000000000..7a2f8f4dd71 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentDictionaryFactory.java @@ 
-0,0 +1,56 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.search.suggest.DocumentDictionary; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; + +/** + * Factory for {@link DocumentDictionary} + */ +public class DocumentDictionaryFactory extends DictionaryFactory { + + public static final String FIELD = "field"; + + public static final String WEIGHT_FIELD = "weightField"; + + public static final String PAYLOAD_FIELD = "payloadField"; + + @Override + public Dictionary create(SolrCore core, SolrIndexSearcher searcher) { + if(params == null) { + // should not happen; implies setParams was not called + throw new IllegalStateException("Value of params not set"); + } + String field = (String) params.get(FIELD); + String weightField = (String) params.get(WEIGHT_FIELD); + String payloadField = (String) params.get(PAYLOAD_FIELD); + + if (field == null) { + throw new IllegalArgumentException(FIELD + " is a mandatory parameter"); + } + if (weightField == null) { + throw new IllegalArgumentException(WEIGHT_FIELD + " is a mandatory parameter"); + } + + return new 
DocumentDictionary(searcher.getIndexReader(), field, weightField, payloadField); + } + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java new file mode 100644 index 00000000000..fc7736eb2f5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java @@ -0,0 +1,112 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.search.suggest.DocumentExpressionDictionary; +import org.apache.solr.core.SolrCore; +import org.apache.solr.schema.DoubleField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.FloatField; +import org.apache.solr.schema.IntField; +import org.apache.solr.schema.LongField; +import org.apache.solr.schema.TrieDoubleField; +import org.apache.solr.schema.TrieFloatField; +import org.apache.solr.schema.TrieIntField; +import org.apache.solr.schema.TrieLongField; +import org.apache.solr.search.SolrIndexSearcher; + +/** + * Factory for {@link DocumentExpressionDictionary} + */ +public class DocumentExpressionDictionaryFactory extends DictionaryFactory { + + /** Label for defining field to use for terms */ + public static final String FIELD = "field"; + + /** Label for defining payloadField to use for terms (optional) */ + public static final String PAYLOAD_FIELD = "payloadField"; + + /** Label for defining expression to evaluate the weight for the terms */ + public static final String WEIGHT_EXPRESSION = "weightExpression"; + + /** Label used to define the name of the + * sortField used in the {@link #WEIGHT_EXPRESSION} */ + public static final String SORT_FIELD = "sortField"; + + @Override + public Dictionary create(SolrCore core, SolrIndexSearcher searcher) { + if(params == null) { + // should not happen; implies setParams was not called + throw new IllegalStateException("Value of params not set"); + } + + String field = (String) params.get(FIELD); + String payloadField = (String) params.get(PAYLOAD_FIELD); + String weightExpression = (String) params.get(WEIGHT_EXPRESSION); + Set sortFields = new HashSet(); + + if (field == null) { + throw new IllegalArgumentException(FIELD + " is a mandatory parameter"); + } + + if (weightExpression == null) { + throw new 
IllegalArgumentException(WEIGHT_EXPRESSION + " is a mandatory parameter"); + } + + for(int i = 0; i < params.size(); i++) { + if (params.getName(i).equals(SORT_FIELD)) { + String sortFieldName = (String) params.getVal(i); + + SortField.Type sortFieldType = getSortFieldType(core, sortFieldName); + + if (sortFieldType == null) { + throw new IllegalArgumentException(sortFieldName + " could not be mapped to any appropriate type" + + " [long, int, float, double]"); + } + + SortField sortField = new SortField(sortFieldName, sortFieldType); + sortFields.add(sortField); + } + } + + return new DocumentExpressionDictionary(searcher.getIndexReader(), field, weightExpression, + sortFields, payloadField); + } + + private SortField.Type getSortFieldType(SolrCore core, String sortFieldName) { + SortField.Type type = null; + String fieldTypeName = core.getLatestSchema().getField(sortFieldName).getType().getTypeName(); + FieldType ft = core.getLatestSchema().getFieldTypes().get(fieldTypeName); + if (ft instanceof FloatField || ft instanceof TrieFloatField) { + type = SortField.Type.FLOAT; + } else if (ft instanceof IntField || ft instanceof TrieIntField) { + type = SortField.Type.INT; + } else if (ft instanceof LongField || ft instanceof TrieLongField) { + type = SortField.Type.LONG; + } else if (ft instanceof DoubleField || ft instanceof TrieDoubleField) { + type = SortField.Type.DOUBLE; + } + return type; + } + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java new file mode 100644 index 00000000000..986fe7ec64e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java @@ -0,0 +1,62 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.InputStreamReader; + +import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.search.suggest.FileDictionary; +import org.apache.lucene.util.IOUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; + +/** + * Factory for {@link FileDictionary} + */ +public class FileDictionaryFactory extends DictionaryFactory { + + /** Label for defining fieldDelimiter to be used */ + public static final String FIELD_DELIMITER = "fieldDelimiter"; + + @Override + public Dictionary create(SolrCore core, SolrIndexSearcher searcher) { + if (params == null) { + // should not happen; implies setParams was not called + throw new IllegalStateException("Value of params not set"); + } + + String sourceLocation = (String)params.get(Suggester.LOCATION); + + if (sourceLocation == null) { + throw new IllegalArgumentException(Suggester.LOCATION + " parameter is mandatory for using FileDictionary"); + } + + String fieldDelimiter = (params.get(FIELD_DELIMITER) != null) + ? 
(String) params.get(FIELD_DELIMITER) : + FileDictionary.DEFAULT_FIELD_DELIMITER; + + try { + return new FileDictionary(new InputStreamReader( + core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8), fieldDelimiter); + } catch (IOException e) { + throw new RuntimeException(); + } + } + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/HighFrequencyDictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/HighFrequencyDictionaryFactory.java new file mode 100644 index 00000000000..2d7c42bfd9d --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/HighFrequencyDictionaryFactory.java @@ -0,0 +1,53 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.search.spell.HighFrequencyDictionary; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.spelling.SolrSpellChecker; + +/** + * Factory for {@link HighFrequencyDictionary} + */ +public class HighFrequencyDictionaryFactory extends DictionaryFactory { + /** + * Minimum frequency of terms to consider when building the dictionary. + */ + public static final String THRESHOLD_TOKEN_FREQUENCY = "threshold"; + + @Override + public Dictionary create(SolrCore core, SolrIndexSearcher searcher) { + if(params == null) { + // should not happen; implies setParams was not called + throw new IllegalStateException("Value of params not set"); + } + String field = (String)params.get(SolrSpellChecker.FIELD); + + if (field == null) { + throw new IllegalArgumentException(SolrSpellChecker.FIELD + " is a mandatory parameter"); + } + + float threshold = params.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f + : (Float)params.get(THRESHOLD_TOKEN_FREQUENCY); + + return new HighFrequencyDictionary(searcher.getIndexReader(), field, threshold); + } + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/LookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/LookupFactory.java index 147b2611a98..33d1732e723 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/LookupFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/LookupFactory.java @@ -20,11 +20,25 @@ package org.apache.solr.spelling.suggest; import org.apache.lucene.search.suggest.Lookup; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; +import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory; /** * Suggester factory for creating {@link Lookup} instances. 
*/ public abstract class LookupFactory { + + /** Default lookup implementation to use for SolrSuggester */ + public static String DEFAULT_FILE_BASED_DICT = JaspellLookupFactory.class.getName(); + + /** + * Create a Lookup using config options in params and + * current core + */ public abstract Lookup create(NamedList params, SolrCore core); + + /** + *

    Returns the filename in which the in-memory data structure is stored

    + * NOTE: not all {@link Lookup} implementations store in-memory data structures + * */ public abstract String storeFileName(); } diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/SolrSuggester.java b/solr/core/src/java/org/apache/solr/spelling/suggest/SolrSuggester.java new file mode 100644 index 00000000000..0f10128b494 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/SolrSuggester.java @@ -0,0 +1,206 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.search.suggest.Lookup.LookupResult; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.util.IOUtils; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Responsible for loading the lookup and dictionary Implementations specified by + * the SolrConfig. 
+ * Interacts (query/build/reload) with Lucene Suggesters through {@link Lookup} and + * {@link Dictionary} + * */ +public class SolrSuggester { + private static final Logger LOG = LoggerFactory.getLogger(SolrSuggester.class); + + /** Name used when an unnamed suggester config is passed */ + public static final String DEFAULT_DICT_NAME = "default"; + + /** Label to identify the name of the suggester */ + public static final String NAME = "name"; + + /** Location of the source data - either a path to a file, or null for the + * current IndexReader. + * */ + public static final String LOCATION = "sourceLocation"; + + /** Fully-qualified class of the {@link Lookup} implementation. */ + public static final String LOOKUP_IMPL = "lookupImpl"; + + /** Fully-qualified class of the {@link Dictionary} implementation */ + public static final String DICTIONARY_IMPL = "dictionaryImpl"; + + /** + * Name of the location where to persist the dictionary. If this location + * is relative then the data will be stored under the core's dataDir. If this + * is null the storing will be disabled. + */ + public static final String STORE_DIR = "storeDir"; + + static SuggesterResult EMPTY_RESULT = new SuggesterResult(); + + private String sourceLocation; + private File storeDir; + private Dictionary dictionary; + private Lookup lookup; + private String lookupImpl; + private String dictionaryImpl; + private String name; + + private LookupFactory factory; + private DictionaryFactory dictionaryFactory; + + /** + * Uses the config and the core to initialize the underlying + * Lucene suggester + * */ + public String init(NamedList config, SolrCore core) { + LOG.info("init: " + config); + + // read the config + name = config.get(NAME) != null ? 
(String) config.get(NAME) + : DEFAULT_DICT_NAME; + sourceLocation = (String) config.get(LOCATION); + lookupImpl = (String) config.get(LOOKUP_IMPL); + dictionaryImpl = (String) config.get(DICTIONARY_IMPL); + String store = (String)config.get(STORE_DIR); + + if (lookupImpl == null) { + lookupImpl = LookupFactory.DEFAULT_FILE_BASED_DICT; + LOG.info("No " + LOOKUP_IMPL + " parameter was provided falling back to " + lookupImpl); + } + // initialize appropriate lookup instance + factory = core.getResourceLoader().newInstance(lookupImpl, LookupFactory.class); + lookup = factory.create(config, core); + + // if store directory is provided make it or load up the lookup with its content + if (store != null) { + storeDir = new File(store); + if (!storeDir.isAbsolute()) { + storeDir = new File(core.getDataDir() + File.separator + storeDir); + } + if (!storeDir.exists()) { + storeDir.mkdirs(); + } else { + // attempt reload of the stored lookup + try { + lookup.load(new FileInputStream(new File(storeDir, factory.storeFileName()))); + } catch (IOException e) { + LOG.warn("Loading stored lookup data failed, possibly not cached yet"); + } + } + } + + // dictionary configuration + if (dictionaryImpl == null) { + dictionaryImpl = (sourceLocation == null) ? 
DictionaryFactory.DEFAULT_INDEX_BASED_DICT : + DictionaryFactory.DEFAULT_FILE_BASED_DICT; + LOG.info("No " + DICTIONARY_IMPL + " parameter was provided falling back to " + dictionaryImpl); + } + + dictionaryFactory = core.getResourceLoader().newInstance(dictionaryImpl, DictionaryFactory.class); + dictionaryFactory.setParams(config); + LOG.info("Dictionary loaded with params: " + config); + + return name; + } + + /** Build the underlying Lucene Suggester */ + public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException { + LOG.info("build()"); + + dictionary = dictionaryFactory.create(core, searcher); + lookup.build(dictionary); + if (storeDir != null) { + File target = new File(storeDir, factory.storeFileName()); + if(!lookup.store(new FileOutputStream(target))) { + LOG.error("Store Lookup build failed"); + } else { + LOG.info("Stored suggest data to: " + target.getAbsolutePath()); + } + } + } + + /** Reloads the underlying Lucene Suggester */ + public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException { + LOG.info("reload()"); + if (dictionary == null && storeDir != null) { + // this may be a firstSearcher event, try loading it + FileInputStream is = new FileInputStream(new File(storeDir, factory.storeFileName())); + try { + if (lookup.load(is)) { + return; // loaded ok + } + } finally { + IOUtils.closeWhileHandlingException(is); + } + LOG.debug("load failed, need to build Lookup again"); + } + // loading was unsuccessful - build it again + build(core, searcher); + } + + /** Returns suggestions based on the {@link SuggesterOptions} passed */ + public SuggesterResult getSuggestions(SuggesterOptions options) throws IOException { + LOG.debug("getSuggestions: " + options.token); + if (lookup == null) { + LOG.info("Lookup is null - invoke suggest.build first"); + return EMPTY_RESULT; + } + + SuggesterResult res = new SuggesterResult(); + List suggestions = lookup.lookup(options.token, false, options.count); + 
res.add(options.token.toString(), suggestions); + return res; + } + + /** Returns the unique name of the suggester */ + public String getName() { + return name; + } + + /** Returns the size of the in-memory data structure used by the underlying lookup implementation */ + public long sizeInBytes() { + return lookup.sizeInBytes(); + } + + @Override + public String toString() { + return "SolrSuggester [ name=" + name + ", " + + "sourceLocation=" + sourceLocation + ", " + + "storeDir=" + ((storeDir == null) ? "" : storeDir.getAbsoluteFile()) + ", " + + "lookupImpl=" + lookupImpl + ", " + + "dictionaryImpl=" + dictionaryImpl + ", " + + "sizeInBytes=" + ((lookup!=null) ? String.valueOf(sizeInBytes()) : "0") + " ]"; + } + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterOptions.java b/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterOptions.java new file mode 100644 index 00000000000..67ba529ab6b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterOptions.java @@ -0,0 +1,38 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.CharsRef; + +/** + * Encapsulates the inputs required to be passed on to + * the underlying suggester in {@link SolrSuggester} + **/ +public class SuggesterOptions { + + /** The token to lookup */ + CharsRef token; + + /** Number of suggestions requested */ + int count; + + public SuggesterOptions(CharsRef token, int count) { + this.token = token; + this.count = count; + } +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterParams.java b/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterParams.java new file mode 100644 index 00000000000..d953dc06bc1 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterParams.java @@ -0,0 +1,56 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public interface SuggesterParams { + public static final String SUGGEST_PREFIX = "suggest."; + + /** + * The name of the dictionary to be used for giving the suggestion for a + * request. 
The value for this parameter is configured in solrconfig.xml + */ + public static final String SUGGEST_DICT = SUGGEST_PREFIX + "dictionary"; + + /** + * The count of suggestions to return for each query term not in the index and/or dictionary. + *

    + * If this parameter is absent in the request then only one suggestion is + * returned. If it is more than one then a maximum of given suggestions are + * returned for each token in the query. + */ + public static final String SUGGEST_COUNT = SUGGEST_PREFIX + "count"; + + /** + * Use the value for this parameter as the query to spell check. + *

    + * This parameter is optional. If absent, then the q parameter is + * used. + */ + public static final String SUGGEST_Q = SUGGEST_PREFIX + "q"; + + /** + * Whether to build the index or not. Optional and false by default. + */ + public static final String SUGGEST_BUILD = SUGGEST_PREFIX + "build"; + + /** + * Whether to reload the index. Optional and false by default. + */ + public static final String SUGGEST_RELOAD = SUGGEST_PREFIX + "reload"; + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterResult.java b/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterResult.java new file mode 100644 index 00000000000..6764a3b86d1 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/SuggesterResult.java @@ -0,0 +1,62 @@ +package org.apache.solr.spelling.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.search.suggest.Lookup.LookupResult; + +/** + * Encapsulates the results returned by the suggester in {@link SolrSuggester} + * */ +public class SuggesterResult { + + public SuggesterResult() {} + + /** token -> lookup results mapping*/ + private Map> suggestions = new HashMap>(); + + /** Add suggestion results for token */ + public void add(String token, List results) { + List res = this.suggestions.get(token); + if (res == null) { + res = results; + this.suggestions.put(token, res); + } + } + + /** + * Get a list of lookup result for a given token + * null can be returned, if there are no lookup results + * for the token + * */ + public List getLookupResult(String token) { + return this.suggestions.get(token); + } + + /** + * Get the set of tokens that are present in the + * instance + */ + public Set getTokens() { + return this.suggestions.keySet(); + } +} diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java index e32859eebc0..e8196413cc6 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java @@ -69,6 +69,9 @@ public class AnalyzingInfixLookupFactory extends LookupFactory { throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory"); } FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); + if (ft == null) { + throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema"); + } Analyzer indexAnalyzer = ft.getAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); diff --git 
a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingLookupFactory.java index 4dd4a186d7d..cba9d3a661b 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingLookupFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingLookupFactory.java @@ -83,6 +83,10 @@ public class AnalyzingLookupFactory extends LookupFactory { throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory"); } FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); + if (ft == null) { + throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema"); + } + Analyzer indexAnalyzer = ft.getAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FuzzyLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FuzzyLookupFactory.java index 4eec5939c93..5dae427c729 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FuzzyLookupFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FuzzyLookupFactory.java @@ -78,6 +78,9 @@ public class FuzzyLookupFactory extends LookupFactory { } // retrieve index and query analyzers for the field FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); + if (ft == null) { + throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema"); + } Analyzer indexAnalyzer = ft.getAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java index 8e9fcb0f2e9..af8c0973a4b 100644 --- 
a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java @@ -54,7 +54,6 @@ public class HdfsDirectory extends BaseDirectory { public HdfsDirectory(Path hdfsDirPath, Configuration configuration) throws IOException { - assert hdfsDirPath.toString().startsWith("hdfs:/") : hdfsDirPath.toString(); setLockFactory(NoLockFactory.getNoLockFactory()); this.hdfsDirPath = hdfsDirPath; this.configuration = configuration; diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java index ecf113ace17..d46965511e5 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java @@ -21,14 +21,18 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockFactory; import org.apache.lucene.store.LockReleaseFailedException; import org.apache.solr.util.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class HdfsLockFactory extends LockFactory { + public static Logger log = LoggerFactory.getLogger(HdfsLockFactory.class); private Path lockPath; private Configuration configuration; @@ -98,9 +102,14 @@ public class HdfsLockFactory extends LockFactory { FileSystem fs = null; try { fs = FileSystem.newInstance(lockPath.toUri(), conf); - + if (!fs.exists(lockPath)) { + fs.mkdirs(lockPath); + } file = fs.create(new Path(lockPath, lockName), false); - } catch (IOException e) { + } catch (FileAlreadyExistsException e) { + return false; + }catch (IOException e) { + log.error("Error creating lock file", e); return false; } finally { IOUtils.closeQuietly(file); 
diff --git a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java index 3facac9f554..6def045784f 100644 --- a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java +++ b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java @@ -109,14 +109,16 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable absComparator = new Comparator() { @@ -141,7 +129,7 @@ public class PeerSync { this.maxUpdates = nUpdates; this.cantReachIsSuccess = cantReachIsSuccess; this.getNoVersionsIsSuccess = getNoVersionsIsSuccess; - + this.client = core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getHttpClient(); uhandler = core.getUpdateHandler(); ulog = uhandler.getUpdateLog(); @@ -317,7 +305,8 @@ public class PeerSync { } if (cantReachIsSuccess && sreq.purpose == 1 && srsp.getException() instanceof SolrException && ((SolrException) srsp.getException()).code() == 404) { - log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success"); + log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success. " + + "Perhaps /get is not registered?"); return true; } // TODO: at least log??? 
diff --git a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java index 0e4d9fbfaa9..e5e5baf7bc9 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java +++ b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java @@ -18,13 +18,14 @@ package org.apache.solr.update; */ import java.io.IOException; +import java.net.ConnectException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.ExecutorService; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.common.SolrException; @@ -39,11 +40,14 @@ import org.slf4j.LoggerFactory; public class SolrCmdDistributor { - private static final int MAX_RETRIES_ON_FORWARD = 15; + private static final int MAX_RETRIES_ON_FORWARD = 25; public static Logger log = LoggerFactory.getLogger(SolrCmdDistributor.class); private StreamingSolrServers servers; + private int retryPause = 500; + private int maxRetriesOnForward = MAX_RETRIES_ON_FORWARD; + private List allErrors = new ArrayList(); private List errors = new ArrayList(); @@ -51,8 +55,14 @@ public class SolrCmdDistributor { public boolean abortCheck(); } - public SolrCmdDistributor(ExecutorService updateExecutor) { - servers = new StreamingSolrServers(updateExecutor); + public SolrCmdDistributor(UpdateShardHandler updateShardHandler) { + servers = new StreamingSolrServers(updateShardHandler); + } + + public SolrCmdDistributor(StreamingSolrServers servers, int maxRetriesOnForward, int retryPause) { + this.servers = servers; + this.maxRetriesOnForward = maxRetriesOnForward; + this.retryPause = retryPause; } public void 
finish() { @@ -69,59 +79,72 @@ public class SolrCmdDistributor { List errors = new ArrayList(this.errors); errors.addAll(servers.getErrors()); + List resubmitList = new ArrayList(); - boolean blockUntilFinishedAgain = false; for (Error err : errors) { - String oldNodeUrl = err.req.node.getUrl(); - - // if there is a retry url, we want to retry... - boolean isRetry = err.req.node.checkRetry(); - boolean doRetry = false; - int rspCode = err.statusCode; - - if (testing_errorHook != null) Diagnostics.call(testing_errorHook, err.e); - - // this can happen in certain situations such as shutdown - if (isRetry) { - if (rspCode == 404 || rspCode == 403 || rspCode == 503 - || rspCode == 500) { - doRetry = true; - } + try { + String oldNodeUrl = err.req.node.getUrl(); - // if its an ioexception, lets try again - if (err.e instanceof IOException) { - doRetry = true; - } else if (err.e instanceof SolrServerException) { - if (((SolrServerException) err.e).getRootCause() instanceof IOException) { + // if there is a retry url, we want to retry... + boolean isRetry = err.req.node.checkRetry(); + + boolean doRetry = false; + int rspCode = err.statusCode; + + if (testing_errorHook != null) Diagnostics.call(testing_errorHook, + err.e); + + // this can happen in certain situations such as shutdown + if (isRetry) { + if (rspCode == 404 || rspCode == 403 || rspCode == 503) { doRetry = true; } - } - if (err.req.retries < MAX_RETRIES_ON_FORWARD && doRetry) { - err.req.retries++; - SolrException.log(SolrCmdDistributor.log, "forwarding update to " - + oldNodeUrl + " failed - retrying ... 
retries: " + err.req.retries); - try { - Thread.sleep(500); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - log.warn(null, e); + // if its a connect exception, lets try again + if (err.e instanceof SolrServerException) { + if (((SolrServerException) err.e).getRootCause() instanceof ConnectException) { + doRetry = true; + } } - submit(err.req); - blockUntilFinishedAgain = true; + if (err.e instanceof ConnectException) { + doRetry = true; + } + + if (err.req.retries < maxRetriesOnForward && doRetry) { + err.req.retries++; + + SolrException.log(SolrCmdDistributor.log, "forwarding update to " + + oldNodeUrl + " failed - retrying ... retries: " + + err.req.retries + " " + err.req.cmdString + " params:" + + err.req.uReq.getParams() + " rsp:" + rspCode, err.e); + try { + Thread.sleep(retryPause); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + log.warn(null, e); + } + + resubmitList.add(err); + } else { + allErrors.add(err); + } } else { allErrors.add(err); } - } else { - allErrors.add(err); + } catch (Exception e) { + // continue on + log.error("Unexpected Error while doing request retries", e); } } servers.clearErrors(); this.errors.clear(); + for (Error err : resubmitList) { + submit(err.req); + } - if (blockUntilFinishedAgain) { + if (resubmitList.size() > 0) { servers.blockUntilFinished(); doRetriesIfNeeded(); } @@ -142,7 +165,7 @@ public class SolrCmdDistributor { uReq.deleteByQuery(cmd.query); } - submit(new Req(node, uReq, sync)); + submit(new Req(cmd.toString(), node, uReq, sync)); } } @@ -156,7 +179,7 @@ public class SolrCmdDistributor { UpdateRequest uReq = new UpdateRequest(); uReq.setParams(params); uReq.add(cmd.solrDoc, cmd.commitWithin, cmd.overwrite); - submit(new Req(node, uReq, synchronous)); + submit(new Req(cmd.toString(), node, uReq, synchronous)); } } @@ -173,10 +196,10 @@ public class SolrCmdDistributor { addCommit(uReq, cmd); - log.debug("Distrib commit to:" + nodes + " params:" + params); + 
log.debug("Distrib commit to: {} params: {}", nodes, params); for (Node node : nodes) { - submit(new Req(node, uReq, false)); + submit(new Req(cmd.toString(), node, uReq, false)); } } @@ -204,7 +227,11 @@ public class SolrCmdDistributor { return; } - + if (log.isDebugEnabled()) { + log.debug("sending update to " + + req.node.getUrl() + " retry:" + + req.retries + " " + req.cmdString + " params:" + req.uReq.getParams()); + } try { SolrServer solrServer = servers.getSolrServer(req); NamedList rsp = solrServer.request(req.uReq); @@ -225,11 +252,13 @@ public class SolrCmdDistributor { public UpdateRequest uReq; public int retries; public boolean synchronous; + public String cmdString; - public Req(Node node, UpdateRequest uReq, boolean synchronous) { + public Req(String cmdString, Node node, UpdateRequest uReq, boolean synchronous) { this.node = node; this.uReq = uReq; this.synchronous = synchronous; + this.cmdString = cmdString; } } @@ -243,7 +272,7 @@ public class SolrCmdDistributor { public static class Error { public Exception e; - public int statusCode; + public int statusCode = -1; public Req req; } @@ -351,10 +380,14 @@ public class SolrCmdDistributor { } catch (InterruptedException e) { Thread.currentThread().interrupt(); return false; + } catch (Exception e) { + // we retry with same info + log.warn(null, e); + return true; } - + this.nodeProps = leaderProps; - + return true; } diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java index 168906501f3..094103b77c5 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java @@ -215,7 +215,7 @@ public class SolrIndexSplitter { } public static String getRouteKey(String idString) { - int idx = idString.indexOf(CompositeIdRouter.separator); + int idx = idString.indexOf(CompositeIdRouter.SEPARATOR); if (idx <= 0) return null; String part1 = 
idString.substring(0, idx); int commaIdx = part1.indexOf(CompositeIdRouter.bitsSeparator); diff --git a/solr/core/src/java/org/apache/solr/update/StreamingSolrServers.java b/solr/core/src/java/org/apache/solr/update/StreamingSolrServers.java index 02ec08939e6..24f8e113f06 100644 --- a/solr/core/src/java/org/apache/solr/update/StreamingSolrServers.java +++ b/solr/core/src/java/org/apache/solr/update/StreamingSolrServers.java @@ -20,8 +20,10 @@ package org.apache.solr.update; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutorService; import org.apache.http.client.HttpClient; @@ -33,28 +35,30 @@ import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.update.SolrCmdDistributor.Error; +import org.apache.solr.update.processor.DistributedUpdateProcessor; +import org.apache.solr.update.processor.DistributingUpdateProcessorFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class StreamingSolrServers { public static Logger log = LoggerFactory.getLogger(StreamingSolrServers.class); - private static HttpClient httpClient; - static { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 128); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 32); - params.set(HttpClientUtil.PROP_FOLLOW_REDIRECTS, false); - httpClient = HttpClientUtil.createClient(params); - } + private HttpClient httpClient; private Map solrServers = new HashMap(); private List errors = Collections.synchronizedList(new ArrayList()); private ExecutorService updateExecutor; - public StreamingSolrServers(ExecutorService updateExecutor) { - this.updateExecutor = updateExecutor; + public StreamingSolrServers(UpdateShardHandler 
updateShardHandler) { + this.updateExecutor = updateShardHandler.getUpdateExecutor(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(HttpClientUtil.PROP_FOLLOW_REDIRECTS, false); + params.set(HttpClientUtil.PROP_CONNECTION_TIMEOUT, 30000); + params.set(HttpClientUtil.PROP_USE_RETRY, false); + + httpClient = updateShardHandler.getHttpClient(); } public List getErrors() { @@ -69,7 +73,7 @@ public class StreamingSolrServers { String url = getFullUrl(req.node.getUrl()); ConcurrentUpdateSolrServer server = solrServers.get(url); if (server == null) { - server = new ConcurrentUpdateSolrServer(url, httpClient, 100, 1, updateExecutor) { + server = new ConcurrentUpdateSolrServer(url, httpClient, 100, 1, updateExecutor, true) { @Override public void handleError(Throwable ex) { log.error("error", ex); @@ -85,6 +89,10 @@ public class StreamingSolrServers { server.setParser(new BinaryResponseParser()); server.setRequestWriter(new BinaryRequestWriter()); server.setPollQueueTime(0); + Set queryParams = new HashSet(2); + queryParams.add(DistributedUpdateProcessor.DISTRIB_FROM); + queryParams.add(DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM); + server.setQueryParams(queryParams); solrServers.put(url, server); } diff --git a/solr/core/src/java/org/apache/solr/update/TransactionLog.java b/solr/core/src/java/org/apache/solr/update/TransactionLog.java index ac13dd172d9..a9cefc657aa 100644 --- a/solr/core/src/java/org/apache/solr/update/TransactionLog.java +++ b/solr/core/src/java/org/apache/solr/update/TransactionLog.java @@ -564,7 +564,10 @@ public class TransactionLog { } public long getLogSize() { - return tlogFile.length(); + if (tlogFile != null) { + return tlogFile.length(); + } + return 0; } /** Returns a reader that can be used while a log is still in use. 
@@ -579,7 +582,6 @@ public class TransactionLog { return new FSReverseReader(); } - public class LogReader { private ChannelFastInputStream fis; private LogCodec codec = new LogCodec(resolver); diff --git a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java index 051e11f9120..c72bae38cb2 100644 --- a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java +++ b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java @@ -17,16 +17,18 @@ package org.apache.solr.update; * limitations under the License. */ -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import org.apache.http.client.HttpClient; +import org.apache.http.conn.ClientConnectionManager; +import org.apache.http.impl.conn.PoolingClientConnectionManager; import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.ExecutorUtil; -import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.common.util.SolrjNamedThreadFactory; +import org.apache.solr.core.ConfigSolr; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,36 +36,48 @@ public class UpdateShardHandler { private static Logger log = LoggerFactory.getLogger(UpdateShardHandler.class); - private ThreadPoolExecutor cmdDistribExecutor = new ThreadPoolExecutor(0, - Integer.MAX_VALUE, 5, TimeUnit.SECONDS, new SynchronousQueue(), - new DefaultSolrThreadFactory("cmdDistribExecutor")); + private ExecutorService updateExecutor = Executors.newCachedThreadPool( + new SolrjNamedThreadFactory("updateExecutor")); + + private PoolingClientConnectionManager clientConnectionManager; private final HttpClient client; - public 
UpdateShardHandler(int distribUpdateConnTimeout, int distribUpdateSoTimeout) { + public UpdateShardHandler(ConfigSolr cfg) { + + clientConnectionManager = new PoolingClientConnectionManager(); + clientConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnections()); + clientConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost()); + + ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 500); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 16); - params.set(HttpClientUtil.PROP_SO_TIMEOUT, distribUpdateSoTimeout); - params.set(HttpClientUtil.PROP_CONNECTION_TIMEOUT, distribUpdateConnTimeout); - client = HttpClientUtil.createClient(params); + params.set(HttpClientUtil.PROP_SO_TIMEOUT, cfg.getDistributedSocketTimeout()); + params.set(HttpClientUtil.PROP_CONNECTION_TIMEOUT, cfg.getDistributedConnectionTimeout()); + params.set(HttpClientUtil.PROP_USE_RETRY, false); + client = HttpClientUtil.createClient(params, clientConnectionManager); } public HttpClient getHttpClient() { return client; } + + public ClientConnectionManager getConnectionManager() { + return clientConnectionManager; + } - public ThreadPoolExecutor getCmdDistribExecutor() { - return cmdDistribExecutor; + public ExecutorService getUpdateExecutor() { + return updateExecutor; } public void close() { try { - ExecutorUtil.shutdownNowAndAwaitTermination(cmdDistribExecutor); + ExecutorUtil.shutdownAndAwaitTermination(updateExecutor); } catch (Throwable e) { SolrException.log(log, e); + } finally { + clientConnectionManager.shutdown(); } - client.getConnectionManager().shutdown(); } + } diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java index 345b3fd827b..de2b0885262 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java +++ 
b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java @@ -17,10 +17,24 @@ package org.apache.solr.update.processor; * limitations under the License. */ -import org.apache.http.client.HttpClient; +import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; + import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; -import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestRecovery; import org.apache.solr.cloud.CloudDescriptor; @@ -51,10 +65,7 @@ import org.apache.solr.common.params.UpdateParams; import org.apache.solr.common.util.Hash; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.CoreDescriptor; -import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.component.HttpShardHandlerFactory; import org.apache.solr.handler.component.RealTimeGetComponent; -import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.response.SolrQueryResponse; @@ -78,25 +89,13 @@ import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import 
java.util.Map.Entry; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.ReentrantLock; - -import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM; - // NOT mt-safe... create a new processor for each add thread // TODO: we really should not wait for distrib after local? unless a certain replication factor is asked for public class DistributedUpdateProcessor extends UpdateRequestProcessor { + public static final String DISTRIB_FROM_SHARD = "distrib.from.shard"; + public static final String DISTRIB_FROM_COLLECTION = "distrib.from.collection"; + public static final String DISTRIB_FROM_PARENT = "distrib.from.parent"; + public static final String DISTRIB_FROM = "distrib.from"; private static final String TEST_DISTRIB_SKIP_SERVERS = "test.distrib.skip.servers"; public final static Logger log = LoggerFactory.getLogger(DistributedUpdateProcessor.class); @@ -122,17 +121,6 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { } } } - - private final HttpClient client; - { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 10000); - params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 20); - params.set(HttpClientUtil.PROP_CONNECTION_TIMEOUT, 15000); - params.set(HttpClientUtil.PROP_SO_TIMEOUT, 60000); - params.set(HttpClientUtil.PROP_USE_RETRY, false); - client = HttpClientUtil.createClient(params); - } public static final String COMMIT_END_POINT = "commit_end_point"; public static final String LOG_REPLAY = "log_replay"; @@ -197,7 +185,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { this.zkEnabled = coreDesc.getCoreContainer().isZooKeeperAware(); zkController = req.getCore().getCoreDescriptor().getCoreContainer().getZkController(); if (zkEnabled) { - cmdDistrib = new 
SolrCmdDistributor(coreDesc.getCoreContainer().getUpdateExecutor()); + cmdDistrib = new SolrCmdDistributor(coreDesc.getCoreContainer().getUpdateShardHandler()); } //this.rsp = reqInfo != null ? reqInfo.getRsp() : null; @@ -268,7 +256,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { // if request is coming from another collection then we want it to be sent to all replicas // even if it's phase is FROMLEADER - String fromCollection = updateCommand.getReq().getParams().get("distrib.from.collection"); + String fromCollection = updateCommand.getReq().getParams().get(DISTRIB_FROM_COLLECTION); if (DistribPhase.FROMLEADER == phase && !isSubShardLeader && fromCollection == null) { // we are coming from the leader, just go local - add no urls @@ -408,13 +396,12 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { for (DocRouter.Range range : ranges) { if (range.includes(hash)) { if (nodes == null) nodes = new ArrayList(); - Collection activeSlices = cstate.getActiveSlices(rule.getTargetCollectionName()); + DocCollection targetColl = cstate.getCollection(rule.getTargetCollectionName()); + Collection activeSlices = targetColl.getRouter().getSearchSlicesSingle(id, null, targetColl); if (activeSlices == null || activeSlices.isEmpty()) { throw new SolrException(ErrorCode.SERVER_ERROR, - "No active slices found for target collection: " + rule.getTargetCollectionName()); + "No active slices serving " + id + " found for target collection: " + rule.getTargetCollectionName()); } - // it doesn't matter where we forward it so just choose the first one - // todo this can be optimized Replica targetLeader = cstate.getLeader(rule.getTargetCollectionName(), activeSlices.iterator().next().getName()); nodes.add(new StdNode(new ZkCoreNodeProps(targetLeader))); break; @@ -459,13 +446,13 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { boolean isReplayOrPeersync = (updateCommand.getFlags() & (UpdateCommand.REPLAY | 
UpdateCommand.PEER_SYNC)) != 0; if (isReplayOrPeersync) return; - String from = req.getParams().get("distrib.from"); + String from = req.getParams().get(DISTRIB_FROM); ClusterState clusterState = zkController.getClusterState(); CloudDescriptor cloudDescriptor = req.getCore().getCoreDescriptor().getCloudDescriptor(); Slice mySlice = clusterState.getSlice(collection, cloudDescriptor.getShardId()); boolean localIsLeader = cloudDescriptor.isLeader(); if (DistribPhase.FROMLEADER == phase && localIsLeader && from != null) { // from will be null on log replay - String fromShard = req.getParams().get("distrib.from.parent"); + String fromShard = req.getParams().get(DISTRIB_FROM_PARENT); if (fromShard != null) { if (Slice.ACTIVE.equals(mySlice.getState())) { throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, @@ -480,7 +467,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { "Request says it is coming from parent shard leader but parent hash range is not superset of my range"); } } else { - String fromCollection = req.getParams().get("distrib.from.collection"); // is it because of a routing rule? + String fromCollection = req.getParams().get(DISTRIB_FROM_COLLECTION); // is it because of a routing rule? 
if (fromCollection == null) { log.error("Request says it is coming from leader, but we are the leader: " + req.getParamString()); throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Request says it is coming from leader, but we are the leader"); @@ -559,9 +546,9 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { if (subShardLeaders != null && !subShardLeaders.isEmpty()) { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( zkController.getBaseUrl(), req.getCore().getName())); - params.set("distrib.from.parent", req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); + params.set(DISTRIB_FROM_PARENT, req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); for (Node subShardLeader : subShardLeaders) { cmdDistrib.distribAdd(cmd, Collections.singletonList(subShardLeader), params, true); } @@ -570,10 +557,10 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { if (nodesByRoutingRules != null && !nodesByRoutingRules.isEmpty()) { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( zkController.getBaseUrl(), req.getCore().getName())); - params.set("distrib.from.collection", req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName()); - params.set("distrib.from.shard", req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); + params.set(DISTRIB_FROM_COLLECTION, req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName()); + params.set(DISTRIB_FROM_SHARD, req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); 
for (Node nodesByRoutingRule : nodesByRoutingRules) { cmdDistrib.distribAdd(cmd, Collections.singletonList(nodesByRoutingRule), params, true); } @@ -588,13 +575,9 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { (isLeader || isSubShardLeader ? DistribPhase.FROMLEADER.toString() : DistribPhase.TOLEADER.toString())); - if (isLeader || isSubShardLeader) { - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( - zkController.getBaseUrl(), req.getCore().getName())); - } - - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( zkController.getBaseUrl(), req.getCore().getName())); + cmdDistrib.distribAdd(cmd, nodes, params); } @@ -681,7 +664,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { } } }; - ExecutorService executor = req.getCore().getCoreDescriptor().getCoreContainer().getUpdateExecutor(); + ExecutorService executor = req.getCore().getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getUpdateExecutor(); executor.execute(thread); } @@ -745,7 +728,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { boolean isReplayOrPeersync = (cmd.getFlags() & (UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC)) != 0; boolean leaderLogic = isLeader && !isReplayOrPeersync; - boolean forwardedFromCollection = cmd.getReq().getParams().get("distrib.from.collection") != null; + boolean forwardedFromCollection = cmd.getReq().getParams().get(DISTRIB_FROM_COLLECTION) != null; VersionBucket bucket = vinfo.bucket(bucketHash); @@ -990,9 +973,9 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { if (subShardLeaders != null && !subShardLeaders.isEmpty()) { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( 
zkController.getBaseUrl(), req.getCore().getName())); - params.set("distrib.from.parent", cloudDesc.getShardId()); + params.set(DISTRIB_FROM_PARENT, cloudDesc.getShardId()); cmdDistrib.distribDelete(cmd, subShardLeaders, params, true); } @@ -1000,10 +983,10 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { if (nodesByRoutingRules != null && !nodesByRoutingRules.isEmpty()) { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( zkController.getBaseUrl(), req.getCore().getName())); - params.set("distrib.from.collection", req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName()); - params.set("distrib.from.shard", req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); + params.set(DISTRIB_FROM_COLLECTION, req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName()); + params.set(DISTRIB_FROM_SHARD, req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); for (Node nodesByRoutingRule : nodesByRoutingRules) { cmdDistrib.distribDelete(cmd, Collections.singletonList(nodesByRoutingRule), params, true); } @@ -1016,13 +999,11 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(DISTRIB_UPDATE_PARAM, - (isLeader || isSubShardLeader ? - DistribPhase.FROMLEADER.toString() : - DistribPhase.TOLEADER.toString())); - if (isLeader || isSubShardLeader) { - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( - zkController.getBaseUrl(), req.getCore().getName())); - } + (isLeader || isSubShardLeader ? 
DistribPhase.FROMLEADER.toString() + : DistribPhase.TOLEADER.toString())); + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( + zkController.getBaseUrl(), req.getCore().getName())); + cmdDistrib.distribDelete(cmd, nodes, params); } @@ -1083,6 +1064,8 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { ModifiableSolrParams outParams = new ModifiableSolrParams(filterParams(req.getParams())); outParams.set(DISTRIB_UPDATE_PARAM, DistribPhase.TOLEADER.toString()); + outParams.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( + zkController.getBaseUrl(), req.getCore().getName())); SolrParams params = req.getParams(); String route = params.get(ShardParams._ROUTE_); @@ -1198,7 +1181,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(VERSION_FIELD, Long.toString(cmd.getVersion())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); - params.set("update.from", ZkCoreNodeProps.getCoreUrl( + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( zkController.getBaseUrl(), req.getCore().getName())); boolean someReplicas = false; @@ -1235,10 +1218,10 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { if (nodesByRoutingRules != null && !nodesByRoutingRules.isEmpty()) { params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); - params.set("distrib.from", ZkCoreNodeProps.getCoreUrl( + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( zkController.getBaseUrl(), req.getCore().getName())); - params.set("distrib.from.collection", req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName()); - params.set("distrib.from.shard", req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); + params.set(DISTRIB_FROM_COLLECTION, 
req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName()); + params.set(DISTRIB_FROM_SHARD, req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId()); cmdDistrib.distribDelete(cmd, nodesByRoutingRules, params, true); } if (replicas != null) { @@ -1333,7 +1316,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { boolean isReplayOrPeersync = (cmd.getFlags() & (UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC)) != 0; boolean leaderLogic = isLeader && !isReplayOrPeersync; - boolean forwardedFromCollection = cmd.getReq().getParams().get("distrib.from.collection") != null; + boolean forwardedFromCollection = cmd.getReq().getParams().get(DISTRIB_FROM_COLLECTION) != null; if (!leaderLogic && versionOnUpdate==0) { throw new SolrException(ErrorCode.BAD_REQUEST, "missing _version_ on update from leader"); @@ -1438,7 +1421,9 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); if (!req.getParams().getBool(COMMIT_END_POINT, false)) { params.set(COMMIT_END_POINT, true); - + params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); + params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( + zkController.getBaseUrl(), req.getCore().getName())); if (nodes != null) { cmdDistrib.distribCommit(cmd, nodes, params); finish(); diff --git a/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java b/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java index 4fc04e93216..6b6d011acf3 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java +++ b/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java @@ -112,7 +112,9 @@ public class SolrLogLayout extends Layout { public String _format(LoggingEvent event) { String message = (String) event.getMessage(); - + if (message == null) { + message = ""; + } StringBuilder sb = new StringBuilder(message.length() + 80); long now = event.timeStamp; diff --git 
a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java index 8e9e405f404..af7a9633a76 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java +++ b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java @@ -430,7 +430,7 @@ public class SolrPluginUtils { // we can use the Lucene sort ability. Sort sort = null; if (commands.size() >= 2) { - sort = QueryParsing.parseSort(commands.get(1), req); + sort = QueryParsing.parseSortSpec(commands.get(1), req).getSort(); } DocList results = req.getSearcher().getDocList(query,(DocSet)null, sort, start, limit); @@ -825,7 +825,7 @@ public class SolrPluginUtils { SolrException sortE = null; Sort ss = null; try { - ss = QueryParsing.parseSort(sort, req); + ss = QueryParsing.parseSortSpec(sort, req).getSort(); } catch (SolrException e) { sortE = e; } diff --git a/solr/core/src/test-files/analytics/requestFiles/expressions.txt b/solr/core/src/test-files/analytics/requestFiles/expressions.txt new file mode 100644 index 00000000000..329d32db0e2 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestFiles/expressions.txt @@ -0,0 +1,70 @@ +o.ar.s.sum=sum(int_id) +o.ar.s.unique=unique(long_ld) +o.ar.s.su=add(sum(int_id),unique(long_ld)) +o.ar.s.mean=mean(int_id) +o.ar.s.count=count(long_ld) +o.ar.s.median=median(int_id) +o.ar.s.mcm=add(mean(int_id),count(long_ld),median(int_id)) + +o.mr.s.sum=sum(int_id) +o.mr.s.unique=unique(long_ld) +o.mr.s.su=mult(sum(int_id),unique(long_ld)) +o.mr.s.mean=mean(int_id) +o.mr.s.count=count(long_ld) +o.mr.s.median=median(int_id) +o.mr.s.mcm=mult(mean(int_id),count(long_ld),median(int_id)) + +o.dr.s.sum=sum(int_id) +o.dr.s.unique=unique(long_ld) +o.dr.s.su=div(sum(int_id),unique(long_ld)) +o.dr.s.mean=mean(int_id) +o.dr.s.count=count(long_ld) +o.dr.s.mc=div(mean(int_id),count(long_ld)) + +o.pr.s.sum=sum(int_id) +o.pr.s.unique=unique(long_ld) +o.pr.s.su=pow(sum(int_id),unique(long_ld)) 
+o.pr.s.mean=mean(int_id) +o.pr.s.count=count(long_ld) +o.pr.s.mc=pow(mean(int_id),count(long_ld)) + +o.nr.s.sum=sum(int_id) +o.nr.s.s=neg(sum(int_id)) +o.nr.s.count=count(long_ld) +o.nr.s.c=neg(count(long_ld)) + +o.avr.s.sum=sum(int_id) +o.avr.s.s=abs(neg(sum(int_id))) +o.avr.s.count=count(long_ld) +o.avr.s.c=abs(neg(count(long_ld))) + +o.cnr.s.c8=const_num(8) +o.cnr.s.c10=const_num(10) + +o.dmr.s.median=median(date_dtd) +o.dmr.s.cme=const_str(+2YEARS) +o.dmr.s.dmme=date_math(median(date_dtd),const_str(+2YEARS)) +o.dmr.s.max=max(date_dtd) +o.dmr.s.cma=const_str(+2MONTHS) +o.dmr.s.dmma=date_math(max(date_dtd),const_str(+2MONTHS)) + +o.cdr.s.cd1=const_date(1800-12-31T23:59:59Z) +o.cdr.s.cs1=const_str(1800-12-31T23:59:59Z) +o.cdr.s.cd2=const_date(1804-06-30T23:59:59Z) +o.cdr.s.cs2=const_str(1804-06-30T23:59:59Z) + +o.csr.s.cs1=const_str(this is the first) +o.csr.s.cs2=const_str(this is the second) +o.csr.s.cs3=const_str(this is the third) + +o.cr.s.csmin=const_str(this is the first) +o.cr.s.min=min(string_sd) +o.cr.s.ccmin=concat(const_str(this is the first),min(string_sd)) +o.cr.s.csmax=const_str(this is the second) +o.cr.s.max=max(string_sd) +o.cr.s.ccmax=concat(const_str(this is the second),max(string_sd)) + +o.rr.s.min=min(string_sd) +o.rr.s.rmin=rev(min(string_sd)) +o.rr.s.max=max(string_sd) +o.rr.s.rmax=rev(max(string_sd)) diff --git a/solr/core/src/test-files/analytics/requestFiles/fieldFacetExtras.txt b/solr/core/src/test-files/analytics/requestFiles/fieldFacetExtras.txt new file mode 100644 index 00000000000..3979f57bd95 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestFiles/fieldFacetExtras.txt @@ -0,0 +1,66 @@ +o.sr.s.mean=mean(int_id) +o.sr.s.median=median(int_id) +o.sr.s.count=count(int_id) +o.sr.s.percentile_20=percentile(20,int_id) +o.sr.ff=long_ld +o.sr.ff.long_ld.ss=mean +o.sr.ff.long_ld.sd=asc +o.sr.ff=float_fd +o.sr.ff.float_fd.ss=median +o.sr.ff.float_fd.sd=desc +o.sr.ff=double_dd +o.sr.ff.double_dd.ss=count +o.sr.ff.double_dd.sd=asc 
+o.sr.ff=string_sd +o.sr.ff.string_sd.ss=percentile_20 +o.sr.ff.string_sd.sd=desc + +o.lr.s.mean=mean(int_id) +o.lr.s.median=median(int_id) +o.lr.s.count=count(int_id) +o.lr.s.percentile_20=percentile(20,int_id) +o.lr.ff=long_ld +o.lr.ff.long_ld.ss=mean +o.lr.ff.long_ld.sd=asc +o.lr.ff.long_ld.limit=5 +o.lr.ff=float_fd +o.lr.ff.float_fd.ss=median +o.lr.ff.float_fd.sd=desc +o.lr.ff.float_fd.limit=3 +o.lr.ff=double_dd +o.lr.ff.double_dd.ss=count +o.lr.ff.double_dd.sd=asc +o.lr.ff.double_dd.limit=7 +o.lr.ff=string_sd +o.lr.ff.string_sd.ss=percentile_20 +o.lr.ff.string_sd.sd=desc +o.lr.ff.string_sd.limit=1 + + + +o.offAll.s.mean=mean(int_id) +o.offAll.ff=long_ld +o.offAll.ff.long_ld.ss=mean +o.offAll.ff.long_ld.sd=asc +o.offAll.ff.long_ld.limit=7 + +o.off0.s.mean=mean(int_id) +o.off0.ff=long_ld +o.off0.ff.long_ld.ss=mean +o.off0.ff.long_ld.sd=asc +o.off0.ff.long_ld.limit=2 +o.off0.ff.long_ld.offset=0 + +o.off1.s.mean=mean(int_id) +o.off1.ff=long_ld +o.off1.ff.long_ld.ss=mean +o.off1.ff.long_ld.sd=asc +o.off1.ff.long_ld.limit=2 +o.off1.ff.long_ld.offset=2 + +o.off2.s.mean=mean(int_id) +o.off2.ff=long_ld +o.off2.ff.long_ld.ss=mean +o.off2.ff.long_ld.sd=asc +o.off2.ff.long_ld.limit=3 +o.off2.ff.long_ld.offset=4 diff --git a/solr/core/src/test-files/analytics/requestFiles/fieldFacets.txt b/solr/core/src/test-files/analytics/requestFiles/fieldFacets.txt new file mode 100644 index 00000000000..5ba5953a0c8 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestFiles/fieldFacets.txt @@ -0,0 +1,132 @@ +o.sum.s.int=sum(int_id) +o.sum.s.long=sum(long_ld) +o.sum.s.float=sum(float_fd) +o.sum.s.double=sum(double_dd) +o.sum.ff=string_sd +o.sum.ff=date_dtd + +o.mean.s.int=mean(int_id) +o.mean.s.long=mean(long_ld) +o.mean.s.float=mean(float_fd) +o.mean.s.double=mean(double_dd) +o.mean.ff=string_sd +o.mean.ff=date_dtd + +o.sumOfSquares.s.int=sumofsquares(int_id) +o.sumOfSquares.s.long=sumofsquares(long_ld) +o.sumOfSquares.s.float=sumofsquares(float_fd) 
+o.sumOfSquares.s.double=sumofsquares(double_dd) +o.sumOfSquares.ff=string_sd +o.sumOfSquares.ff=date_dtd + +o.stddev.s.int=stddev(int_id) +o.stddev.s.long=stddev(long_ld) +o.stddev.s.float=stddev(float_fd) +o.stddev.s.double=stddev(double_dd) +o.stddev.ff=string_sd +o.stddev.ff=date_dtd + +o.median.s.int=median(int_id) +o.median.s.long=median(long_ld) +o.median.s.float=median(float_fd) +o.median.s.double=median(double_dd) +o.median.ff=string_sd +o.median.ff=date_dtd + +o.percentile_20n.s.int=percentile(20,int_id) +o.percentile_20n.s.long=percentile(20,long_ld) +o.percentile_20n.s.float=percentile(20,float_fd) +o.percentile_20n.s.double=percentile(20,double_dd) +o.percentile_20n.ff=string_sd +o.percentile_20n.ff=date_dtd + +o.percentile_20.s.str=percentile(20,string_sd) +o.percentile_20.s.date=percentile(20,date_dtd) +o.percentile_20.ff=int_id +o.percentile_20.ff=long_ld + +o.percentile_60n.s.int=percentile(60,int_id) +o.percentile_60n.s.long=percentile(60,long_ld) +o.percentile_60n.s.float=percentile(60,float_fd) +o.percentile_60n.s.double=percentile(60,double_dd) +o.percentile_60n.ff=string_sd +o.percentile_60n.ff=date_dtd + +o.percentile_60.s.str=percentile(60,string_sd) +o.percentile_60.s.date=percentile(60,date_dtd) +o.percentile_60.ff=int_id +o.percentile_60.ff=long_ld + +o.minn.s.int=min(int_id) +o.minn.s.long=min(long_ld) +o.minn.s.float=min(float_fd) +o.minn.s.double=min(double_dd) +o.minn.ff=string_sd +o.minn.ff=date_dtd + +o.min.s.str=min(string_sd) +o.min.s.date=min(date_dtd) +o.min.ff=int_id +o.min.ff=long_ld + +o.maxn.s.int=max(int_id) +o.maxn.s.long=max(long_ld) +o.maxn.s.float=max(float_fd) +o.maxn.s.double=max(double_dd) +o.maxn.ff=string_sd +o.maxn.ff=date_dtd + +o.max.s.str=max(string_sd) +o.max.s.date=max(date_dtd) +o.max.ff=int_id +o.max.ff=long_ld + +o.countn.s.int=count(int_id) +o.countn.s.long=count(long_ld) +o.countn.s.float=count(float_fd) +o.countn.s.double=count(double_dd) +o.countn.ff=string_sd +o.countn.ff=date_dtd + 
+o.count.s.str=count(string_sd) +o.count.s.date=count(date_dtd) +o.count.ff=int_id +o.count.ff=long_ld + +o.uniquen.s.int=unique(int_id) +o.uniquen.s.long=unique(long_ld) +o.uniquen.s.float=unique(float_fd) +o.uniquen.s.double=unique(double_dd) +o.uniquen.ff=string_sd +o.uniquen.ff=date_dtd + +o.unique.s.str=unique(string_sd) +o.unique.s.date=unique(date_dtd) +o.unique.ff=int_id +o.unique.ff=long_ld + +o.missingn.s.int=missing(int_id) +o.missingn.s.long=missing(long_ld) +o.missingn.s.float=missing(float_fd) +o.missingn.s.double=missing(double_dd) +o.missingn.ff=string_sd +o.missingn.ff=date_dtd + +o.missing.s.str=missing(string_sd) +o.missing.s.date=missing(date_dtd) +o.missing.ff=int_id +o.missing.ff=long_ld + +o.multivalued.s.mean=mean(int_id) +o.multivalued.ff=long_ldm +o.multivalued.ff=string_sdm +o.multivalued.ff=date_dtdm + +o.missingf.s.mean=mean(int_id) +o.missingf.ff=date_dtd +o.missingf.ff.date_dtd.dim=true +o.missingf.ff=string_sd +o.missingf.ff.string_sd.dim=true +o.missingf.ff.string_sd.sm=true +o.missingf.ff=date_dtdm +o.missingf.ff.date_dtdm.sm=true diff --git a/solr/core/src/test-files/analytics/requestFiles/functions.txt b/solr/core/src/test-files/analytics/requestFiles/functions.txt new file mode 100644 index 00000000000..e4930b6ba6b --- /dev/null +++ b/solr/core/src/test-files/analytics/requestFiles/functions.txt @@ -0,0 +1,62 @@ +o.ar.s.sum=sum(add(int_id,float_fd)) +o.ar.s.sumc=sum(add_if_dd) +o.ar.s.mean=mean(add(long_ld,double_dd,float_fd)) +o.ar.s.meanc=mean(add_ldf_dd) + +o.mr.s.sum=sum(mult(int_id,float_fd)) +o.mr.s.sumc=sum(mult_if_dd) +o.mr.s.mean=mean(mult(long_ld,double_dd,float_fd)) +o.mr.s.meanc=mean(mult_ldf_dd) + +o.dr.s.sum=sum(div(int_id,float_fd)) +o.dr.s.sumc=sum(div_if_dd) +o.dr.s.mean=mean(div(long_ld,double_dd)) +o.dr.s.meanc=mean(div_ld_dd) + +o.pr.s.sum=sum(pow(int_id,float_fd)) +o.pr.s.sumc=sum(pow_if_dd) +o.pr.s.mean=mean(pow(long_ld,double_dd)) +o.pr.s.meanc=mean(pow_ld_dd) + +o.nr.s.sum=sum(neg(int_id)) 
+o.nr.s.sumc=sum(neg_i_dd) +o.nr.s.mean=mean(neg(long_ld)) +o.nr.s.meanc=mean(neg_l_dd) + +o.avr.s.sum=sum(abs(neg(int_id))) +o.avr.s.sumc=sum(int_id) +o.avr.s.mean=mean(abs(neg(int_id))) +o.avr.s.meanc=mean(int_id) + +o.cnr.s.sum=sum(const_num(8)) +o.cnr.s.sumc=sum(const_8_dd) +o.cnr.s.mean=mean(const_num(10)) +o.cnr.s.meanc=mean(const_10_dd) + +o.dmr.s.median=median(date_math(date_dtd,const_str(+2YEARS))) +o.dmr.s.medianc=median(dm_2y_dtd) +o.dmr.s.max=max(date_math(date_dtd,const_str(+2MONTHS))) +o.dmr.s.maxc=max(dm_2m_dtd) + +o.cdr.s.median=median(const_date(1800-06-30T23:59:59Z)) +o.cdr.s.medianc=median(const_00_dtd) +o.cdr.s.max=max(const_date(1804-06-30T23:59:59Z)) +o.cdr.s.maxc=max(const_04_dtd) + +o.csr.s.min=min(const_str(this is the first)) +o.csr.s.minc=min(const_first_sd) +o.csr.s.max=max(const_str(this is the second)) +o.csr.s.maxc=max(const_second_sd) + +o.cr.s.min=min(concat(const_str(this is the first),string_sd)) +o.cr.s.minc=min(concat_first_sd) +o.cr.s.max=max(concat(const_str(this is the second),string_sd)) +o.cr.s.maxc=max(concat_second_sd) + +o.rr.s.min=min(rev(string_sd)) +o.rr.s.minc=min(rev_sd) +o.rr.s.max=max(rev(string_sd)) +o.rr.s.maxc=max(rev_sd) + +o.ms.s.min=min(miss_dd) +o.ms.s.max=max(miss_dd) diff --git a/solr/core/src/test-files/analytics/requestFiles/noFacets.txt b/solr/core/src/test-files/analytics/requestFiles/noFacets.txt new file mode 100644 index 00000000000..b3d91638cf8 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestFiles/noFacets.txt @@ -0,0 +1,74 @@ +o.sr.s.int_id=sum(int_i) +o.sr.s.long_ld=sum(long_l) +o.sr.s.float_fd=sum(float_f) +o.sr.s.double_dd=sum(double_d) + +o.sosr.s.int_id=sumofsquares(int_id) +o.sosr.s.long_ld=sumofsquares(long_ld) +o.sosr.s.float_fd=sumofsquares(float_fd) +o.sosr.s.double_dd=sumofsquares(double_dd) + +o.mr.s.int_id=mean(int_id) +o.mr.s.long_ld=mean(long_ld) +o.mr.s.float_fd=mean(float_fd) +o.mr.s.double_dd=mean(double_dd) + +o.str.s.int_id=stddev(int_id) 
+o.str.s.long_ld=stddev(long_ld) +o.str.s.float_fd=stddev(float_fd) +o.str.s.double_dd=stddev(double_dd) + +o.medr.s.int_id=median(int_id) +o.medr.s.long_ld=median(long_ld) +o.medr.s.float_fd=median(float_fd) +o.medr.s.double_dd=median(double_dd) +o.medr.s.date_dtd=median(date_dtd) + +o.p2r.s.int_id=percentile(20,int_id) +o.p2r.s.long_ld=percentile(20,long_ld) +o.p2r.s.float_fd=percentile(20,float_fd) +o.p2r.s.double_dd=percentile(20,double_dd) +o.p2r.s.date_dtd=percentile(20,date_dtd) +o.p2r.s.string_sd=percentile(20,string_sd) + +o.p6r.s.int_id=percentile(60,int_id) +o.p6r.s.long_ld=percentile(60,long_ld) +o.p6r.s.float_fd=percentile(60,float_fd) +o.p6r.s.double_dd=percentile(60,double_dd) +o.p6r.s.date_dtd=percentile(60,date_dtd) +o.p6r.s.string_sd=percentile(60,string_sd) + +o.mir.s.int_id=min(int_id) +o.mir.s.long_ld=min(long_ld) +o.mir.s.float_fd=min(float_fd) +o.mir.s.double_dd=min(double_dd) +o.mir.s.date_dtd=min(date_dtd) +o.mir.s.string_sd=min(string_sd) + +o.mar.s.int_id=max(int_id) +o.mar.s.long_ld=max(long_ld) +o.mar.s.float_fd=max(float_fd) +o.mar.s.double_dd=max(double_dd) +o.mar.s.date_dtd=max(date_dtd) +o.mar.s.string_sd=max(string_sd) + +o.cr.s.int_id=count(int_id) +o.cr.s.long_ld=count(long_ld) +o.cr.s.float_fd=count(float_fd) +o.cr.s.double_dd=count(double_dd) +o.cr.s.date_dtd=count(date_dtd) +o.cr.s.string_sd=count(string_sd) + +o.ur.s.int_id=unique(int_id) +o.ur.s.long_ld=unique(long_ld) +o.ur.s.float_fd=unique(float_fd) +o.ur.s.double_dd=unique(double_dd) +o.ur.s.date_dtd=unique(date_dtd) +o.ur.s.string_sd=unique(string_sd) + +o.misr.s.int_id=missing(int_id) +o.misr.s.long_ld=missing(long_ld) +o.misr.s.float_fd=missing(float_fd) +o.misr.s.double_dd=missing(double_dd) +o.misr.s.date_dtd=missing(date_dtd) +o.misr.s.string_sd=missing(string_sd) diff --git a/solr/core/src/test-files/analytics/requestFiles/queryFacets.txt b/solr/core/src/test-files/analytics/requestFiles/queryFacets.txt new file mode 100644 index 00000000000..6be4a4e1a60 --- 
/dev/null +++ b/solr/core/src/test-files/analytics/requestFiles/queryFacets.txt @@ -0,0 +1,45 @@ +o.ir.s.sum=sum(int_id) +o.ir.s.mean=mean(int_id) +o.ir.s.median=median(int_id) +o.ir.s.percentile_8=percentile(8,int_id) +o.ir.ff=string_sd +o.ir.ff.string_sd.h=true +o.ir.qf=float1 +o.ir.qf.float1.q=float_fd:[* TO 50] +o.ir.qf=float2 +o.ir.qf.float2.q=float_fd:[* TO 30] + +o.pr.s.sum=sum(int_id) +o.pr.s.mean=mean(int_id) +o.pr.s.median=median(int_id) +o.pr.s.q1=concat(const_str(float_fd:[), percentile(10,int_id), const_str( TO ), median(int_id), const_str(])) +o.pr.hs.q2=concat(const_str(float_fd:[), percentile(30,int_id), const_str( TO ), median(int_id), const_str(])) +o.pr.hs.q3=concat(const_str(float_fd:[), percentile(40,int_id), const_str( TO ), median(int_id), const_str(])) +o.pr.s.percentile_8=percentile(8,int_id) +o.pr.ff=string_sd +o.pr.ff.string_sd.h=true +o.pr.qf=float3 +o.pr.qf.float3.q=result(q1) +o.pr.qf.float3.q=result(q2) +o.pr.qf.float3.q=result(q3) +o.pr.qf.float3.q=result(q1,string_sd,abc2) +o.pr.qf=float4 +o.pr.qf.float4.d=float3 +o.pr.qf.float4.q=qresult(q1,float3,result(q1)) + +o.lr.s.sum=sum(long_ld) +o.lr.s.mean=mean(long_ld) +o.lr.s.median=median(long_ld) +o.lr.s.percentile_8=percentile(8,long_ld) +o.lr.qf=string +o.lr.qf.string.q=string_sd:abc1 +o.lr.qf.string.q=string_sd:abc2 + +o.fr.s.sum=sum(float_fd) +o.fr.s.mean=mean(float_fd) +o.fr.s.median=median(float_fd) +o.fr.s.percentile_8=percentile(8,float_fd) +o.fr.qf=lad +o.fr.qf.lad.q=long_ld:[20 TO *] +o.fr.qf.lad.q=long_ld:[30 TO *] +o.fr.qf.lad.q=double_dd:[* TO 50] diff --git a/solr/core/src/test-files/analytics/requestFiles/rangeFacets.txt b/solr/core/src/test-files/analytics/requestFiles/rangeFacets.txt new file mode 100644 index 00000000000..cbfe052a250 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestFiles/rangeFacets.txt @@ -0,0 +1,170 @@ +o.ri.s.sum=sum(int_id) +o.ri.s.mean=mean(int_id) +o.ri.s.median=median(int_id) +o.ri.s.count=count(int_id) 
+o.ri.s.sumOfSquares=sumofsquares(int_id) +o.ri.rf=long_ld +o.ri.rf.long_ld.st=5 +o.ri.rf.long_ld.e=30 +o.ri.rf.long_ld.g=5 +o.ri.rf.long_ld.ib=lower +o.ri.rf.long_ld.or=all +o.ri.rf=double_dd +o.ri.rf.double_dd.st=3 +o.ri.rf.double_dd.e=39 +o.ri.rf.double_dd.g=7 +o.ri.rf.double_dd.ib=upper +o.ri.rf.double_dd.ib=outer +o.ri.rf.double_dd.or=all +o.ri.rf=date_dtd +o.ri.rf.date_dtd.st=1007-01-01T23:59:59Z +o.ri.rf.date_dtd.e=1044-01-01T23:59:59Z +o.ri.rf.date_dtd.g=+7YEARS +o.ri.rf.date_dtd.ib=lower +o.ri.rf.date_dtd.ib=edge +o.ri.rf.date_dtd.ib=outer +o.ri.rf.date_dtd.or=all + +o.rf.s.sum=sum(float_fd) +o.rf.s.mean=mean(float_fd) +o.rf.s.median=median(float_fd) +o.rf.s.count=count(float_fd) +o.rf.s.sumOfSquares=sumofsquares(float_fd) +o.rf.rf=long_ld +o.rf.rf.long_ld.st=0 +o.rf.rf.long_ld.e=29 +o.rf.rf.long_ld.g=4 +o.rf.rf.long_ld.ib=all +o.rf.rf.long_ld.or=all +o.rf.rf=double_dd +o.rf.rf.double_dd.st=4 +o.rf.rf.double_dd.e=47 +o.rf.rf.double_dd.g=11 +o.rf.rf.double_dd.ib=edge +o.rf.rf.double_dd.or=all +o.rf.rf=date_dtd +o.rf.rf.date_dtd.st=1004-01-01T23:59:59Z +o.rf.rf.date_dtd.e=1046-01-01T23:59:59Z +o.rf.rf.date_dtd.g=+5YEARS +o.rf.rf.date_dtd.ib=upper +o.rf.rf.date_dtd.ib=edge +o.rf.rf.date_dtd.or=all + +o.hi.s.sum=sum(int_id) +o.hi.s.mean=mean(int_id) +o.hi.s.median=median(int_id) +o.hi.s.count=count(int_id) +o.hi.s.sumOfSquares=sumofsquares(int_id) +o.hi.rf=long_ld +o.hi.rf.long_ld.st=5 +o.hi.rf.long_ld.e=30 +o.hi.rf.long_ld.g=5 +o.hi.rf.long_ld.he=true +o.hi.rf.long_ld.ib=lower +o.hi.rf.long_ld.or=all +o.hi.rf=double_dd +o.hi.rf.double_dd.st=3 +o.hi.rf.double_dd.e=39 +o.hi.rf.double_dd.g=7 +o.hi.rf.double_dd.he=true +o.hi.rf.double_dd.ib=upper +o.hi.rf.double_dd.ib=outer +o.hi.rf.double_dd.or=all +o.hi.rf=date_dtd +o.hi.rf.date_dtd.st=1007-01-01T23:59:59Z +o.hi.rf.date_dtd.e=1044-01-01T23:59:59Z +o.hi.rf.date_dtd.g=+7YEARS +o.hi.rf.date_dtd.he=true +o.hi.rf.date_dtd.ib=lower +o.hi.rf.date_dtd.ib=edge +o.hi.rf.date_dtd.ib=outer +o.hi.rf.date_dtd.or=all + 
+o.hf.s.sum=sum(float_fd) +o.hf.s.mean=mean(float_fd) +o.hf.s.median=median(float_fd) +o.hf.s.count=count(float_fd) +o.hf.s.sumOfSquares=sumofsquares(float_fd) +o.hf.rf=long_ld +o.hf.rf.long_ld.st=0 +o.hf.rf.long_ld.e=29 +o.hf.rf.long_ld.g=4 +o.hf.rf.long_ld.he=true +o.hf.rf.long_ld.ib=all +o.hf.rf.long_ld.or=all +o.hf.rf=double_dd +o.hf.rf.double_dd.st=4 +o.hf.rf.double_dd.e=47 +o.hf.rf.double_dd.g=11 +o.hf.rf.double_dd.he=true +o.hf.rf.double_dd.ib=edge +o.hf.rf.double_dd.or=all +o.hf.rf=date_dtd +o.hf.rf.date_dtd.st=1004-01-01T23:59:59Z +o.hf.rf.date_dtd.e=1046-01-01T23:59:59Z +o.hf.rf.date_dtd.g=+5YEARS +o.hf.rf.date_dtd.he=true +o.hf.rf.date_dtd.ib=upper +o.hf.rf.date_dtd.ib=edge +o.hf.rf.date_dtd.or=all + +o.mi.s.sum=sum(int_id) +o.mi.s.mean=mean(int_id) +o.mi.s.median=median(int_id) +o.mi.s.count=count(int_id) +o.mi.s.sumOfSquares=sumofsquares(int_id) +o.mi.rf=long_ld +o.mi.rf.long_ld.st=5 +o.mi.rf.long_ld.e=30 +o.mi.rf.long_ld.g=4,2,6,3 +o.mi.rf.long_ld.ib=lower +o.mi.rf.long_ld.or=all +o.mi.rf=double_dd +o.mi.rf.double_dd.st=3 +o.mi.rf.double_dd.e=39 +o.mi.rf.double_dd.g=3,1,7 +o.mi.rf.double_dd.ib=upper +o.mi.rf.double_dd.ib=outer +o.mi.rf.double_dd.or=all +o.mi.rf=date_dtd +o.mi.rf.date_dtd.st=1007-01-01T23:59:59Z +o.mi.rf.date_dtd.e=1044-01-01T23:59:59Z +o.mi.rf.date_dtd.g=+2YEARS,+7YEARS +o.mi.rf.date_dtd.ib=lower +o.mi.rf.date_dtd.ib=edge +o.mi.rf.date_dtd.ib=outer +o.mi.rf.date_dtd.or=all + +o.mf.s.sum=sum(float_fd) +o.mf.s.mean=mean(float_fd) +o.mf.s.median=median(float_fd) +o.mf.s.count=count(float_fd) +o.mf.s.sumOfSquares=sumofsquares(float_fd) +o.mf.rf=long_ld +o.mf.rf.long_ld.st=0 +o.mf.rf.long_ld.e=29 +o.mf.rf.long_ld.g=1,4 +o.mf.rf.long_ld.ib=all +o.mf.rf.long_ld.or=all +o.mf.rf=double_dd +o.mf.rf.double_dd.st=4 +o.mf.rf.double_dd.e=47 +o.mf.rf.double_dd.g=2,3,11 +o.mf.rf.double_dd.ib=edge +o.mf.rf.double_dd.or=all +o.mf.rf=date_dtd +o.mf.rf.date_dtd.st=1004-01-01T23:59:59Z +o.mf.rf.date_dtd.e=1046-01-01T23:59:59Z 
+o.mf.rf.date_dtd.g=+4YEARS,+5YEARS +o.mf.rf.date_dtd.ib=upper +o.mf.rf.date_dtd.ib=edge +o.mf.rf.date_dtd.or=all + +o.pf.s.mean=mean(float_fd) +o.pf.hs.min=min(date_dtd) +o.pf.hs.max=max(date_dtd) +o.pf.hs.gap=const_str(+5YEARS) +o.pf.rf=date_dtd +o.pf.rf.date_dtd.st=result(min) +o.pf.rf.date_dtd.e=result(max) +o.pf.rf.date_dtd.g=result(gap) diff --git a/solr/core/src/test-files/analytics/requestXMLFiles/expressions.xml b/solr/core/src/test-files/analytics/requestXMLFiles/expressions.xml new file mode 100644 index 00000000000..511805de935 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestXMLFiles/expressions.xml @@ -0,0 +1,285 @@ + + + + Add Request + + + sum(int(int_id)) + sum + + + unique(long(long_ld)) + unique + + + add(sum(int(int_id)),unique(long(long_ld))) + add sum and unique + + + + mean(int(int_id)) + mean + + + count(long(long_ld)) + count + + + median(int(int_id)) + median + + + add(mean(int(int_id)),count(long(long_ld)),median(int(int_id))) + add mean and count and median + + + + + Multiply Request + + + sum(int(int_id)) + sum + + + unique(long(long_ld)) + unique + + + mult(sum(int(int_id)),unique(long(long_ld))) + multiply sum and unique + + + + mean(int(int_id)) + mean + + + count(long(long_ld)) + count + + + median(int(int_id)) + median + + + mult(mean(int(int_id)),count(long(long_ld)),median(int(int_id))) + multiply mean and count and median + + + + + Divide Request + + + sum(int(int_id)) + sum + + + unique(long(long_ld)) + unique + + + div(sum(int(int_id)),unique(long(long_ld))) + divide sum by unique + + + + mean(int(int_id)) + mean + + + count(long(long_ld)) + count + + + div(mean(int(int_id)),count(long(long_ld))) + divide mean by count + + + + + Power Request + + + sum(int(int_id)) + sum + + + unique(long(long_ld)) + unique + + + pow(sum(int(int_id)),unique(long(long_ld))) + power sum by unique + + + + mean(int(int_id)) + mean + + + count(long(long_ld)) + count + + + pow(mean(int(int_id)),count(long(long_ld))) + power mean by 
count + + + + + Negate Request + + + sum(int(int_id)) + sum + + + neg(sum(int(int_id))) + negate of sum + + + + count(long(long_ld)) + count + + + neg(count(long(long_ld))) + negate of count + + + + + Const Num Request + + + const_num(8) + constant 8 + + + const_num(10) + constant 10 + + + + + Date Math Request + + + median(date(date_dtd)) + median + + + const_str(+2YEARS) + constant str median + + + date_math(median(date(date_dtd)),const_str(+2YEARS)) + date math median + + + + max(date(date_dtd)) + max + + + const_str(+2MONTHS) + constant str max + + + date_math(max(date(date_dtd)),const_str(+2MONTHS)) + date math max + + + + + Constant Date Request + + + const_str(1800-12-31T23:59:59Z) + const str 1 + + + const_date(1800-12-31T23:59:59Z) + const date 1 + + + const_str(1804-06-30T23:59:59Z) + const str 2 + + + const_date(1804-06-30T23:59:59Z) + const date 2 + + + + + Constant String Request + + + const_str(this is the first) + const str 1 + + + const_str(this is the second) + const str 2 + + + const_str(this is the third) + const str 3 + + + + + Concatenate Request + + + const_str(this is the first) + const str min + + + min(str(string_sd)) + min + + + concat(const_str(this is the first),min(str(string_sd))) + concat const and min + + + + const_str(this is the second) + const str max + + + max(str(string_sd)) + max + + + concat(const_str(this is the second),max(str(string_sd))) + concat const and max + + + + + Reverse Request + + + min(str(string_sd)) + min + + + rev(min(str(string_sd))) + reverse min + + + + max(str(string_sd)) + max + + + rev(max(str(string_sd))) + reverse max + + + diff --git a/solr/core/src/test-files/analytics/requestXMLFiles/fieldFacetExtras.xml b/solr/core/src/test-files/analytics/requestXMLFiles/fieldFacetExtras.xml new file mode 100644 index 00000000000..5d7bf074e3a --- /dev/null +++ b/solr/core/src/test-files/analytics/requestXMLFiles/fieldFacetExtras.xml @@ -0,0 +1,101 @@ + + + + sort request + + + mean(int(int_id)) + mean + + + 
median(int(int_id)) + median + + + count(int(int_id)) + count + + + perc(20,int(int_id)) + perc_20 + + + + long_ld + + mean + asc + + + + float_fd + + median + desc + + + + double_dd + + count + asc + + + + string_sd + + perc_20 + desc + + + + + limit request + + + mean(int(int_id)) + mean + + + median(int(int_id)) + median + + + count(int(int_id)) + count + + + perc(20,int(int_id)) + perc_20 + + + + long_ld + + mean + asc + + + + float_fd + + median + desc + + + + double_dd + + count + asc + + + + string_sd + + perc_20 + desc + + + + diff --git a/solr/core/src/test-files/analytics/requestXMLFiles/fieldFacets.xml b/solr/core/src/test-files/analytics/requestXMLFiles/fieldFacets.xml new file mode 100644 index 00000000000..53dd2d3ed89 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestXMLFiles/fieldFacets.xml @@ -0,0 +1,496 @@ + + + + sum + + + sum(int(int_id)) + int + + + sum(long(long_ld)) + long + + + sum(float(float_fd)) + float + + + sum(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + mean + + + mean(int(int_id)) + int + + + mean(long(long_ld)) + long + + + mean(float(float_fd)) + float + + + mean(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + sumOfSquares + + + sumofsquares(int(int_id)) + int + + + sumofsquares(long(long_ld)) + long + + + sumofsquares(float(float_fd)) + float + + + sumofsquares(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + stddev + + + stddev(int(int_id)) + int + + + stddev(long(long_ld)) + long + + + stddev(float(float_fd)) + float + + + stddev(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + median + + + median(int(int_id)) + int + + + median(long(long_ld)) + long + + + median(float(float_fd)) + float + + + median(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + perc_20 numeric + + + perc(20,int(int_id)) + int + + + perc(20,long(long_ld)) + long + + + perc(20,float(float_fd)) + float + + + perc(20,double(double_dd)) + double + + 
+ + string_sd + + + date_dtd + + + + perc_20 + + + perc(20,str(string_sd)) + str + + + perc(20,date(date_dtd)) + date + + + + int_id + + + long_ld + + + + perc_60 numeric + + + perc(60,int(int_id)) + int + + + perc(60,long(long_ld)) + long + + + perc(60,float(float_fd)) + float + + + perc(60,double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + perc_60 + + + perc(60,str(string_sd)) + str + + + perc(60,date(date_dtd)) + date + + + + int_id + + + long_ld + + + + min numeric + + + min(int(int_id)) + int + + + min(long(long_ld)) + long + + + min(float(float_fd)) + float + + + min(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + min + + + min(str(string_sd)) + str + + + min(date(date_dtd)) + date + + + + int_id + + + long_ld + + + + max numeric + + + max(int(int_id)) + int + + + max(long(long_ld)) + long + + + max(float(float_fd)) + float + + + max(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + max + + + max(str(string_sd)) + str + + + max(date(date_dtd)) + date + + + + int_id + + + long_ld + + + + count numeric + + + count(int(int_id)) + int + + + count(long(long_ld)) + long + + + count(float(float_fd)) + float + + + count(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + count + + + count(str(string_sd)) + str + + + count(date(date_dtd)) + date + + + + int_id + + + long_ld + + + + unique numeric + + + unique(int(int_id)) + int + + + unique(long(long_ld)) + long + + + unique(float(float_fd)) + float + + + unique(double(double_dd)) + double + + + + string_sd + + + date_dtd + + + + unique + + + unique(str(string_sd)) + str + + + unique(date(date_dtd)) + date + + + + int_id + + + long_ld + + + + missing numeric + + + missing(int{int_id}) + int + + + missing(long{long_ld}) + long + + + missing(float{float_fd}) + float + + + missing(double{double_dd}) + double + + + + string_sd + + + date_dtd + + + + missing + + + missing(str{string_sd}) + str + + + missing(date{date_dtd}) + date + + + + int_id + + 
+ long_ld + + + + multivalued + + + mean(int(int_id)) + mean + + + + long_ldm + + + string_sdm + + + date_dtdm + + + + missing facet + + + mean(int(int_id)) + mean + + + + date_dtd + + + string_sd + + + date_dtdm + + + diff --git a/solr/core/src/test-files/analytics/requestXMLFiles/functions.xml b/solr/core/src/test-files/analytics/requestXMLFiles/functions.xml new file mode 100644 index 00000000000..40f5adad239 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestXMLFiles/functions.xml @@ -0,0 +1,246 @@ + + + + Add Request + + + sum(add(int(int_id),float(float_fd))) + sum + + + sum(double(add_if_dd)) + sum calced + + + + mean(add(long(long_ld),double(double_dd),float(float_fd))) + mean + + + mean(double(add_ldf_dd)) + mean calced + + + + + Multiply Request + + + sum(mult(int(int_id),float(float_fd))) + sum + + + sum(double(mult_if_dd)) + sum calced + + + + mean(mult(long(long_ld),double(double_dd),float(float_fd))) + mean + + + mean(double(mult_ldf_dd)) + mean calced + + + + + Divide Request + + + sum(div(int(int_id),float(float_fd))) + sum + + + sum(double(div_if_dd)) + sum calced + + + + mean(div(long(long_ld),double(double_dd))) + mean + + + + mean(double(div_ld_dd)) + mean calced + + + + + Power Request + + + sum(pow(int(int_id),float(float_fd)) + sum + + + sum(double(pow_if_dd)) + sum calced + + + + mean(pow(long(long_ld),double(double_dd))) + mean + + + + mean(double(pow_ld_dd)) + mean calced + + + + + Negate Request + + + sum(neg(int(int_id))) + sum + + + sum(double(neg_i_dd)) + sum calced + + + + mean(neg(long(long_ld))) + mean + + + mean(double(neg_l_dd)) + mean calced + + + + + Const Num Request + + + sum(const_num(8)) + sum + + + sum(double(const_8_dd)) + sum calced + + + + mean(const_num(10)) + mean + + + mean(double(const_10_dd)) + mean calced + + + + + Date Math Request + + + median(date_math(date(date_dtd),const_str(+2YEARS))) + median + + + median(date(dm_2y_dtd)) + median calced + + + + max(date_math(date(date_dtd),const_str(+2MONTHS))) + 
max + + + max(date(dm_2m_dtd)) + max calced + + + + + Constant Date Request + + + median(const_date(1800-06-30T23:59:59Z)) + median + + + median(date(const_00_dtd)) + median calced + + + + max(const_date(1804-06-30T23:59:59Z)) + max + + + max(date(const_04_dtd)) + max calced + + + + + Constant String Request + + + min(const_str(this is the first)) + min + + + min(str(const_first_sd)) + min calced + + + + max(const_str(this is the second)) + max + + + max(str(const_second_sd)) + max calced + + + + + Concatenate Request + + + min(concat(const_str(this is the first),str(string_sd))) + min + + + min(str(concat_first_sd)) + min calced + + + + max(concat(const_str(this is the second),str(string_sd))) + max + + + max(str(concat_second_sd)) + max calced + + + + + Reverse Request + + + min(rev(str(string_sd))) + min + + + min(str(rev_sd)) + min calced + + + + max(rev(str(string_sd))) + max + + + max(str(rev_sd)) + max calced + + + diff --git a/solr/core/src/test-files/analytics/requestXMLFiles/noFacets.xml b/solr/core/src/test-files/analytics/requestXMLFiles/noFacets.xml new file mode 100644 index 00000000000..ce00d3840ac --- /dev/null +++ b/solr/core/src/test-files/analytics/requestXMLFiles/noFacets.xml @@ -0,0 +1,310 @@ + + + + Sum Request + + + sum(int(int_id)) + int_id + + + sum(long(long_ld)) + long_ld + + + sum(float(float_fd)) + float_fd + + + sum(double(double_dd)) + double_dd + + + + + SumOfSquares Request + + + sumofsquares(int(int_id)) + int_id + + + sumofsquares(long(long_ld)) + long_ld + + + sumofsquares(float(float_fd)) + float_fd + + + sumofsquares(double(double_dd)) + double_dd + + + + + Mean Request + + + mean(int(int_id)) + int_id + + + mean(long(long_ld)) + long_ld + + + mean(float(float_fd)) + float_fd + + + mean(double(double_dd)) + double_dd + + + + + Stddev Request + + + stddev(int(int_id)) + int_id + + + stddev(long(long_ld)) + long_ld + + + stddev(float(float_fd)) + float_fd + + + stddev(double(double_dd)) + double_dd + + + + + Median Request + + + 
median(int(int_id)) + int_id + + + median(long(long_ld)) + long_ld + + + median(float(float_fd)) + float_fd + + + median(double(double_dd)) + double_dd + + + + + Perc 20 Request + + + perc(20,int(int_id)) + int_id + + + perc(20,long(long_ld)) + long_ld + + + perc(20,float(float_fd)) + float_fd + + + perc(20,double(double_dd)) + double_dd + + + perc(20,date(date_dtd)) + date_dtd + + + perc(20,str(string_sd)) + string_sd + + + + + Perc 60 Request + + + perc(60,int(int_id)) + int_id + + + perc(60,long(long_ld)) + long_ld + + + perc(60,float(float_fd)) + float_fd + + + perc(60,double(double_dd)) + double_dd + + + perc(60,date(date_dtd)) + date_dtd + + + perc(60,str(string_sd)) + string_sd + + + + + Min Request + + + min(int(int_id)) + int_id + + + min(long(long_ld)) + long_ld + + + min(float(float_fd)) + float_fd + + + min(double(double_dd)) + double_dd + + + min(date(date_dtd)) + date_dtd + + + min(str(string_sd)) + string_sd + + + + + Max Request + + + max(int(int_id)) + int_id + + + max(long(long_ld)) + long_ld + + + max(float(float_fd)) + float_fd + + + max(double(double_dd)) + double_dd + + + max(date(date_dtd)) + date_dtd + + + max(str(string_sd)) + string_sd + + + + + Unique Request + + + unique(int(int_id)) + int_id + + + unique(long(long_ld)) + long_ld + + + unique(float(float_fd)) + float_fd + + + unique(double(double_dd)) + double_dd + + + unique(date(date_dtd)) + date_dtd + + + unique(str(string_sd)) + string_sd + + + + + Count Request + + + count(int(int_id)) + int_id + + + count(long(long_ld)) + long_ld + + + count(float(float_fd)) + float_fd + + + count(double(double_dd)) + double_dd + + + count(date(date_dtd)) + date_dtd + + + count(str(string_sd)) + string_sd + + + + + Missing Request + + + missing(int{int_id}) + int_id + + + missing(long{long_ld}) + long_ld + + + missing(float{float_fd}) + float_fd + + + missing(double{double_dd}) + double_dd + + + missing(date{date_dtd}) + date_dtd + + + missing(str{string_sd}) + string_sd + + + diff --git 
a/solr/core/src/test-files/analytics/requestXMLFiles/queryFacets.xml b/solr/core/src/test-files/analytics/requestXMLFiles/queryFacets.xml new file mode 100644 index 00000000000..73f615b2b25 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestXMLFiles/queryFacets.xml @@ -0,0 +1,94 @@ + + + + int request + + + sum(int(int_id)) + sum + + + + mean(int(int_id)) + mean + + + + median(int(int_id)) + median + + + + perc(8,int(int_id)) + perc_8 + + + + float1 + float_fd:[* TO 50] + + + float2 + float_fd:[* TO 30] + + + + long request + + + sum(long(long_ld)) + sum + + + + mean(long(long_ld)) + mean + + + + median(long(long_ld)) + median + + + + perc(8,long(long_ld)) + perc_8 + + + + string + string_sd:abc1 + string_sd:abc2 + + + + float request + + + sum(float(float_fd)) + sum + + + + mean(float(float_fd)) + mean + + + + median(float(float_fd)) + median + + + + perc(8,float(float_fd)) + perc_8 + + + + long and double + long_ld:[20 TO *] + long_ld:[30 TO *] + double_dd:[* TO 50] + + + diff --git a/solr/core/src/test-files/analytics/requestXMLFiles/rangeFacets.xml b/solr/core/src/test-files/analytics/requestXMLFiles/rangeFacets.xml new file mode 100644 index 00000000000..3434d2e0220 --- /dev/null +++ b/solr/core/src/test-files/analytics/requestXMLFiles/rangeFacets.xml @@ -0,0 +1,319 @@ + + + + regular int + + + mean(int(int_id)) + mean + + + sum(int(int_id)) + sum + + + median(int(int_id)) + median + + + count(int(int_id)) + count + + + sumofsquares(int(int_id)) + sumOfSquares + + + + long_ld + 5 + 30 + 5 + lower + all + + + double_dd + 3 + 39 + 7 + upper + outer + all + + + date_dtd + 1007-01-01T23:59:59Z + 1044-01-01T23:59:59Z + +7YEARS + lower + edge + outer + all + + + + regular float + + + mean(float(float_fd)) + mean + + + sum(float(float_fd)) + sum + + + median(float(float_fd)) + median + + + count(float(float_fd)) + count + + + sumofsquares(float(float_fd)) + sumOfSquares + + + + long_ld + 0 + 29 + 4 + all + all + + + double_dd + 4 + 47 + 11 + edge + all + + 
+ date_dtd + 1004-01-01T23:59:59Z + 1046-01-01T23:59:59Z + +5YEARS + upper + edge + all + + + + hardend int + + + mean(int(int_id)) + mean + + + sum(int(int_id)) + sum + + + median(int(int_id)) + median + + + count(int(int_id)) + count + + + sumofsquares(int(int_id)) + sumOfSquares + + + + long_ld + 5 + 30 + 5 + lower + all + + + double_dd + 3 + 39 + 7 + upper + outer + all + + + date_dtd + 1007-01-01T23:59:59Z + 1044-01-01T23:59:59Z + +7YEARS + lower + edge + outer + all + + + + hardend float + + + mean(float(float_fd)) + mean + + + sum(float(float_fd)) + sum + + + median(float(float_fd)) + median + + + count(float(float_fd)) + count + + + sumofsquares(float(float_fd)) + sumOfSquares + + + + long_ld + 0 + 29 + 4 + all + all + + + double_dd + 4 + 47 + 11 + edge + all + + + date_dtd + 1004-01-01T23:59:59Z + 1046-01-01T23:59:59Z + +5YEARS + upper + edge + all + + + + multigap int + + + mean(int(int_id)) + mean + + + sum(int(int_id)) + sum + + + median(int(int_id)) + median + + + count(int(int_id)) + count + + + sumofsquares(int(int_id)) + sumOfSquares + + + + long_ld + 5 + 30 + 4 + 2 + 6 + 3 + lower + all + + + double_dd + 3 + 39 + 3 + 1 + 7 + upper + outer + all + + + date_dtd + 1007-01-01T23:59:59Z + 1044-01-01T23:59:59Z + +2YEARS + +7YEARS + lower + edge + outer + all + + + + multigap float + + + mean(float(float_fd)) + mean + + + sum(float(float_fd)) + sum + + + median(float(float_fd)) + median + + + count(float(float_fd)) + count + + + sumofsquares(float(float_fd)) + sumOfSquares + + + + long_ld + 0 + 29 + 1 + 4 + all + all + + + double_dd + 4 + 47 + 2 + 3 + 11 + edge + all + + + date_dtd + 1004-01-01T23:59:59Z + 1046-01-01T23:59:59Z + +4YEARS + +5YEARS + upper + edge + all + + + diff --git a/solr/core/src/test-files/log4j.properties b/solr/core/src/test-files/log4j.properties index 9b74a5f22b2..08a32f3f38f 100644 --- a/solr/core/src/test-files/log4j.properties +++ b/solr/core/src/test-files/log4j.properties @@ -8,3 +8,20 @@ 
log4j.appender.CONSOLE.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SS log4j.logger.org.apache.zookeeper=WARN log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.apache.solr.hadoop=INFO + +#log4j.logger.org.apache.solr.update.processor.LogUpdateProcessor=DEBUG +#log4j.logger.org.apache.solr.update.processor.DistributedUpdateProcessor=DEBUG +#log4j.logger.org.apache.solr.update.PeerSync=DEBUG +#log4j.logger.org.apache.solr.core.CoreContainer=DEBUG +#log4j.logger.org.apache.solr.cloud.RecoveryStrategy=DEBUG +#log4j.logger.org.apache.solr.cloud.SyncStrategy=DEBUG +#log4j.logger.org.apache.solr.handler.admin.CoreAdminHandler=DEBUG +#log4j.logger.org.apache.solr.cloud.ZkController=DEBUG +#log4j.logger.org.apache.solr.update.DefaultSolrCoreState=DEBUG +#log4j.logger.org.apache.solr.common.cloud.ConnectionManager=DEBUG +#log4j.logger.org.apache.solr.update.UpdateLog=DEBUG +#log4j.logger.org.apache.solr.cloud.ChaosMonkey=DEBUG +#log4j.logger.org.apache.solr.update.TransactionLog=DEBUG +#log4j.logger.org.apache.solr.handler.ReplicationHandler=DEBUG +#log4j.logger.org.apache.solr.handler.SnapPuller=DEBUG \ No newline at end of file diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-analytics.xml b/solr/core/src/test-files/solr/collection1/conf/schema-analytics.xml new file mode 100644 index 00000000000..3c5713d929b --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-analytics.xml @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-collate-dv.xml b/solr/core/src/test-files/solr/collection1/conf/schema-collate-dv.xml new file mode 100644 index 00000000000..933e405ec29 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-collate-dv.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + + + + + + + + + 
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-custom-field.xml b/solr/core/src/test-files/solr/collection1/conf/schema-custom-field.xml new file mode 100644 index 00000000000..602527541c9 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-custom-field.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-distributed-missing-sort.xml b/solr/core/src/test-files/solr/collection1/conf/schema-distributed-missing-sort.xml new file mode 100644 index 00000000000..c78c11c3da1 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-distributed-missing-sort.xml @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml index f5ed9155e66..7d4876c270a 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml @@ -53,6 +53,7 @@ + text diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-simpleqpplugin.xml b/solr/core/src/test-files/solr/collection1/conf/schema-simpleqpplugin.xml new file mode 100644 index 00000000000..8ad2b4d3d43 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-simpleqpplugin.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text0 + id + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml index 1fabd5c202f..867a535f4a0 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml +++ 
b/solr/core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml @@ -24,6 +24,11 @@ + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml index b4f560ed32f..a25ac604c15 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml @@ -83,6 +83,42 @@ phrase_suggest + + + + + fuzzy_suggest_analyzing_with_high_freq_dict + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory + fuzzy_suggest_analyzing + false + + + true + text + false + stext + + + + + + + + fuzzy_suggest_analyzing_with_file_dict + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + org.apache.solr.spelling.suggest.FileDictionaryFactory + fuzzy_suggest_analyzing + false + + + true + text + false + + fuzzysuggest.txt + + @@ -215,6 +251,26 @@ + + + true + fuzzy_suggest_analyzing_with_file_dict + + + fuzzy_suggest_analyzing_with_file_dict + + + + + + true + fuzzy_suggest_analyzing_with_high_freq_dict + + + fuzzy_suggest_analyzing_with_high_freq_dict + + + true diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-suggestercomponent.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-suggestercomponent.xml new file mode 100644 index 00000000000..1d1a1e92082 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-suggestercomponent.xml @@ -0,0 +1,95 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + + + + suggest_fuzzy_with_high_freq_dict + FuzzyLookupFactory + cat + suggest_fuzzy_with_high_freq_dict + text + true + + + 0.0 + + + + + suggest_fuzzy_file_based + FuzzyLookupFactory + fuzzysuggest.txt + suggest_fuzzy_file_based + text + true + + + + + suggest_fuzzy_doc_dict + FuzzyLookupFactory + 
DocumentDictionaryFactory + cat + price + suggest_fuzzy_doc_dict_payload + text + true + + + + + suggest_fuzzy_doc_expr_dict + DocumentExpressionDictionaryFactory + FuzzyLookupFactory + cat + ((price * 2) + weight) + weight + price + suggest_fuzzy_doc_expr_dict + text + true + + + + + + true + + + suggest + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml index d55845c13d0..22c5b3ff57b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml @@ -86,6 +86,18 @@ + + + bogus.txt + + + + + + bogus.txt + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml index 1750afe51ba..d2413b09654 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -234,6 +234,18 @@ + + + bogus.txt + + + + + + bogus.txt + + + @@ -334,6 +346,8 @@ termsComp + + - + application/json add-unknown-fields-to-the-schema - + application/csv add-unknown-fields-to-the-schema diff --git a/solr/example/exampledocs/monitor.xml b/solr/example/exampledocs/monitor.xml index db986fa0b7f..311b757eab3 100644 --- a/solr/example/exampledocs/monitor.xml +++ b/solr/example/exampledocs/monitor.xml @@ -21,8 +21,7 @@ Dell, Inc. dell - electronics - monitor + electronics and computer1 30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast USB cable 401.6 diff --git a/solr/example/exampledocs/monitor2.xml b/solr/example/exampledocs/monitor2.xml index 79b99494319..eaf9e223ccd 100644 --- a/solr/example/exampledocs/monitor2.xml +++ b/solr/example/exampledocs/monitor2.xml @@ -21,8 +21,7 @@ ViewSonic Corp. 
viewsonic - electronics - monitor + electronics and stuff2 19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution 190.4 279.95 diff --git a/solr/example/cloud-scripts/log4j.properties b/solr/example/scripts/cloud-scripts/log4j.properties similarity index 100% rename from solr/example/cloud-scripts/log4j.properties rename to solr/example/scripts/cloud-scripts/log4j.properties diff --git a/solr/example/cloud-scripts/zkcli.bat b/solr/example/scripts/cloud-scripts/zkcli.bat similarity index 67% rename from solr/example/cloud-scripts/zkcli.bat rename to solr/example/scripts/cloud-scripts/zkcli.bat index 8232a726cac..ac092e01874 100644 --- a/solr/example/cloud-scripts/zkcli.bat +++ b/solr/example/scripts/cloud-scripts/zkcli.bat @@ -8,4 +8,4 @@ REM Find location of this script set SDIR=%~dp0 if "%SDIR:~-1%"=="\" set SDIR=%SDIR:~0,-1% -"%JVM%" -Dlog4j.configuration=file:%SDIR%\log4j.properties -classpath "%SDIR%\..\solr-webapp\webapp\WEB-INF\lib\*;%SDIR%\..\lib\ext\*" org.apache.solr.cloud.ZkCLI %* +"%JVM%" -Dlog4j.configuration=file:%SDIR%\log4j.properties -classpath "%SDIR%\..\..\solr-webapp\webapp\WEB-INF\lib\*;%SDIR%\..\..\lib\ext\*" org.apache.solr.cloud.ZkCLI %* diff --git a/solr/example/cloud-scripts/zkcli.sh b/solr/example/scripts/cloud-scripts/zkcli.sh similarity index 62% rename from solr/example/cloud-scripts/zkcli.sh rename to solr/example/scripts/cloud-scripts/zkcli.sh index ab5da966fa5..15b5392d2e5 100644 --- a/solr/example/cloud-scripts/zkcli.sh +++ b/solr/example/scripts/cloud-scripts/zkcli.sh @@ -9,5 +9,5 @@ JVM="java" sdir="`dirname \"$0\"`" -PATH=$JAVA_HOME/bin:$PATH $JVM -Dlog4j.configuration=file:$sdir/log4j.properties -classpath "$sdir/../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../lib/ext/*" org.apache.solr.cloud.ZkCLI ${1+"$@"} +PATH=$JAVA_HOME/bin:$PATH $JVM -Dlog4j.configuration=file:$sdir/log4j.properties -classpath "$sdir/../../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../../lib/ext/*" org.apache.solr.cloud.ZkCLI ${1+"$@"} diff 
--git a/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh b/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh new file mode 100644 index 00000000000..f8284807e7c --- /dev/null +++ b/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +export HADOOP_CLASSPATH="$sdir/../../../dist/*:$sdir/../../../contrib/map-reduce/lib/*:$sdir/../../../contrib/morphlines-core/lib/*:$sdir/../../../contrib/morphlines-cell/lib/*:$sdir/../../../contrib/extraction/lib/*:$sdir/../../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../../lib/ext/*" \ No newline at end of file diff --git a/solr/example/solr/collection1/conf/schema.xml b/solr/example/solr/collection1/conf/schema.xml index 95e9c36dcad..9829987a7ab 100755 --- a/solr/example/solr/collection1/conf/schema.xml +++ b/solr/example/solr/collection1/conf/schema.xml @@ -63,7 +63,7 @@ (int, float, boolean, string...) --> - + + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"> + + + + + ${solr.hdfs.home:} + + ${solr.hdfs.confdir:} + + ${solr.hdfs.blockcache.enabled:true} + + + current implementation relies on the updateLog feature being enabled. + + ** WARNING ** + Do NOT disable the realtime get handler at /get if you are using + SolrCloud otherwise any leader election will cause a full sync in ALL + replicas for the shard in question. Similarly, a replica recovery will + also always fetch the complete index from the leader because a partial + sync will not be possible in the absence of this handler. + --> true @@ -1011,12 +1032,12 @@ - + application/json - + application/csv @@ -1116,7 +1137,9 @@ --> + + @@ -1317,7 +1354,7 @@ --> - + + DocumentDictionaryFactory + cat + price + string + + + + + + true + 10 + + + suggest + + -
    +
    - +
    -
    +
    + +
    #tree
    + +
    +
    + +
    + + + +
    + +
    + +
    Loading …
    + +
    + +
    + +
    \ No newline at end of file