diff --git a/dev-tools/idea/.idea/libraries/Derby.xml b/dev-tools/idea/.idea/libraries/Derby.xml deleted file mode 100644 index a23a28e477d..00000000000 --- a/dev-tools/idea/.idea/libraries/Derby.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/dev-tools/idea/.idea/libraries/HSQLDB.xml b/dev-tools/idea/.idea/libraries/HSQLDB.xml deleted file mode 100644 index 39efcbfc489..00000000000 --- a/dev-tools/idea/.idea/libraries/HSQLDB.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/dev-tools/idea/.idea/libraries/Solr_DIH_core_library.xml b/dev-tools/idea/.idea/libraries/Solr_DIH_core_library.xml deleted file mode 100644 index d363b92ecd0..00000000000 --- a/dev-tools/idea/.idea/libraries/Solr_DIH_core_library.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/dev-tools/idea/.idea/libraries/Solr_DIH_extras_library.xml b/dev-tools/idea/.idea/libraries/Solr_DIH_extras_library.xml deleted file mode 100644 index 1bfc63bb64d..00000000000 --- a/dev-tools/idea/.idea/libraries/Solr_DIH_extras_library.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/dev-tools/idea/.idea/libraries/Solr_DIH_test_library.xml b/dev-tools/idea/.idea/libraries/Solr_DIH_test_library.xml deleted file mode 100644 index 304589ccb3d..00000000000 --- a/dev-tools/idea/.idea/libraries/Solr_DIH_test_library.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml index e87ff94fb6e..53f2bda39f5 100644 --- a/dev-tools/idea/.idea/modules.xml +++ b/dev-tools/idea/.idea/modules.xml @@ -53,8 +53,6 @@ - - diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml index 85032971175..49ddb4ff9ba 100644 --- a/dev-tools/idea/.idea/workspace.xml +++ b/dev-tools/idea/.idea/workspace.xml @@ -284,22 +284,6 @@ - - - - - - - - - - - + @@ -376,13 
+360,11 @@ - - - - - - - + + + + + diff --git a/dev-tools/idea/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml b/dev-tools/idea/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml deleted file mode 100644 index 8bc21aabf41..00000000000 --- a/dev-tools/idea/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml b/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml deleted file mode 100644 index 8240ff2c8ee..00000000000 --- a/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev-tools/maven/solr/contrib/pom.xml.template b/dev-tools/maven/solr/contrib/pom.xml.template new file mode 100644 index 00000000000..0b1f83c26a2 --- /dev/null +++ b/dev-tools/maven/solr/contrib/pom.xml.template @@ -0,0 +1,55 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../pom.xml + + org.apache.solr + solr-contrib-aggregator + Apache Solr Contrib aggregator POM + pom + + analysis-extras + analytics + clustering + extraction + jaegertracer-configurator + langid + ltr + prometheus-exporter + velocity + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/dev-tools/scripts/SOLR-2452.patch.hack.pl b/dev-tools/scripts/SOLR-2452.patch.hack.pl index 2f6c7fc2d29..244242cca49 100755 --- a/dev-tools/scripts/SOLR-2452.patch.hack.pl +++ b/dev-tools/scripts/SOLR-2452.patch.hack.pl @@ -48,33 +48,6 @@ my @moves = ( 'solr/contrib/clustering/src/main/java' => 'solr/contrib/clustering/src/java', - 'solr/contrib/dataimporthandler/src/test/java' - => 'solr/contrib/dataimporthandler/src/test', - - 'solr/contrib/dataimporthandler/src/test/resources/solr-dih' - => 
'solr/contrib/dataimporthandler/src/test-files/dih/solr', - - 'solr/contrib/dataimporthandler/src/test/resources' - => 'solr/contrib/dataimporthandler/src/test-files/dih', - - 'solr/contrib/dataimporthandler/src/main/java' - => 'solr/contrib/dataimporthandler/src/java', - - 'solr/contrib/dataimporthandler/src/main/webapp' - => 'solr/contrib/dataimporthandler/src/webapp', - - 'solr/contrib/dataimporthandler/src/extras/test/java' - => 'solr/contrib/dataimporthandler-extras/src/test', - - 'solr/contrib/dataimporthandler/src/extras/test/resources/solr-dihextras' - => 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr', - - 'solr/contrib/dataimporthandler/src/extras/test/resources' - => 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras', - - 'solr/contrib/dataimporthandler/src/extras/main/java' - => 'solr/contrib/dataimporthandler-extras/src/java', - 'solr/contrib/extraction/src/test/java' => 'solr/contrib/extraction/src/test', diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index 768474bb48b..e2d336d52e1 100755 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -225,8 +225,7 @@ def checkAllJARs(topDir, project, gitRevision, version, tmpDir, baseURL): for file in files: if file.lower().endswith('.jar'): if project == 'solr': - if ((normRoot.endswith('/contrib/dataimporthandler-extras/lib') and (file.startswith('javax.mail-') or file.startswith('activation-'))) - or (normRoot.endswith('/test-framework/lib') and file.startswith('jersey-')) + if ((normRoot.endswith('/test-framework/lib') and file.startswith('jersey-')) or (normRoot.endswith('/contrib/extraction/lib') and file.startswith('xml-apis-'))): print(' **WARNING**: skipping check of %s/%s: it has javax.* classes' % (root, file)) continue diff --git a/gradle/ant-compat/resolve.gradle b/gradle/ant-compat/resolve.gradle index ee18aa87552..53e4dc51788 100644 --- a/gradle/ant-compat/resolve.gradle +++ 
b/gradle/ant-compat/resolve.gradle @@ -164,10 +164,6 @@ configure(project(":solr:example")) { into "exampledocs/" }) - from(configurations.dih, { - into "example-DIH/solr/db/lib" - }) - into projectDir } } @@ -224,4 +220,4 @@ configure(project(":solr:solrj")) { into "lib" } -} \ No newline at end of file +} diff --git a/gradle/ant-compat/test-classes-cross-deps.gradle b/gradle/ant-compat/test-classes-cross-deps.gradle index 1c32dba1279..d0985eb71d6 100644 --- a/gradle/ant-compat/test-classes-cross-deps.gradle +++ b/gradle/ant-compat/test-classes-cross-deps.gradle @@ -20,8 +20,7 @@ configure([project(":lucene:spatial3d"), project(":lucene:analysis:common"), project(":lucene:backward-codecs"), - project(":lucene:queryparser"), - project(":solr:contrib:dataimporthandler")]) { + project(":lucene:queryparser")]) { plugins.withType(JavaPlugin) { configurations { testClassesExported @@ -56,15 +55,6 @@ configure(project(":solr:contrib:analysis-extras")) { plugins.withType(JavaPlugin) { dependencies { testImplementation project(path: ':lucene:analysis:common', configuration: 'testClassesExported') - testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported') - } - } -} - -configure(project(":solr:contrib:dataimporthandler-extras")) { - plugins.withType(JavaPlugin) { - dependencies { - testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported') } } } diff --git a/gradle/maven/defaults-maven.gradle b/gradle/maven/defaults-maven.gradle index 6c4b4582fbd..570d0118d7f 100644 --- a/gradle/maven/defaults-maven.gradle +++ b/gradle/maven/defaults-maven.gradle @@ -60,8 +60,6 @@ configure(rootProject) { ":solr:core", ":solr:solrj", ":solr:contrib:analysis-extras", - ":solr:contrib:dataimporthandler", - ":solr:contrib:dataimporthandler-extras", ":solr:contrib:analytics", ":solr:contrib:clustering", ":solr:contrib:extraction", diff --git a/gradle/testing/policies/solr-tests.policy 
b/gradle/testing/policies/solr-tests.policy index 1290a387e78..35b3e8407b1 100644 --- a/gradle/testing/policies/solr-tests.policy +++ b/gradle/testing/policies/solr-tests.policy @@ -108,7 +108,7 @@ grant { // needed by hadoop htrace permission java.net.NetPermission "getNetworkInformation"; - // needed by DIH + // needed by DIH - possibly even after DIH is a package permission java.sql.SQLPermission "deregisterDriver"; permission java.util.logging.LoggingPermission "control"; @@ -214,4 +214,4 @@ grant { permission java.io.FilePermission "${gradle.worker.jar}", "read"; // Allow reading from classpath JARs (resources). permission java.io.FilePermission "${gradle.user.home}${/}-", "read"; -}; \ No newline at end of file +}; diff --git a/gradle/validation/owasp-dependency-check/exclusions.xml b/gradle/validation/owasp-dependency-check/exclusions.xml index d6de0e43a91..0a77b99d3b8 100644 --- a/gradle/validation/owasp-dependency-check/exclusions.xml +++ b/gradle/validation/owasp-dependency-check/exclusions.xml @@ -46,30 +46,6 @@ ^pkg:maven/org\.jruby/dirgra@.*$ cpe:/a:jruby:jruby - - - ^pkg:maven/org\.apache\.derby/derby@.*$ - cpe:/a:apache:derby - - - - ^pkg:maven/org\.apache\.derby/derby@.*$ - CVE-2015-1832 - - - - ^pkg:maven/org\.apache\.derby/derby@.*$ - CVE-2018-1313 - where is one of: cloud : SolrCloud example - dih : Data Import Handler (rdbms, mail, atom, tika) schemaless : Schema-less example (schema is inferred from data during indexing) techproducts : Kitchen sink example providing comprehensive examples of Solr features ``` -For instance, if you want to run the Solr Data Import Handler example, do: +For instance, if you want to run the SolrCloud example, do: ``` - bin/solr -e dih + bin/solr -e cloud ``` Indexing Documents @@ -142,8 +141,7 @@ server/ example/ Contains example documents and an alternative Solr home - directory containing examples of how to use the Data Import Handler, - see example/example-DIH/README.md for more information. 
+ directory containing various examples. dist/solr--XX.jar The Apache Solr libraries. To compile Apache Solr Plugins, diff --git a/solr/bin/solr b/solr/bin/solr index f6062a5155a..6ef2a29b007 100755 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -386,7 +386,6 @@ function print_usage() { echo " -e Name of the example to run; available examples:" echo " cloud: SolrCloud example" echo " techproducts: Comprehensive example illustrating many of Solr's core capabilities" - echo " dih: Data Import Handler" echo " schemaless: Schema-less example" echo "" echo " -a Additional parameters to pass to the JVM when starting Solr, such as to setup" diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index 53fafe271ee..b4e17409dcd 100755 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -360,7 +360,6 @@ goto done @echo -e example Name of the example to run; available examples: @echo cloud: SolrCloud example @echo techproducts: Comprehensive example illustrating many of Solr's core capabilities -@echo dih: Data Import Handler @echo schemaless: Schema-less example @echo. @echo -a opts Additional parameters to pass to the JVM when starting Solr, such as to setup diff --git a/solr/common-build.xml b/solr/common-build.xml new file mode 100644 index 00000000000..eb1fc52477a --- /dev/null +++ b/solr/common-build.xml @@ -0,0 +1,547 @@ + + + + + This file is designed for importing into a main build file, and not intended + for standalone use. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/dataimporthandler-extras/build.gradle b/solr/contrib/dataimporthandler-extras/build.gradle deleted file mode 100644 index fde00c3cec8..00000000000 --- a/solr/contrib/dataimporthandler-extras/build.gradle +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -apply plugin: 'java-library' - -description = 'Data Import Handler Extras' - -dependencies { - implementation project(':solr:core') - - implementation project(':solr:contrib:dataimporthandler') - implementation project(':solr:contrib:extraction') - - implementation ('javax.activation:activation') - implementation ('com.sun.mail:javax.mail') - implementation ('com.sun.mail:gimap') - - testImplementation project(':solr:test-framework') -} diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java deleted file mode 100644 index 6861ae3a30f..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java +++ /dev/null @@ -1,901 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import com.sun.mail.imap.IMAPMessage; - -import org.apache.solr.common.util.SuppressForbidden; -import org.apache.solr.handler.dataimport.config.ConfigNameConstants; -import org.apache.solr.util.RTimer; -import org.apache.tika.Tika; -import org.apache.tika.metadata.Metadata; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.mail.*; -import javax.mail.internet.AddressException; -import javax.mail.internet.ContentType; -import javax.mail.internet.InternetAddress; -import javax.mail.internet.MimeMessage; -import javax.mail.search.*; - -import java.io.InputStream; -import java.lang.invoke.MethodHandles; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.function.Supplier; - -import com.sun.mail.gimap.GmailFolder; -import com.sun.mail.gimap.GmailRawSearchTerm; - -/** - * An EntityProcessor instance which can index emails along with their - * attachments from POP3 or IMAP sources. Refer to http://wiki.apache.org/solr/DataImportHandler for more details. 
This - * API is experimental and subject to change - * - * @since solr 1.4 - */ -public class MailEntityProcessor extends EntityProcessorBase { - - private static final SimpleDateFormat sinceDateParser = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT); - private static final SimpleDateFormat afterFmt = - new SimpleDateFormat("yyyy/MM/dd", Locale.ROOT); - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - public static interface CustomFilter { - public SearchTerm getCustomSearch(Folder folder); - } - - public void init(Context context) { - super.init(context); - // set attributes using XXX getXXXFromContext(attribute, defaultValue); - // applies variable resolver and return default if value is not found or null - // REQUIRED : connection and folder info - user = getStringFromContext("user", null); - password = getStringFromContext("password", null); - host = getStringFromContext("host", null); - protocol = getStringFromContext("protocol", null); - folderNames = getStringFromContext("folders", null); - // validate - if (host == null || protocol == null || user == null || password == null - || folderNames == null) throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, - "'user|password|protocol|host|folders' are required attributes"); - - // OPTIONAL : have defaults and are optional - recurse = getBoolFromContext("recurse", true); - - exclude.clear(); - String excludes = getStringFromContext("exclude", ""); - if (excludes != null && !excludes.trim().equals("")) { - exclude = Arrays.asList(excludes.split(",")); - } - - include.clear(); - String includes = getStringFromContext("include", ""); - if (includes != null && !includes.trim().equals("")) { - include = Arrays.asList(includes.split(",")); - } - batchSize = getIntFromContext("batchSize", 20); - customFilter = getStringFromContext("customFilter", ""); - if (filters != null) filters.clear(); - folderIter = null; - msgIter = null; - - 
String lastIndexTime = null; - String command = - String.valueOf(context.getRequestParameters().get("command")); - if (!DataImporter.FULL_IMPORT_CMD.equals(command)) - throw new IllegalArgumentException(this.getClass().getSimpleName()+ - " only supports "+DataImporter.FULL_IMPORT_CMD); - - // Read the last_index_time out of the dataimport.properties if available - String cname = getStringFromContext("name", "mailimporter"); - String varName = ConfigNameConstants.IMPORTER_NS_SHORT + "." + cname + "." - + DocBuilder.LAST_INDEX_TIME; - Object varValue = context.getVariableResolver().resolve(varName); - log.info("{}={}", varName, varValue); - - if (varValue != null && !"".equals(varValue) && - !"".equals(getStringFromContext("fetchMailsSince", ""))) { - - // need to check if varValue is the epoch, which we'll take to mean the - // initial value, in which case means we should use fetchMailsSince instead - Date tmp = null; - try { - tmp = sinceDateParser.parse((String)varValue); - if (tmp.getTime() == 0) { - log.info("Ignoring initial value {} for {} in favor of fetchMailsSince config parameter" - , varValue, varName); - tmp = null; // don't use this value - } - } catch (ParseException e) { - // probably ok to ignore this since we have other options below - // as we're just trying to figure out if the date is 0 - log.warn("Failed to parse {} from {} due to", varValue, varName, e); - } - - if (tmp == null) { - // favor fetchMailsSince in this case because the value from - // dataimport.properties is the default/init value - varValue = getStringFromContext("fetchMailsSince", ""); - log.info("fetchMailsSince={}", varValue); - } - } - - if (varValue == null || "".equals(varValue)) { - varName = ConfigNameConstants.IMPORTER_NS_SHORT + "." 
- + DocBuilder.LAST_INDEX_TIME; - varValue = context.getVariableResolver().resolve(varName); - log.info("{}={}", varName, varValue); - } - - if (varValue != null && varValue instanceof String) { - lastIndexTime = (String)varValue; - if (lastIndexTime != null && lastIndexTime.length() == 0) - lastIndexTime = null; - } - - if (lastIndexTime == null) - lastIndexTime = getStringFromContext("fetchMailsSince", ""); - - log.info("Using lastIndexTime {} for mail import", lastIndexTime); - - this.fetchMailsSince = null; - if (lastIndexTime != null && lastIndexTime.length() > 0) { - try { - fetchMailsSince = sinceDateParser.parse(lastIndexTime); - log.info("Parsed fetchMailsSince={}", lastIndexTime); - } catch (ParseException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Invalid value for fetchMailSince: " + lastIndexTime, e); - } - } - - fetchSize = getIntFromContext("fetchSize", 32 * 1024); - cTimeout = getIntFromContext("connectTimeout", 30 * 1000); - rTimeout = getIntFromContext("readTimeout", 60 * 1000); - - String tmp = context.getEntityAttribute("includeOtherUserFolders"); - includeOtherUserFolders = (tmp != null && Boolean.valueOf(tmp.trim())); - tmp = context.getEntityAttribute("includeSharedFolders"); - includeSharedFolders = (tmp != null && Boolean.valueOf(tmp.trim())); - - setProcessAttachmentConfig(); - includeContent = getBoolFromContext("includeContent", true); - - logConfig(); - } - - private void setProcessAttachmentConfig() { - processAttachment = true; - String tbval = context.getEntityAttribute("processAttachments"); - if (tbval == null) { - tbval = context.getEntityAttribute("processAttachement"); - if (tbval != null) processAttachment = Boolean.valueOf(tbval); - } else processAttachment = Boolean.valueOf(tbval); - } - - @Override - public Map nextRow() { - Message mail = null; - Map row = null; - do { - // try till there is a valid document or folders get exhausted. 
- // when mail == NULL, it means end of processing - mail = getNextMail(); - - if (mail != null) - row = getDocumentFromMail(mail); - - if (row != null && row.get("folder") == null) - row.put("folder", mail.getFolder().getFullName()); - - } while (row == null && mail != null); - return row; - } - - private Message getNextMail() { - if (!connected) { - // this is needed to load the activation mail stuff correctly - // otherwise, the JavaMail multipart support doesn't get configured - // correctly, which leads to a class cast exception when processing - // multipart messages: IMAPInputStream cannot be cast to - // javax.mail.Multipart - if (false == withContextClassLoader(getClass().getClassLoader(), this::connectToMailBox)) { - return null; - } - connected = true; - } - if (folderIter == null) { - createFilters(); - folderIter = new FolderIterator(mailbox); - } - // get next message from the folder - // if folder is exhausted get next folder - // loop till a valid mail or all folders exhausted. - while (msgIter == null || !msgIter.hasNext()) { - Folder next = folderIter.hasNext() ? 
folderIter.next() : null; - if (next == null) return null; - - msgIter = new MessageIterator(next, batchSize); - } - return msgIter.next(); - } - - private Map getDocumentFromMail(Message mail) { - Map row = new HashMap<>(); - try { - addPartToDocument(mail, row, true); - return row; - } catch (Exception e) { - log.error("Failed to convert message [{}] to document due to: {}" - , mail, e, e); - return null; - } - } - - @SuppressWarnings({"unchecked"}) - public void addPartToDocument(Part part, Map row, boolean outerMost) throws Exception { - if (part instanceof Message) { - addEnvelopeToDocument(part, row); - } - - String ct = part.getContentType().toLowerCase(Locale.ROOT); - ContentType ctype = new ContentType(ct); - if (part.isMimeType("multipart/*")) { - Object content = part.getContent(); - if (content != null && content instanceof Multipart) { - Multipart mp = (Multipart) part.getContent(); - int count = mp.getCount(); - if (part.isMimeType("multipart/alternative")) count = 1; - for (int i = 0; i < count; i++) - addPartToDocument(mp.getBodyPart(i), row, false); - } else { - log.warn("Multipart content is a not an instance of Multipart! Content is: {}" - + ". Typically, this is due to the Java Activation JAR being loaded by the wrong classloader." - , (content != null ? 
content.getClass().getName() : "null")); - } - } else if (part.isMimeType("message/rfc822")) { - addPartToDocument((Part) part.getContent(), row, false); - } else { - String disp = part.getDisposition(); - if (includeContent - && !(disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT))) { - InputStream is = part.getInputStream(); - Metadata contentTypeHint = new Metadata(); - contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType() - .toLowerCase(Locale.ENGLISH)); - String content = (new Tika()).parseToString(is, contentTypeHint); - if (row.get(CONTENT) == null) row.put(CONTENT, new ArrayList()); - List contents = (List) row.get(CONTENT); - contents.add(content.trim()); - row.put(CONTENT, contents); - } - if (!processAttachment || disp == null - || !disp.equalsIgnoreCase(Part.ATTACHMENT)) return; - InputStream is = part.getInputStream(); - String fileName = part.getFileName(); - Metadata contentTypeHint = new Metadata(); - contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType() - .toLowerCase(Locale.ENGLISH)); - String content = (new Tika()).parseToString(is, contentTypeHint); - if (content == null || content.trim().length() == 0) return; - - if (row.get(ATTACHMENT) == null) row.put(ATTACHMENT, - new ArrayList()); - List contents = (List) row.get(ATTACHMENT); - contents.add(content.trim()); - row.put(ATTACHMENT, contents); - if (row.get(ATTACHMENT_NAMES) == null) row.put(ATTACHMENT_NAMES, - new ArrayList()); - List names = (List) row.get(ATTACHMENT_NAMES); - names.add(fileName); - row.put(ATTACHMENT_NAMES, names); - } - } - - private void addEnvelopeToDocument(Part part, Map row) - throws MessagingException { - MimeMessage mail = (MimeMessage) part; - Address[] adresses; - if ((adresses = mail.getFrom()) != null && adresses.length > 0) row.put( - FROM, adresses[0].toString()); - - List to = new ArrayList<>(); - if ((adresses = mail.getRecipients(Message.RecipientType.TO)) != null) addAddressToList( - adresses, to); - if ((adresses = 
mail.getRecipients(Message.RecipientType.CC)) != null) addAddressToList( - adresses, to); - if ((adresses = mail.getRecipients(Message.RecipientType.BCC)) != null) addAddressToList( - adresses, to); - if (to.size() > 0) row.put(TO_CC_BCC, to); - - row.put(MESSAGE_ID, mail.getMessageID()); - row.put(SUBJECT, mail.getSubject()); - - Date d = mail.getSentDate(); - if (d != null) { - row.put(SENT_DATE, d); - } - - List flags = new ArrayList<>(); - for (Flags.Flag flag : mail.getFlags().getSystemFlags()) { - if (flag == Flags.Flag.ANSWERED) flags.add(FLAG_ANSWERED); - else if (flag == Flags.Flag.DELETED) flags.add(FLAG_DELETED); - else if (flag == Flags.Flag.DRAFT) flags.add(FLAG_DRAFT); - else if (flag == Flags.Flag.FLAGGED) flags.add(FLAG_FLAGGED); - else if (flag == Flags.Flag.RECENT) flags.add(FLAG_RECENT); - else if (flag == Flags.Flag.SEEN) flags.add(FLAG_SEEN); - } - flags.addAll(Arrays.asList(mail.getFlags().getUserFlags())); - if (flags.size() == 0) flags.add(FLAG_NONE); - row.put(FLAGS, flags); - - String[] hdrs = mail.getHeader("X-Mailer"); - if (hdrs != null) row.put(XMAILER, hdrs[0]); - } - - private void addAddressToList(Address[] adresses, List to) - throws AddressException { - for (Address address : adresses) { - to.add(address.toString()); - InternetAddress ia = (InternetAddress) address; - if (ia.isGroup()) { - InternetAddress[] group = ia.getGroup(false); - for (InternetAddress member : group) - to.add(member.toString()); - } - } - } - - private boolean connectToMailBox() { - try { - Properties props = new Properties(); - if (System.getProperty("mail.debug") != null) - props.setProperty("mail.debug", System.getProperty("mail.debug")); - - if (("imap".equals(protocol) || "imaps".equals(protocol)) - && "imap.gmail.com".equals(host)) { - log.info("Consider using 'gimaps' protocol instead of '{}' for enabling GMail specific extensions for {}" - , protocol, host); - } - - props.setProperty("mail.store.protocol", protocol); - - String imapPropPrefix = 
protocol.startsWith("gimap") ? "gimap" : "imap"; - props.setProperty("mail." + imapPropPrefix + ".fetchsize", "" + fetchSize); - props.setProperty("mail." + imapPropPrefix + ".timeout", "" + rTimeout); - props.setProperty("mail." + imapPropPrefix + ".connectiontimeout", "" + cTimeout); - - int port = -1; - int colonAt = host.indexOf(":"); - if (colonAt != -1) { - port = Integer.parseInt(host.substring(colonAt + 1)); - host = host.substring(0, colonAt); - } - - Session session = Session.getDefaultInstance(props, null); - mailbox = session.getStore(protocol); - if (port != -1) { - mailbox.connect(host, port, user, password); - } else { - mailbox.connect(host, user, password); - } - log.info("Connected to {}'s mailbox on {}", user, host); - - return true; - } catch (MessagingException e) { - String errMsg = String.format(Locale.ENGLISH, - "Failed to connect to %s server %s as user %s due to: %s", protocol, - host, user, e.toString()); - log.error(errMsg, e); - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - errMsg, e); - } - } - - private void createFilters() { - if (fetchMailsSince != null) { - filters.add(new MailsSinceLastCheckFilter(fetchMailsSince)); - } - if (customFilter != null && !customFilter.equals("")) { - try { - Class cf = Class.forName(customFilter); - Object obj = cf.getConstructor().newInstance(); - if (obj instanceof CustomFilter) { - filters.add((CustomFilter) obj); - } - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Custom filter could not be created", e); - } - } - } - - private void logConfig() { - if (!log.isInfoEnabled()) return; - - String lineSep = System.getProperty("line.separator"); - - StringBuffer config = new StringBuffer(); - config.append("user : ").append(user).append(lineSep); - config - .append("pwd : ") - .append( - password != null && password.length() > 0 ? 
"" : "") - .append(lineSep); - config.append("protocol : ").append(protocol) - .append(lineSep); - config.append("host : ").append(host) - .append(lineSep); - config.append("folders : ").append(folderNames) - .append(lineSep); - config.append("recurse : ").append(recurse) - .append(lineSep); - config.append("exclude : ").append(exclude.toString()) - .append(lineSep); - config.append("include : ").append(include.toString()) - .append(lineSep); - config.append("batchSize : ").append(batchSize) - .append(lineSep); - config.append("fetchSize : ").append(fetchSize) - .append(lineSep); - config.append("read timeout : ").append(rTimeout) - .append(lineSep); - config.append("conection timeout : ").append(cTimeout) - .append(lineSep); - config.append("custom filter : ").append(customFilter) - .append(lineSep); - config.append("fetch mail since : ").append(fetchMailsSince) - .append(lineSep); - config.append("includeContent : ").append(includeContent) - .append(lineSep); - config.append("processAttachments : ").append(processAttachment) - .append(lineSep); - config.append("includeOtherUserFolders : ").append(includeOtherUserFolders) - .append(lineSep); - config.append("includeSharedFolders : ").append(includeSharedFolders) - .append(lineSep); - log.info("{}", config); - } - - class FolderIterator implements Iterator { - private Store mailbox; - private List topLevelFolders; - private List folders = null; - private Folder lastFolder = null; - - public FolderIterator(Store mailBox) { - this.mailbox = mailBox; - folders = new ArrayList<>(); - getTopLevelFolders(mailBox); - if (includeOtherUserFolders) getOtherUserFolders(); - if (includeSharedFolders) getSharedFolders(); - } - - public boolean hasNext() { - return !folders.isEmpty(); - } - - public Folder next() { - try { - boolean hasMessages = false; - Folder next; - do { - if (lastFolder != null) { - lastFolder.close(false); - lastFolder = null; - } - if (folders.isEmpty()) { - mailbox.close(); - return null; - } - next = 
folders.remove(0); - if (next != null) { - String fullName = next.getFullName(); - if (!excludeFolder(fullName)) { - hasMessages = (next.getType() & Folder.HOLDS_MESSAGES) != 0; - next.open(Folder.READ_ONLY); - lastFolder = next; - log.info("Opened folder : {}", fullName); - } - if (recurse && ((next.getType() & Folder.HOLDS_FOLDERS) != 0)) { - Folder[] children = next.list(); - log.info("Added its children to list : "); - for (int i = children.length - 1; i >= 0; i--) { - folders.add(0, children[i]); - if (log.isInfoEnabled()) { - log.info("child name : {}", children[i].getFullName()); - } - } - if (children.length == 0) log.info("NO children : "); - } - } - } while (!hasMessages); - return next; - } catch (Exception e) { - log.warn("Failed to read folders due to: {}", e); - // throw new - // DataImportHandlerException(DataImportHandlerException.SEVERE, - // "Folder open failed", e); - } - return null; - } - - public void remove() { - throw new UnsupportedOperationException("It's read only mode..."); - } - - private void getTopLevelFolders(Store mailBox) { - if (folderNames != null) topLevelFolders = Arrays.asList(folderNames - .split(",")); - for (int i = 0; topLevelFolders != null && i < topLevelFolders.size(); i++) { - try { - folders.add(mailbox.getFolder(topLevelFolders.get(i))); - } catch (MessagingException e) { - // skip bad ones unless it's the last one and still no good folder - if (folders.size() == 0 && i == topLevelFolders.size() - 1) throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, "Folder retreival failed"); - } - } - if (topLevelFolders == null || topLevelFolders.size() == 0) { - try { - folders.add(mailBox.getDefaultFolder()); - } catch (MessagingException e) { - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, "Folder retreival failed"); - } - } - } - - private void getOtherUserFolders() { - try { - Folder[] ufldrs = mailbox.getUserNamespaces(null); - if (ufldrs != null) { - log.info("Found {} 
user namespace folders", ufldrs.length); - for (Folder ufldr : ufldrs) - folders.add(ufldr); - } - } catch (MessagingException me) { - log.warn("Messaging exception retrieving user namespaces: ", me); - } - } - - private void getSharedFolders() { - try { - Folder[] sfldrs = mailbox.getSharedNamespaces(); - if (sfldrs != null) { - log.info("Found {} shared namespace folders", sfldrs.length); - for (Folder sfldr : sfldrs) - folders.add(sfldr); - } - } catch (MessagingException me) { - log.warn("Messaging exception retrieving shared namespaces: ", me); - } - } - - private boolean excludeFolder(String name) { - for (String s : exclude) { - if (name.matches(s)) return true; - } - for (String s : include) { - if (name.matches(s)) return false; - } - return include.size() > 0; - } - } - - class MessageIterator extends SearchTerm implements Iterator { - private Folder folder; - private Message[] messagesInCurBatch = null; - private int current = 0; - private int currentBatch = 0; - private int batchSize = 0; - private int totalInFolder = 0; - private boolean doBatching = true; - - public MessageIterator(Folder folder, int batchSize) { - super(); - - try { - this.folder = folder; - this.batchSize = batchSize; - SearchTerm st = getSearchTerm(); - - log.info("SearchTerm={}", st); - - if (st != null || folder instanceof GmailFolder) { - doBatching = false; - // Searching can still take a while even though we're only pulling - // envelopes; unless you're using gmail server-side filter, which is - // fast - if (log.isInfoEnabled()) { - log.info("Searching folder {} for messages", folder.getName()); - } - final RTimer searchTimer = new RTimer(); - - // If using GMail, speed up the envelope processing by doing a - // server-side - // search for messages occurring on or after the fetch date (at - // midnight), - // which reduces the number of envelopes we need to pull from the - // server - // to apply the precise DateTerm filter; GMail server-side search has - // date - // 
granularity only but the local filters are also applied - - if (folder instanceof GmailFolder && fetchMailsSince != null) { - String afterCrit = "after:" + afterFmt.format(fetchMailsSince); - log.info("Added server-side gmail filter: {}", afterCrit); - Message[] afterMessages = folder.search(new GmailRawSearchTerm( - afterCrit)); - - if (log.isInfoEnabled()) { - log.info("GMail server-side filter found {} messages received {} in folder {}" - , afterMessages.length, afterCrit, folder.getName()); - } - - // now pass in the server-side filtered messages to the local filter - messagesInCurBatch = folder.search((st != null ? st : this), afterMessages); - } else { - messagesInCurBatch = folder.search(st); - } - totalInFolder = messagesInCurBatch.length; - folder.fetch(messagesInCurBatch, fp); - current = 0; - if (log.isInfoEnabled()) { - log.info("Total messages : {}", totalInFolder); - log.info("Search criteria applied. Batching disabled. Took {} (ms)", searchTimer.getTime()); // logOk - } - } else { - totalInFolder = folder.getMessageCount(); - log.info("Total messages : {}", totalInFolder); - getNextBatch(batchSize, folder); - } - } catch (MessagingException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Message retreival failed", e); - } - } - - private void getNextBatch(int batchSize, Folder folder) - throws MessagingException { - // after each batch invalidate cache - if (messagesInCurBatch != null) { - for (Message m : messagesInCurBatch) { - if (m instanceof IMAPMessage) ((IMAPMessage) m).invalidateHeaders(); - } - } - int lastMsg = (currentBatch + 1) * batchSize; - lastMsg = lastMsg > totalInFolder ? 
totalInFolder : lastMsg; - messagesInCurBatch = folder.getMessages(currentBatch * batchSize + 1, - lastMsg); - folder.fetch(messagesInCurBatch, fp); - current = 0; - currentBatch++; - log.info("Current Batch : {}", currentBatch); - log.info("Messages in this batch : {}", messagesInCurBatch.length); - } - - public boolean hasNext() { - boolean hasMore = current < messagesInCurBatch.length; - if (!hasMore && doBatching && currentBatch * batchSize < totalInFolder) { - // try next batch - try { - getNextBatch(batchSize, folder); - hasMore = current < messagesInCurBatch.length; - } catch (MessagingException e) { - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, "Message retreival failed", e); - } - } - return hasMore; - } - - public Message next() { - return hasNext() ? messagesInCurBatch[current++] : null; - } - - public void remove() { - throw new UnsupportedOperationException("It's read only mode..."); - } - - private SearchTerm getSearchTerm() { - if (filters.size() == 0) return null; - if (filters.size() == 1) return filters.get(0).getCustomSearch(folder); - SearchTerm last = filters.get(0).getCustomSearch(folder); - for (int i = 1; i < filters.size(); i++) { - CustomFilter filter = filters.get(i); - SearchTerm st = filter.getCustomSearch(folder); - if (st != null) { - last = new AndTerm(last, st); - } - } - return last; - } - - public boolean match(Message message) { - return true; - } - } - - static class MailsSinceLastCheckFilter implements CustomFilter { - - private Date since; - - public MailsSinceLastCheckFilter(Date date) { - since = date; - } - - @SuppressWarnings("serial") - public SearchTerm getCustomSearch(final Folder folder) { - if (log.isInfoEnabled()) { - log.info("Building mail filter for messages in {} that occur after {}" - , folder.getName(), sinceDateParser.format(since)); - } - return new DateTerm(ComparisonTerm.GE, since) { - private int matched = 0; - private int seen = 0; - - @Override - public boolean 
match(Message msg) { - boolean isMatch = false; - ++seen; - try { - Date msgDate = msg.getReceivedDate(); - if (msgDate == null) msgDate = msg.getSentDate(); - - if (msgDate != null && msgDate.getTime() >= since.getTime()) { - ++matched; - isMatch = true; - } else { - String msgDateStr = (msgDate != null) ? sinceDateParser.format(msgDate) : "null"; - String sinceDateStr = (since != null) ? sinceDateParser.format(since) : "null"; - if (log.isDebugEnabled()) { - log.debug("Message {} was received at [{}], since filter is [{}]" - , msg.getSubject(), msgDateStr, sinceDateStr); - } - } - } catch (MessagingException e) { - log.warn("Failed to process message due to: {}", e, e); - } - - if (seen % 100 == 0) { - if (log.isInfoEnabled()) { - log.info("Matched {} of {} messages since: {}" - , matched, seen, sinceDateParser.format(since)); - } - } - - return isMatch; - } - }; - } - } - - // user settings stored in member variables - private String user; - private String password; - private String host; - private String protocol; - - private String folderNames; - private List exclude = new ArrayList<>(); - private List include = new ArrayList<>(); - private boolean recurse; - - private int batchSize; - private int fetchSize; - private int cTimeout; - private int rTimeout; - - private Date fetchMailsSince; - private String customFilter; - - private boolean processAttachment = true; - private boolean includeContent = true; - private boolean includeOtherUserFolders = false; - private boolean includeSharedFolders = false; - - // holds the current state - private Store mailbox; - private boolean connected = false; - private FolderIterator folderIter; - private MessageIterator msgIter; - private List filters = new ArrayList<>(); - private static FetchProfile fp = new FetchProfile(); - - static { - fp.add(FetchProfile.Item.ENVELOPE); - fp.add(FetchProfile.Item.FLAGS); - fp.add("X-Mailer"); - } - - // Fields To Index - // single valued - private static final String MESSAGE_ID = 
"messageId"; - private static final String SUBJECT = "subject"; - private static final String FROM = "from"; - private static final String SENT_DATE = "sentDate"; - private static final String XMAILER = "xMailer"; - // multi valued - private static final String TO_CC_BCC = "allTo"; - private static final String FLAGS = "flags"; - private static final String CONTENT = "content"; - private static final String ATTACHMENT = "attachment"; - private static final String ATTACHMENT_NAMES = "attachmentNames"; - // flag values - private static final String FLAG_NONE = "none"; - private static final String FLAG_ANSWERED = "answered"; - private static final String FLAG_DELETED = "deleted"; - private static final String FLAG_DRAFT = "draft"; - private static final String FLAG_FLAGGED = "flagged"; - private static final String FLAG_RECENT = "recent"; - private static final String FLAG_SEEN = "seen"; - - private int getIntFromContext(String prop, int ifNull) { - int v = ifNull; - try { - String val = context.getEntityAttribute(prop); - if (val != null) { - val = context.replaceTokens(val); - v = Integer.parseInt(val); - } - } catch (NumberFormatException e) { - // do nothing - } - return v; - } - - private boolean getBoolFromContext(String prop, boolean ifNull) { - boolean v = ifNull; - String val = context.getEntityAttribute(prop); - if (val != null) { - val = context.replaceTokens(val); - v = Boolean.valueOf(val); - } - return v; - } - - private String getStringFromContext(String prop, String ifNull) { - String v = ifNull; - String val = context.getEntityAttribute(prop); - if (val != null) { - val = context.replaceTokens(val); - v = val; - } - return v; - } - - @SuppressForbidden(reason = "Uses context class loader as a workaround to inject correct classloader to 3rd party libs") - private static T withContextClassLoader(ClassLoader loader, Supplier action) { - Thread ct = Thread.currentThread(); - ClassLoader prev = ct.getContextClassLoader(); - try { - 
ct.setContextClassLoader(loader); - return action.get(); - } finally { - ct.setContextClassLoader(prev); - } - } - -} diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java deleted file mode 100644 index 78a53fac258..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import org.apache.commons.io.IOUtils; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.EmptyParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.html.HtmlMapper; -import org.apache.tika.parser.html.IdentityHtmlMapper; -import org.apache.tika.sax.BodyContentHandler; -import org.apache.tika.sax.ContentHandlerDecorator; -import org.apache.tika.sax.XHTMLContentHandler; -import org.xml.sax.Attributes; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -import javax.xml.transform.OutputKeys; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.sax.SAXTransformerFactory; -import javax.xml.transform.sax.TransformerHandler; -import javax.xml.transform.stream.StreamResult; -import java.io.File; -import java.io.InputStream; -import java.io.StringWriter; -import java.io.Writer; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImporter.COLUMN; -import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL; -/** - *

An implementation of {@link EntityProcessor} which reads data from rich docs - * using Apache Tika - * - *

To index latitude/longitude data that might - * be extracted from a file's metadata, identify - * the geo field for this information with this attribute: - * spatialMetadataField - * - * @since solr 3.1 - */ -public class TikaEntityProcessor extends EntityProcessorBase { - private static Parser EMPTY_PARSER = new EmptyParser(); - private TikaConfig tikaConfig; - private String format = "text"; - private boolean done = false; - private boolean extractEmbedded = false; - private String parser; - static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser"; - private String htmlMapper; - private String spatialMetadataField; - - @Override - public void init(Context context) { - super.init(context); - done = false; - } - - @Override - protected void firstInit(Context context) { - super.firstInit(context); - // See similar code in ExtractingRequestHandler.inform - try { - String tikaConfigLoc = context.getResolvedEntityAttribute("tikaConfig"); - if (tikaConfigLoc == null) { - ClassLoader classLoader = context.getSolrCore().getResourceLoader().getClassLoader(); - try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) { - tikaConfig = new TikaConfig(is); - } - } else { - File configFile = new File(tikaConfigLoc); - if (configFile.isAbsolute()) { - tikaConfig = new TikaConfig(configFile); - } else { // in conf/ - try (InputStream is = context.getSolrCore().getResourceLoader().openResource(tikaConfigLoc)) { - tikaConfig = new TikaConfig(is); - } - } - } - } catch (Exception e) { - wrapAndThrow(SEVERE, e,"Unable to load Tika Config"); - } - - String extractEmbeddedString = context.getResolvedEntityAttribute("extractEmbedded"); - if ("true".equals(extractEmbeddedString)) { - extractEmbedded = true; - } - format = context.getResolvedEntityAttribute("format"); - if(format == null) - format = "text"; - if (!"html".equals(format) && !"xml".equals(format) && !"text".equals(format)&& !"none".equals(format) ) - throw new 
DataImportHandlerException(SEVERE, "'format' can be one of text|html|xml|none"); - - htmlMapper = context.getResolvedEntityAttribute("htmlMapper"); - if (htmlMapper == null) - htmlMapper = "default"; - if (!"default".equals(htmlMapper) && !"identity".equals(htmlMapper)) - throw new DataImportHandlerException(SEVERE, "'htmlMapper', if present, must be 'default' or 'identity'"); - - parser = context.getResolvedEntityAttribute("parser"); - if(parser == null) { - parser = AUTO_PARSER; - } - - spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField"); - } - - @Override - public Map nextRow() { - if(done) return null; - Map row = new HashMap<>(); - @SuppressWarnings({"unchecked"}) - DataSource dataSource = context.getDataSource(); - InputStream is = dataSource.getData(context.getResolvedEntityAttribute(URL)); - ContentHandler contentHandler = null; - Metadata metadata = new Metadata(); - StringWriter sw = new StringWriter(); - try { - if ("html".equals(format)) { - contentHandler = getHtmlHandler(sw); - } else if ("xml".equals(format)) { - contentHandler = getXmlContentHandler(sw); - } else if ("text".equals(format)) { - contentHandler = getTextContentHandler(sw); - } else if("none".equals(format)){ - contentHandler = new DefaultHandler(); - } - } catch (TransformerConfigurationException e) { - wrapAndThrow(SEVERE, e, "Unable to create content handler"); - } - Parser tikaParser = null; - if(parser.equals(AUTO_PARSER)){ - tikaParser = new AutoDetectParser(tikaConfig); - } else { - tikaParser = context.getSolrCore().getResourceLoader().newInstance(parser, Parser.class); - } - try { - ParseContext context = new ParseContext(); - if ("identity".equals(htmlMapper)){ - context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE); - } - if (extractEmbedded) { - context.set(Parser.class, tikaParser); - } else { - context.set(Parser.class, EMPTY_PARSER); - } - tikaParser.parse(is, contentHandler, metadata , context); - } catch (Exception e) { - 
if(SKIP.equals(onError)) { - throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW, - "Document skipped :" + e.getMessage()); - } - wrapAndThrow(SEVERE, e, "Unable to read content"); - } - IOUtils.closeQuietly(is); - for (Map field : context.getAllEntityFields()) { - if (!"true".equals(field.get("meta"))) continue; - String col = field.get(COLUMN); - String s = metadata.get(col); - if (s != null) row.put(col, s); - } - if(!"none".equals(format) ) row.put("text", sw.toString()); - tryToAddLatLon(metadata, row); - done = true; - return row; - } - - private void tryToAddLatLon(Metadata metadata, Map row) { - if (spatialMetadataField == null) return; - String latString = metadata.get(Metadata.LATITUDE); - String lonString = metadata.get(Metadata.LONGITUDE); - if (latString != null && lonString != null) { - row.put(spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latString, lonString)); - } - } - - private static ContentHandler getHtmlHandler(Writer writer) - throws TransformerConfigurationException { - SAXTransformerFactory factory = (SAXTransformerFactory) - TransformerFactory.newInstance(); - TransformerHandler handler = factory.newTransformerHandler(); - handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html"); - handler.setResult(new StreamResult(writer)); - return new ContentHandlerDecorator(handler) { - @Override - public void startElement( - String uri, String localName, String name, Attributes atts) - throws SAXException { - if (XHTMLContentHandler.XHTML.equals(uri)) { - uri = null; - } - if (!"head".equals(localName)) { - super.startElement(uri, localName, name, atts); - } - } - - @Override - public void endElement(String uri, String localName, String name) - throws SAXException { - if (XHTMLContentHandler.XHTML.equals(uri)) { - uri = null; - } - if (!"head".equals(localName)) { - super.endElement(uri, localName, name); - } - } - - @Override - public void startPrefixMapping(String prefix, String uri) {/*no op*/ } - - 
@Override - public void endPrefixMapping(String prefix) {/*no op*/ } - }; - } - - private static ContentHandler getTextContentHandler(Writer writer) { - return new BodyContentHandler(writer); - } - - private static ContentHandler getXmlContentHandler(Writer writer) - throws TransformerConfigurationException { - SAXTransformerFactory factory = (SAXTransformerFactory) - TransformerFactory.newInstance(); - TransformerHandler handler = factory.newTransformerHandler(); - handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml"); - handler.setResult(new StreamResult(writer)); - return handler; - } - -} diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html deleted file mode 100644 index 9a7f6f260e2..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - -Plugins for DataImportHandler that have additional dependencies. - - diff --git a/solr/contrib/dataimporthandler-extras/src/java/overview.html b/solr/contrib/dataimporthandler-extras/src/java/overview.html deleted file mode 100644 index 5a55432908e..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/java/overview.html +++ /dev/null @@ -1,21 +0,0 @@ - - - -Apache Solr Search Server: DataImportHandler Extras contrib. 
This contrib module is deprecated as of 8.6 - - diff --git a/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml b/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml deleted file mode 100644 index b598d9e867e..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - \ No newline at end of file diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc deleted file mode 100644 index 5944c24b2cf..00000000000 Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc and /dev/null differ diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf deleted file mode 100644 index bd8b865905f..00000000000 Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf and /dev/null differ diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml deleted file mode 100644 index 793482a4991..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml +++ /dev/null @@ -1,205 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml deleted file mode 100644 
index 344589e340d..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml +++ /dev/null @@ -1,277 +0,0 @@ - - - - - ${tests.luceneMatchVersion:LATEST} - - ${useCompoundFile:false} - - - - ${solr.data.dir:} - - - - - - - - - - - 100000 - - - - - - - ${solr.max.booleanClauses:1024} - - - - - - - - - - - - - true - - - - - - - - 50 - - - 200 - - - - - - - - - - - - - - - - - - - false - - - 4 - - - - - - - - - - - - - - - - - - - explicit - - - - - - - - - - - - explicit - - - - - - - *:* - - - - diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html deleted file mode 100644 index 103748120c1..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - Title in the header - - -

H1 Header

-
Basic div
-
Div with attribute
- - - diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg deleted file mode 100644 index 10d1ebb2d32..00000000000 Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg and /dev/null differ diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx deleted file mode 100644 index cd562cbb82d..00000000000 Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx and /dev/null differ diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx deleted file mode 100644 index 659ecdd5853..00000000000 Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx and /dev/null differ diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java deleted file mode 100644 index 027a8d73279..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.SolrInputDocument; -import org.junit.Ignore; -import org.junit.Test; - -import java.text.ParseException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -// Test mailbox is like this: foldername(mailcount) -// top1(2) -> child11(6) -// -> child12(0) -// top2(2) -> child21(1) -// -> grandchild211(2) -// -> grandchild212(1) -// -> child22(2) - -/** - * Test for MailEntityProcessor. The tests are marked as ignored because we'd need a mail server (real or mocked) for - * these to work. 
- * - * TODO: Find a way to make the tests actually test code - * - * - * @see org.apache.solr.handler.dataimport.MailEntityProcessor - * @since solr 1.4 - */ -@Ignore("Needs a Mock Mail Server to work") -public class TestMailEntityProcessor extends AbstractDataImportHandlerTestCase { - - // Credentials - private static final String user = "user"; - private static final String password = "password"; - private static final String host = "host"; - private static final String protocol = "imaps"; - - private static Map paramMap = new HashMap<>(); - - @Test - @Ignore("Needs a Mock Mail Server to work") - public void testConnection() { - // also tests recurse = false and default settings - paramMap.put("folders", "top2"); - paramMap.put("recurse", "false"); - paramMap.put("processAttachement", "false"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - @SuppressWarnings({"unchecked"}) - RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - assertEquals("top1 did not return 2 messages", swi.docs.size(), 2); - } - - @Test - @Ignore("Needs a Mock Mail Server to work") - public void testRecursion() { - paramMap.put("folders", "top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - @SuppressWarnings({"unchecked"}) - RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - assertEquals("top2 and its children did not return 8 messages", swi.docs.size(), 8); - } - - @Test - @Ignore("Needs a Mock Mail Server to work") - public void testExclude() { - paramMap.put("folders", "top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("exclude", ".*grandchild.*"); - DataImporter di = new 
DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - @SuppressWarnings({"unchecked"}) - RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - assertEquals("top2 and its direct children did not return 5 messages", swi.docs.size(), 5); - } - - @Test - @Ignore("Needs a Mock Mail Server to work") - public void testInclude() { - paramMap.put("folders", "top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("include", ".*grandchild.*"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - @SuppressWarnings({"unchecked"}) - RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3); - } - - @Test - @Ignore("Needs a Mock Mail Server to work") - public void testIncludeAndExclude() { - paramMap.put("folders", "top1,top2"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("exclude", ".*top1.*"); - paramMap.put("include", ".*grandchild.*"); - DataImporter di = new DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - @SuppressWarnings({"unchecked"}) - RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3); - } - - @Test - @Ignore("Needs a Mock Mail Server to work") - @SuppressWarnings({"unchecked"}) - public void testFetchTimeSince() throws ParseException { - paramMap.put("folders", "top1/child11"); - paramMap.put("recurse", "true"); - paramMap.put("processAttachement", "false"); - paramMap.put("fetchMailsSince", "2008-12-26 00:00:00"); - DataImporter di = new 
DataImporter(); - di.loadAndInit(getConfigFromMap(paramMap)); - RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); - SolrWriterImpl swi = new SolrWriterImpl(); - di.runCmd(rp, swi); - assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3); - } - - private String getConfigFromMap(Map params) { - String conf = - "" + - "" + - "" + - "" + - ""; - params.put("user", user); - params.put("password", password); - params.put("host", host); - params.put("protocol", protocol); - StringBuilder attribs = new StringBuilder(""); - for (String key : params.keySet()) - attribs.append(" ").append(key).append("=" + "\"").append(params.get(key)).append("\""); - attribs.append(" "); - return conf.replace("someconfig", attribs.toString()); - } - - static class SolrWriterImpl extends SolrWriter { - List docs = new ArrayList<>(); - Boolean deleteAllCalled; - Boolean commitCalled; - - public SolrWriterImpl() { - super(null, null); - } - - @Override - public boolean upload(SolrInputDocument doc) { - return docs.add(doc); - } - - - @Override - public void doDeleteAll() { - deleteAllCalled = Boolean.TRUE; - } - - @Override - public void commit(boolean b) { - commitCalled = Boolean.TRUE; - } - } -} diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java deleted file mode 100644 index 05acfca90c3..00000000000 --- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.BeforeClass; -import org.junit.Test; - -import java.util.Locale; - -/**Testcase for TikaEntityProcessor - * - * @since solr 3.1 - */ -public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase { - private String conf = - "" + - " " + - " " + - " " + - " " + - " " + - " " + - " " + - " " + - ""; - - private String skipOnErrConf = - "" + - " " + - " " + - " " + - "" + - " " + - " " + - " " + - "" + - " " + - ""; - - private String spatialConf = - "" + - " " + - " " + - " " + - " " + - " " + - " " + - ""; - - private String vsdxConf = - "" + - " " + - " " + - " " + - " " + - " " + - " " + - ""; - - private String[] tests = { - "//*[@numFound='1']" - ,"//str[@name='author'][.='Grant Ingersoll']" - ,"//str[@name='title'][.='solr-word']" - ,"//str[@name='text']" - }; - - private String[] testsHTMLDefault = { - "//*[@numFound='1']" - , "//str[@name='text'][contains(.,'Basic div')]" - , "//str[@name='text'][contains(.,'

')]" - , "//str[@name='text'][not(contains(.,'
'))]" //default mapper lower-cases elements as it maps - , "//str[@name='text'][not(contains(.,'
'))]" - }; - - private String[] testsHTMLIdentity = { - "//*[@numFound='1']" - , "//str[@name='text'][contains(.,'Basic div')]" - , "//str[@name='text'][contains(.,'

')]" - , "//str[@name='text'][contains(.,'
')]" - , "//str[@name='text'][contains(.,'class=\"classAttribute\"')]" //attributes are lower-cased - }; - - private String[] testsSpatial = { - "//*[@numFound='1']" - }; - - private String[] testsEmbedded = { - "//*[@numFound='1']", - "//str[@name='text'][contains(.,'When in the Course')]" - }; - - private String[] testsIgnoreEmbedded = { - "//*[@numFound='1']", - "//str[@name='text'][not(contains(.,'When in the Course'))]" - }; - - private String[] testsVSDX = { - "//*[@numFound='1']", - "//str[@name='text'][contains(.,'Arrears')]" - }; - - @BeforeClass - public static void beforeClass() throws Exception { - assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)", - new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage())); - initCore("dataimport-solrconfig.xml", "dataimport-schema-no-unique-key.xml", getFile("dihextras/solr").getAbsolutePath()); - } - - @Test - public void testIndexingWithTikaEntityProcessor() throws Exception { - runFullImport(conf); - assertQ(req("*:*"), tests ); - } - - @Test - public void testSkip() throws Exception { - runFullImport(skipOnErrConf); - assertQ(req("*:*"), "//*[@numFound='1']"); - } - - @Test - public void testVSDX() throws Exception { - //this ensures that we've included the curvesapi dependency - //and that the ConnectsType class is bundled with poi-ooxml-schemas. 
- runFullImport(vsdxConf); - assertQ(req("*:*"), testsVSDX); - } - - @Test - public void testTikaHTMLMapperEmpty() throws Exception { - runFullImport(getConfigHTML(null)); - assertQ(req("*:*"), testsHTMLDefault); - } - - @Test - public void testTikaHTMLMapperDefault() throws Exception { - runFullImport(getConfigHTML("default")); - assertQ(req("*:*"), testsHTMLDefault); - } - - @Test - public void testTikaHTMLMapperIdentity() throws Exception { - runFullImport(getConfigHTML("identity")); - assertQ(req("*:*"), testsHTMLIdentity); - } - - @Test - public void testTikaGeoMetadata() throws Exception { - runFullImport(spatialConf); - String pt = "38.97,-77.018"; - Double distance = 5.0d; - assertQ(req("q", "*:* OR foo_i:" + random().nextInt(100), "fq", - "{!geofilt sfield=\"home\"}\"", - "pt", pt, "d", String.valueOf(distance)), testsSpatial); - } - - private String getConfigHTML(String htmlMapper) { - return - "" + - " " + - " " + - " " + - " " + - " " + - " " + - ""; - - } - - @Test - public void testEmbeddedDocsLegacy() throws Exception { - //test legacy behavior: ignore embedded docs - runFullImport(conf); - assertQ(req("*:*"), testsIgnoreEmbedded); - } - - @Test - public void testEmbeddedDocsTrue() throws Exception { - runFullImport(getConfigEmbedded(true)); - assertQ(req("*:*"), testsEmbedded); - } - - @Test - public void testEmbeddedDocsFalse() throws Exception { - runFullImport(getConfigEmbedded(false)); - assertQ(req("*:*"), testsIgnoreEmbedded); - } - - private String getConfigEmbedded(boolean extractEmbedded) { - return - "" + - " " + - " " + - " " + - " " + - " " + - " " + - " " + - " " + - ""; - } -} diff --git a/solr/contrib/dataimporthandler/README.md b/solr/contrib/dataimporthandler/README.md deleted file mode 100644 index 8dc9391494e..00000000000 --- a/solr/contrib/dataimporthandler/README.md +++ /dev/null @@ -1,26 +0,0 @@ -Apache Solr - DataImportHandler -================================ - -Introduction ------------- -DataImportHandler is a data import 
tool for Solr which makes importing data from Databases, XML files and -HTTP data sources quick and easy. - -Important Note --------------- -Although Solr strives to be agnostic of the Locale where the server is -running, some code paths in DataImportHandler are known to depend on the -System default Locale, Timezone, or Charset. It is recommended that when -running Solr you set the following system properties: - -Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ - -where xx, YY, and ZZZ are consistent with any database server's configuration. - -Deprecation notice ------------------- -This contrib module is deprecated as of v8.6, scheduled for removal in Solr 9.0. -The reason is that DIH is no longer being maintained in a manner we feel is necessary in order to keep it -healthy and secure. Also it was not designed to work with SolrCloud and does not meet current performance requirements. - -The project hopes that the community will take over maintenance of DIH as a 3rd party package (See SOLR-14066 for more details). Please reach out to us at the dev@ mailing list if you want to help. - diff --git a/solr/contrib/dataimporthandler/build.gradle b/solr/contrib/dataimporthandler/build.gradle deleted file mode 100644 index 9286d4317b6..00000000000 --- a/solr/contrib/dataimporthandler/build.gradle +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -apply plugin: 'java-library' - -description = 'Data Import Handler' - -dependencies { - implementation project(':solr:core') - - testImplementation project(':solr:test-framework') - - testImplementation('org.mockito:mockito-core', { - exclude group: "net.bytebuddy", module: "byte-buddy-agent" - }) - testImplementation ('org.hsqldb:hsqldb') - testImplementation ('org.apache.derby:derby') - testImplementation ('org.objenesis:objenesis') -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java deleted file mode 100644 index f4b1d7a24bf..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.util.ContentStream; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -import java.io.InputStream; -import java.io.IOException; -import java.util.Properties; -/** - *

A data source implementation which can be used to read binary stream from content streams.

Refer to http://wiki.apache.org/solr/DataImportHandler for more - * details.

- *

- * This API is experimental and may change in the future. - * - * @since solr 3.1 - */ - -public class BinContentStreamDataSource extends DataSource { - private ContextImpl context; - private ContentStream contentStream; - private InputStream in; - - - @Override - public void init(Context context, Properties initProps) { - this.context = (ContextImpl) context; - } - - @Override - public InputStream getData(String query) { - contentStream = context.getDocBuilder().getReqParams().getContentStream(); - if (contentStream == null) - throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body"); - try { - return in = contentStream.getStream(); - } catch (IOException e) { - DataImportHandlerException.wrapAndThrow(SEVERE, e); - return null; - } - } - - @Override - public void close() { - if (contentStream != null) { - try { - if (in == null) in = contentStream.getStream(); - in.close(); - } catch (IOException e) { - /*no op*/ - } - } - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java deleted file mode 100644 index dc7a0f552e3..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -import java.io.InputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.util.Properties; -/** - *

- * A DataSource which reads from local files - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 3.1 - */ - -public class BinFileDataSource extends DataSource{ - protected String basePath; - @Override - public void init(Context context, Properties initProps) { - basePath = initProps.getProperty(FileDataSource.BASE_PATH); - } - - @Override - public InputStream getData(String query) { - File f = FileDataSource.getFile(basePath,query); - try { - return new FileInputStream(f); - } catch (FileNotFoundException e) { - wrapAndThrow(SEVERE,e,"Unable to open file "+f.getAbsolutePath()); - return null; - } - } - - @Override - public void close() { - - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java deleted file mode 100644 index 03a30ab07a9..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.*; -import static org.apache.solr.handler.dataimport.URLDataSource.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.InputStream; -import java.lang.invoke.MethodHandles; -import java.net.URL; -import java.net.URLConnection; -import java.util.Properties; -/** - *

A data source implementation which can be used to read binary streams using HTTP.

Refer to http://wiki.apache.org/solr/DataImportHandler for more - * details.

- *

- * This API is experimental and may change in the future. - * - * @since solr 3.1 - */ -public class BinURLDataSource extends DataSource{ - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private String baseUrl; - private int connectionTimeout = CONNECTION_TIMEOUT; - - private int readTimeout = READ_TIMEOUT; - - private Context context; - - private Properties initProps; - - public BinURLDataSource() { } - - @Override - public void init(Context context, Properties initProps) { - this.context = context; - this.initProps = initProps; - - baseUrl = getInitPropWithReplacements(BASE_URL); - String cTimeout = getInitPropWithReplacements(CONNECTION_TIMEOUT_FIELD_NAME); - String rTimeout = getInitPropWithReplacements(READ_TIMEOUT_FIELD_NAME); - if (cTimeout != null) { - try { - connectionTimeout = Integer.parseInt(cTimeout); - } catch (NumberFormatException e) { - log.warn("Invalid connection timeout: {}", cTimeout); - } - } - if (rTimeout != null) { - try { - readTimeout = Integer.parseInt(rTimeout); - } catch (NumberFormatException e) { - log.warn("Invalid read timeout: {}", rTimeout); - } - } - } - - @Override - public InputStream getData(String query) { - URL url = null; - try { - if (URIMETHOD.matcher(query).find()) url = new URL(query); - else url = new URL(baseUrl + query); - log.debug("Accessing URL: {}", url); - URLConnection conn = url.openConnection(); - conn.setConnectTimeout(connectionTimeout); - conn.setReadTimeout(readTimeout); - return conn.getInputStream(); - } catch (Exception e) { - log.error("Exception thrown while getting data", e); - wrapAndThrow (SEVERE, e, "Exception in invoking url " + url); - return null;//unreachable - } - } - - @Override - public void close() { } - - private String getInitPropWithReplacements(String propertyName) { - final String expr = initProps.getProperty(propertyName); - if (expr == null) { - return null; - } - return context.replaceTokens(expr); - } -} diff --git 
a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java deleted file mode 100644 index 544761f8d88..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -public class CachePropertyUtil { - public static String getAttributeValueAsString(Context context, String attr) { - Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY); - if (o == null) { - o = context.getResolvedEntityAttribute(attr); - } - if (o == null && context.getRequestParameters() != null) { - o = context.getRequestParameters().get(attr); - } - if (o == null) { - return null; - } - return o.toString(); - } - - public static Object getAttributeValue(Context context, String attr) { - Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY); - if (o == null) { - o = context.getResolvedEntityAttribute(attr); - } - if (o == null && context.getRequestParameters() != null) { - o = context.getRequestParameters().get(attr); - } - if (o == null) { - return null; - } - return o; - } - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java deleted file mode 100644 index 2e9d93a0c1a..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.HTMLStripTransformer.TRUE; - -import java.io.IOException; -import java.io.Reader; -import java.sql.Clob; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * {@link Transformer} instance which converts a {@link Clob} to a {@link String}. - *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- * This API is experimental and subject to change - * - * @since solr 1.4 - */ -public class ClobTransformer extends Transformer { - @Override - public Object transformRow(Map aRow, Context context) { - for (Map map : context.getAllEntityFields()) { - if (!TRUE.equals(map.get(CLOB))) continue; - String column = map.get(DataImporter.COLUMN); - String srcCol = map.get(RegexTransformer.SRC_COL_NAME); - if (srcCol == null) - srcCol = column; - Object o = aRow.get(srcCol); - if (o instanceof List) { - @SuppressWarnings({"unchecked"}) - List inputs = (List) o; - List results = new ArrayList<>(); - for (Object input : inputs) { - if (input instanceof Clob) { - Clob clob = (Clob) input; - results.add(readFromClob(clob)); - } - } - aRow.put(column, results); - } else { - if (o instanceof Clob) { - Clob clob = (Clob) o; - aRow.put(column, readFromClob(clob)); - } - } - } - return aRow; - } - - private String readFromClob(Clob clob) { - Reader reader = FieldReaderDataSource.readCharStream(clob); - StringBuilder sb = new StringBuilder(); - char[] buf = new char[1024]; - int len; - try { - while ((len = reader.read(buf)) != -1) { - sb.append(buf, 0, len); - } - } catch (IOException e) { - DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e); - } - return sb.toString(); - } - - public static final String CLOB = "clob"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java deleted file mode 100644 index 179df231526..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -import org.w3c.dom.Element; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -public class ConfigParseUtil { - public static String getStringAttribute(Element e, String name, String def) { - String r = e.getAttribute(name); - if (r == null || "".equals(r.trim())) - r = def; - return r; - } - - public static HashMap getAllAttributes(Element e) { - HashMap m = new HashMap<>(); - NamedNodeMap nnm = e.getAttributes(); - for (int i = 0; i < nnm.getLength(); i++) { - m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue()); - } - return m; - } - - public static String getText(Node elem, StringBuilder buffer) { - if (elem.getNodeType() != Node.CDATA_SECTION_NODE) { - NodeList childs = elem.getChildNodes(); - for (int i = 0; i < childs.getLength(); i++) { - Node child = childs.item(i); - short childType = child.getNodeType(); - if (childType != Node.COMMENT_NODE - && childType != Node.PROCESSING_INSTRUCTION_NODE) { - getText(child, buffer); - } - } - } else { - buffer.append(elem.getNodeValue()); - } - - return buffer.toString(); - } - - public static List getChildNodes(Element e, String byName) { - List result = new ArrayList<>(); - NodeList l = e.getChildNodes(); - for (int i = 0; i < l.getLength(); i++) { - if 
(e.equals(l.item(i).getParentNode()) - && byName.equals(l.item(i).getNodeName())) - result.add((Element) l.item(i)); - } - return result; - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java deleted file mode 100644 index 4482160c22b..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.util.ContentStream; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -import java.io.IOException; -import java.io.Reader; -import java.util.Properties; - -/** - * A DataSource implementation which reads from the ContentStream of a POST request - *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- * This API is experimental and may change in the future. - * - * @since solr 1.4 - */ -public class ContentStreamDataSource extends DataSource { - private ContextImpl context; - private ContentStream contentStream; - private Reader reader; - - @Override - public void init(Context context, Properties initProps) { - this.context = (ContextImpl) context; - } - - @Override - public Reader getData(String query) { - contentStream = context.getDocBuilder().getReqParams().getContentStream(); - if (contentStream == null) - throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body"); - try { - return reader = contentStream.getReader(); - } catch (IOException e) { - DataImportHandlerException.wrapAndThrow(SEVERE, e); - return null; - } - } - - @Override - public void close() { - if (contentStream != null) { - try { - if (reader == null) reader = contentStream.getReader(); - reader.close(); - } catch (IOException e) { - } - } - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java deleted file mode 100644 index 70dbbcb6ec6..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.core.SolrCore; - -import java.util.List; -import java.util.Map; - -/** - *

- * This abstract class gives access to all available objects. So any - * component implemented by a user can have the full power of DataImportHandler - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -public abstract class Context { - public static final String FULL_DUMP = "FULL_DUMP", DELTA_DUMP = "DELTA_DUMP", FIND_DELTA = "FIND_DELTA"; - - /** - * An object stored in entity scope is valid only for the current entity for the current document only. - */ - public static final String SCOPE_ENTITY = "entity"; - - /** - * An object stored in global scope is available for the current import only but across entities and documents. - */ - public static final String SCOPE_GLOBAL = "global"; - - /** - * An object stored in document scope is available for the current document only but across entities. - */ - public static final String SCOPE_DOC = "document"; - - /** - * An object stored in 'solrcore' scope is available across imports, entities and documents throughout the life of - * a solr core. A solr core unload or reload will destroy this data. - */ - public static final String SCOPE_SOLR_CORE = "solrcore"; - - /** - * Get the value of any attribute put into this entity - * - * @param name name of the attribute eg: 'name' - * @return value of named attribute in entity - */ - public abstract String getEntityAttribute(String name); - - /** - * Get the value of any attribute put into this entity after resolving all variables found in the attribute value - * @param name name of the attribute - * @return value of the named attribute after resolving all variables - */ - public abstract String getResolvedEntityAttribute(String name); - - /** - * Returns all the fields put into an entity. each item (which is a map ) in - * the list corresponds to one field. 
each if the map contains the attribute - * names and values in a field - * - * @return all fields in an entity - */ - public abstract List> getAllEntityFields(); - - /** - * Returns the VariableResolver used in this entity which can be used to - * resolve the tokens in ${<namespce.name>} - * - * @return a VariableResolver instance - * @see org.apache.solr.handler.dataimport.VariableResolver - */ - - public abstract VariableResolver getVariableResolver(); - - /** - * Gets the datasource instance defined for this entity. Do not close() this instance. - * Transformers should use the getDataSource(String name) method. - * - * @return a new DataSource instance as configured for the current entity - * @see org.apache.solr.handler.dataimport.DataSource - * @see #getDataSource(String) - */ - @SuppressWarnings({"rawtypes"}) - public abstract DataSource getDataSource(); - - /** - * Gets a new DataSource instance with a name. Ensure that you close() this after use - * because this is created just for this method call. 
- * - * @param name Name of the dataSource as defined in the dataSource tag - * @return a new DataSource instance - * @see org.apache.solr.handler.dataimport.DataSource - */ - @SuppressWarnings({"rawtypes"}) - public abstract DataSource getDataSource(String name); - - /** - * Returns the instance of EntityProcessor used for this entity - * - * @return instance of EntityProcessor used for the current entity - * @see org.apache.solr.handler.dataimport.EntityProcessor - */ - public abstract EntityProcessor getEntityProcessor(); - - /** - * Store values in a certain name and scope (entity, document,global) - * - * @param name the key - * @param val the value - * @param scope the scope in which the given key, value pair is to be stored - */ - public abstract void setSessionAttribute(String name, Object val, String scope); - - /** - * get a value by name in the given scope (entity, document,global) - * - * @param name the key - * @param scope the scope from which the value is to be retrieved - * @return the object stored in the given scope with the given key - */ - public abstract Object getSessionAttribute(String name, String scope); - - /** - * Get the context instance for the parent entity. 
works only in the full dump - * If the current entity is rootmost a null is returned - * - * @return parent entity's Context - */ - public abstract Context getParentContext(); - - /** - * The request parameters passed over HTTP for this command the values in the - * map are either String(for single valued parameters) or List<String> (for - * multi-valued parameters) - * - * @return the request parameters passed in the URL to initiate this process - */ - public abstract Map getRequestParameters(); - - /** - * Returns if the current entity is the root entity - * - * @return true if current entity is the root entity, false otherwise - */ - public abstract boolean isRootEntity(); - - /** - * Returns the current process FULL_DUMP, DELTA_DUMP, FIND_DELTA - * - * @return the type of the current running process - */ - public abstract String currentProcess(); - - /** - * Exposing the actual SolrCore to the components - * - * @return the core - */ - public abstract SolrCore getSolrCore(); - - /** - * Makes available some basic running statistics such as "docCount", - * "deletedDocCount", "rowCount", "queryCount" and "skipDocCount" - * - * @return a Map containing running statistics of the current import - */ - public abstract Map getStats(); - - /** - * Returns the text specified in the script tag in the data-config.xml - */ - public abstract String getScript(); - - /** - * Returns the language of the script as specified in the script tag in data-config.xml - */ - public abstract String getScriptLanguage(); - - /**delete a document by id - */ - public abstract void deleteDoc(String id); - - /**delete documents by query - */ - public abstract void deleteDocByQuery(String query); - - /**Use this directly to resolve variable - * @param var the variable name - * @return the resolved value - */ - public abstract Object resolve(String var); - - /** Resolve variables in a template - * - * @return The string w/ variables resolved - */ - public abstract String replaceTokens(String 
template); - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java deleted file mode 100644 index 3d9f3868508..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; -import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.dataimport.config.Script; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - *

- * An implementation for the Context - *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -public class ContextImpl extends Context { - protected EntityProcessorWrapper epw; - - private ContextImpl parent; - - private VariableResolver resolver; - - @SuppressWarnings({"rawtypes"}) - private DataSource ds; - - private String currProcess; - - private Map requestParams; - - private DataImporter dataImporter; - - private Map entitySession, globalSession; - - private Exception lastException = null; - - DocBuilder.DocWrapper doc; - - DocBuilder docBuilder; - - - - public ContextImpl(EntityProcessorWrapper epw, VariableResolver resolver, - @SuppressWarnings({"rawtypes"})DataSource ds, String currProcess, - Map global, ContextImpl parentContext, DocBuilder docBuilder) { - this.epw = epw; - this.docBuilder = docBuilder; - this.resolver = resolver; - this.ds = ds; - this.currProcess = currProcess; - if (docBuilder != null) { - this.requestParams = docBuilder.getReqParams().getRawParams(); - dataImporter = docBuilder.dataImporter; - } - globalSession = global; - parent = parentContext; - } - - @Override - public String getEntityAttribute(String name) { - return epw==null || epw.getEntity() == null ? null : epw.getEntity().getAllAttributes().get(name); - } - - @Override - public String getResolvedEntityAttribute(String name) { - return epw==null || epw.getEntity() == null ? null : resolver.replaceTokens(epw.getEntity().getAllAttributes().get(name)); - } - - @Override - public List> getAllEntityFields() { - return epw==null || epw.getEntity() == null ? 
Collections.emptyList() : epw.getEntity().getAllFieldsList(); - } - - @Override - public VariableResolver getVariableResolver() { - return resolver; - } - - @Override - @SuppressWarnings({"rawtypes"}) - public DataSource getDataSource() { - if (ds != null) return ds; - if(epw==null) { return null; } - if (epw!=null && epw.getDatasource() == null) { - epw.setDatasource(dataImporter.getDataSourceInstance(epw.getEntity(), epw.getEntity().getDataSourceName(), this)); - } - if (epw!=null && epw.getDatasource() != null && docBuilder != null && docBuilder.verboseDebug && - Context.FULL_DUMP.equals(currentProcess())) { - //debug is not yet implemented properly for deltas - epw.setDatasource(docBuilder.getDebugLogger().wrapDs(epw.getDatasource())); - } - return epw.getDatasource(); - } - - @Override - @SuppressWarnings({"rawtypes"}) - public DataSource getDataSource(String name) { - return dataImporter.getDataSourceInstance(epw==null ? null : epw.getEntity(), name, this); - } - - @Override - public boolean isRootEntity() { - return epw==null ? 
false : epw.getEntity().isDocRoot(); - } - - @Override - public String currentProcess() { - return currProcess; - } - - @Override - public Map getRequestParameters() { - return requestParams; - } - - @Override - public EntityProcessor getEntityProcessor() { - return epw; - } - - @Override - public void setSessionAttribute(String name, Object val, String scope) { - if(name == null) { - return; - } - if (Context.SCOPE_ENTITY.equals(scope)) { - if (entitySession == null) { - entitySession = new HashMap<>(); - } - entitySession.put(name, val); - } else if (Context.SCOPE_GLOBAL.equals(scope)) { - if (globalSession != null) { - globalSession.put(name, val); - } - } else if (Context.SCOPE_DOC.equals(scope)) { - DocBuilder.DocWrapper doc = getDocument(); - if (doc != null) { - doc.setSessionAttribute(name, val); - } - } else if (SCOPE_SOLR_CORE.equals(scope)){ - if(dataImporter != null) { - dataImporter.putToCoreScopeSession(name, val); - } - } - } - - @Override - public Object getSessionAttribute(String name, String scope) { - if (Context.SCOPE_ENTITY.equals(scope)) { - if (entitySession == null) - return null; - return entitySession.get(name); - } else if (Context.SCOPE_GLOBAL.equals(scope)) { - if (globalSession != null) { - return globalSession.get(name); - } - } else if (Context.SCOPE_DOC.equals(scope)) { - DocBuilder.DocWrapper doc = getDocument(); - return doc == null ? null: doc.getSessionAttribute(name); - } else if (SCOPE_SOLR_CORE.equals(scope)){ - return dataImporter == null ? 
null : dataImporter.getFromCoreScopeSession(name); - } - return null; - } - - @Override - public Context getParentContext() { - return parent; - } - - private DocBuilder.DocWrapper getDocument() { - ContextImpl c = this; - while (true) { - if (c.doc != null) - return c.doc; - if (c.parent != null) - c = c.parent; - else - return null; - } - } - - void setDoc(DocBuilder.DocWrapper docWrapper) { - this.doc = docWrapper; - } - - - @Override - public SolrCore getSolrCore() { - return dataImporter == null ? null : dataImporter.getCore(); - } - - - @Override - public Map getStats() { - return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.emptyMap(); - } - - @Override - public String getScript() { - if (dataImporter != null) { - Script script = dataImporter.getConfig().getScript(); - return script == null ? null : script.getText(); - } - return null; - } - - @Override - public String getScriptLanguage() { - if (dataImporter != null) { - Script script = dataImporter.getConfig().getScript(); - return script == null ? 
null : script.getLanguage(); - } - return null; - } - - @Override - public void deleteDoc(String id) { - if(docBuilder != null){ - docBuilder.writer.deleteDoc(id); - } - } - - @Override - public void deleteDocByQuery(String query) { - if(docBuilder != null){ - docBuilder.writer.deleteByQuery(query); - } - } - - DocBuilder getDocBuilder(){ - return docBuilder; - } - @Override - public Object resolve(String var) { - return resolver.resolve(var); - } - - @Override - public String replaceTokens(String template) { - return resolver.replaceTokens(template); - } - - public Exception getLastException() { return lastException; } - - public void setLastException(Exception lastException) {this.lastException = lastException; } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java deleted file mode 100644 index a67b3e42963..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.util.Iterator; -import java.util.Map; - -/** - *

- * A cache that allows a DIH entity's data to persist locally prior being joined - * to other data and/or indexed. - *

- * - * @lucene.experimental - */ -public interface DIHCache extends Iterable> { - - /** - *

- * Opens the cache using the specified properties. The {@link Context} - * includes any parameters needed by the cache impl. This must be called - * before any read/write operations are permitted. - */ - void open(Context context); - - /** - *

- * Releases resources used by this cache, if possible. The cache is flushed - * but not destroyed. - *

- */ - void close(); - - /** - *

- * Persists any pending data to the cache - *

- */ - void flush(); - - /** - *

- * Closes the cache, if open. Then removes all data, possibly removing the - * cache entirely from persistent storage. - *

- */ - public void destroy(); - - /** - *

- * Adds a document. If a document already exists with the same key, both - * documents will exist in the cache, as the cache allows duplicate keys. To - * update a key's documents, first call delete(Object key). - *

- */ - void add(Map rec); - - /** - *

- * Returns an iterator, allowing callers to iterate through the entire cache - * in key, then insertion, order. - *

- */ - @Override - Iterator> iterator(); - - /** - *

- * Returns an iterator, allowing callers to iterate through all documents that - * match the given key in insertion order. - *

- */ - Iterator> iterator(Object key); - - /** - *

- * Delete all documents associated with the given key - *

- */ - void delete(Object key); - - /** - *

- * Delete all data from the cache,leaving the empty cache intact. - *

- */ - void deleteAll(); - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java deleted file mode 100644 index 2f3d95743fd..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; - -import java.lang.invoke.MethodHandles; -import java.lang.reflect.Constructor; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import org.apache.solr.common.SolrException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class DIHCacheSupport { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private String cacheForeignKey; - private String cacheImplName; - private Map queryVsCache = new HashMap<>(); - private Map>> queryVsCacheIterator; - private Iterator> dataSourceRowCache; - private boolean cacheDoKeyLookup; - - public DIHCacheSupport(Context context, String cacheImplName) { - this.cacheImplName = cacheImplName; - - Relation r = new Relation(context); - cacheDoKeyLookup = r.doKeyLookup; - String cacheKey = r.primaryKey; - cacheForeignKey = r.foreignKey; - - context.setSessionAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY, cacheKey, - Context.SCOPE_ENTITY); - context.setSessionAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY, cacheForeignKey, - Context.SCOPE_ENTITY); - context.setSessionAttribute(DIHCacheSupport.CACHE_DELETE_PRIOR_DATA, - "true", Context.SCOPE_ENTITY); - context.setSessionAttribute(DIHCacheSupport.CACHE_READ_ONLY, "false", - Context.SCOPE_ENTITY); - } - - static class Relation{ - protected final boolean doKeyLookup; - protected final String foreignKey; - protected final String primaryKey; - - public Relation(Context context) { - String where = context.getEntityAttribute("where"); - String cacheKey = context.getEntityAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY); - String lookupKey = context.getEntityAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY); - if (cacheKey != null && lookupKey == null) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'cacheKey' is specified for the entity " - 
+ context.getEntityAttribute("name") - + " but 'cacheLookup' is missing"); - - } - if (where == null && cacheKey == null) { - doKeyLookup = false; - primaryKey = null; - foreignKey = null; - } else { - if (where != null) { - String[] splits = where.split("="); - primaryKey = splits[0]; - foreignKey = splits[1].trim(); - } else { - primaryKey = cacheKey; - foreignKey = lookupKey; - } - doKeyLookup = true; - } - } - - @Override - public String toString() { - return "Relation " - + primaryKey + "="+foreignKey ; - } - - - } - - private DIHCache instantiateCache(Context context) { - DIHCache cache = null; - try { - @SuppressWarnings("unchecked") - Class cacheClass = DocBuilder.loadClass(cacheImplName, context - .getSolrCore()); - Constructor constr = cacheClass.getConstructor(); - cache = constr.newInstance(); - cache.open(context); - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Unable to load Cache implementation:" + cacheImplName, e); - } - return cache; - } - - public void initNewParent(Context context) { - dataSourceRowCache = null; - queryVsCacheIterator = new HashMap<>(); - for (Map.Entry entry : queryVsCache.entrySet()) { - queryVsCacheIterator.put(entry.getKey(), entry.getValue().iterator()); - } - } - - public void destroyAll() { - if (queryVsCache != null) { - for (DIHCache cache : queryVsCache.values()) { - cache.destroy(); - } - } - queryVsCache = null; - dataSourceRowCache = null; - cacheForeignKey = null; - } - - /** - *

- * Get all the rows from the datasource for the given query and cache them - *

- */ - public void populateCache(String query, - Iterator> rowIterator) { - Map aRow = null; - DIHCache cache = queryVsCache.get(query); - while ((aRow = getNextFromCache(query, rowIterator)) != null) { - cache.add(aRow); - } - } - - private Map getNextFromCache(String query, - Iterator> rowIterator) { - try { - if (rowIterator == null) return null; - if (rowIterator.hasNext()) return rowIterator.next(); - return null; - } catch (Exception e) { - SolrException.log(log, "getNextFromCache() failed for query '" + query - + "'", e); - wrapAndThrow(DataImportHandlerException.WARN, e); - return null; - } - } - - public Map getCacheData(Context context, String query, - Iterator> rowIterator) { - if (cacheDoKeyLookup) { - return getIdCacheData(context, query, rowIterator); - } else { - return getSimpleCacheData(context, query, rowIterator); - } - } - - /** - * If the where clause is present the cache is sql Vs Map of key Vs List of - * Rows. - * - * @param query - * the query string for which cached data is to be returned - * - * @return the cached row corresponding to the given query after all variables - * have been resolved - */ - protected Map getIdCacheData(Context context, String query, - Iterator> rowIterator) { - Object key = context.resolve(cacheForeignKey); - if (key == null) { - throw new DataImportHandlerException(DataImportHandlerException.WARN, - "The cache lookup value : " + cacheForeignKey - + " is resolved to be null in the entity :" - + context.getEntityAttribute("name")); - - } - if (dataSourceRowCache == null) { - DIHCache cache = queryVsCache.get(query); - - if (cache == null) { - cache = instantiateCache(context); - queryVsCache.put(query, cache); - populateCache(query, rowIterator); - } - dataSourceRowCache = cache.iterator(key); - } - return getFromRowCacheTransformed(); - } - - /** - * If where clause is not present the cache is a Map of query vs List of Rows. 
- * - * @param query - * string for which cached row is to be returned - * - * @return the cached row corresponding to the given query - */ - protected Map getSimpleCacheData(Context context, - String query, Iterator> rowIterator) { - if (dataSourceRowCache == null) { - DIHCache cache = queryVsCache.get(query); - if (cache == null) { - cache = instantiateCache(context); - queryVsCache.put(query, cache); - populateCache(query, rowIterator); - queryVsCacheIterator.put(query, cache.iterator()); - } - Iterator> cacheIter = queryVsCacheIterator.get(query); - dataSourceRowCache = cacheIter; - } - - return getFromRowCacheTransformed(); - } - - protected Map getFromRowCacheTransformed() { - if (dataSourceRowCache == null || !dataSourceRowCache.hasNext()) { - dataSourceRowCache = null; - return null; - } - Map r = dataSourceRowCache.next(); - return r; - } - - /** - *

- * Specify the class for the cache implementation - *

- */ - public static final String CACHE_IMPL = "cacheImpl"; - - /** - *

- * If the cache supports persistent data, set to "true" to delete any prior - * persisted data before running the entity. - *

- */ - - public static final String CACHE_DELETE_PRIOR_DATA = "cacheDeletePriorData"; - /** - *

- * Specify the Foreign Key from the parent entity to join on. Use if the cache - * is on a child entity. - *

- */ - public static final String CACHE_FOREIGN_KEY = "cacheLookup"; - - /** - *

- * Specify the Primary Key field from this Entity to map the input records - * with - *

- */ - public static final String CACHE_PRIMARY_KEY = "cacheKey"; - /** - *

- * If true, a pre-existing cache is re-opened for read-only access. - *

- */ - public static final String CACHE_READ_ONLY = "cacheReadOnly"; - - - - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java deleted file mode 100644 index 24732d1454f..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -public enum DIHLogLevels { - START_ENTITY, END_ENTITY, TRANSFORMED_ROW, ENTITY_META, PRE_TRANSFORMER_ROW, START_DOC, END_DOC, ENTITY_OUT, ROW_END, TRANSFORMER_EXCEPTION, ENTITY_EXCEPTION, DISABLE_LOGGING, ENABLE_LOGGING, NONE -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java deleted file mode 100644 index f51ef0713b6..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.Date; -import java.util.Map; - -/** - * Implementations write out properties about the last data import - * for use by the next import. ex: to persist the last import timestamp - * so that future delta imports can know what needs to be updated. 
- * - * @lucene.experimental - */ -public abstract class DIHProperties { - - public abstract void init(DataImporter dataImporter, Map initParams); - - public abstract boolean isWritable(); - - public abstract void persist(Map props); - - public abstract Map readIndexerProperties(); - - public abstract String convertDateToString(Date d); - - public Date getCurrentTimestamp() { - return new Date(); - } - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java deleted file mode 100644 index bdb988d4836..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; -import java.util.Map; -import java.util.Set; - -import org.apache.solr.common.SolrInputDocument; - -/** - * @lucene.experimental - * - */ -public interface DIHWriter { - - /** - *

- * If this writer supports transactions or commit points, then commit any changes, - * optionally optimizing the data for read/write performance - *

- */ - public void commit(boolean optimize); - - /** - *

- * Release resources used by this writer. After calling close, reads & updates will throw exceptions. - *

- */ - public void close(); - - /** - *

- * If this writer supports transactions or commit points, then roll back any uncommitted changes. - *

- */ - public void rollback(); - - /** - *

- * Delete from the writer's underlying data store based the passed-in writer-specific query. (Optional Operation) - *

- */ - public void deleteByQuery(String q); - - /** - *

- * Delete everything from the writer's underlying data store - *

- */ - public void doDeleteAll(); - - /** - *

- * Delete from the writer's underlying data store based on the passed-in Primary Key - *

- */ - public void deleteDoc(Object key); - - - - /** - *

- * Add a document to this writer's underlying data store. - *

- * @return true on success, false on failure - */ - public boolean upload(SolrInputDocument doc); - - - - /** - *

- * Provide context information for this writer. init() should be called before using the writer. - *

- */ - public void init(Context context) ; - - - /** - *

- * Specify the keys to be modified by a delta update (required by writers that can store duplicate keys) - *

- */ - public void setDeltaKeys(Set> deltaKeys) ; - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java deleted file mode 100644 index 43e92c37f79..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -public abstract class DIHWriterBase implements DIHWriter { - protected String keyFieldName; - protected Set deltaKeys = null; - - @Override - public void setDeltaKeys(Set> passedInDeltaKeys) { - deltaKeys = new HashSet<>(); - for (Map aMap : passedInDeltaKeys) { - if (aMap.size() > 0) { - Object key = null; - if (keyFieldName != null) { - key = aMap.get(keyFieldName); - } else { - key = aMap.entrySet().iterator().next(); - } - if (key != null) { - deltaKeys.add(key); - } - } - } - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java deleted file mode 100644 index 278de7dfb5d..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.lang.invoke.MethodHandles; -import java.lang.reflect.Constructor; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.MapSolrParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.ContentStreamBase; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.StrUtils; -import org.apache.solr.core.SolrCore; -import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.handler.RequestHandlerBase; -import org.apache.solr.metrics.MetricsMap; -import org.apache.solr.metrics.SolrMetricsContext; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.response.RawResponseWriter; -import org.apache.solr.response.SolrQueryResponse; -import org.apache.solr.update.processor.UpdateRequestProcessor; -import org.apache.solr.update.processor.UpdateRequestProcessorChain; -import org.apache.solr.util.plugin.SolrCoreAware; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD; - -/** - *

- * Solr Request Handler for data import from databases and REST data sources. - *

- *

- * It is configured in solrconfig.xml - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and subject to change - * - * @deprecated since 8.6 - * @since solr 1.3 - */ -@Deprecated(since = "8.6") -public class DataImportHandler extends RequestHandlerBase implements - SolrCoreAware { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private DataImporter importer; - - private boolean debugEnabled = true; - - private String myName = "dataimport"; - - private MetricsMap metrics; - - private static final String PARAM_WRITER_IMPL = "writerImpl"; - private static final String DEFAULT_WRITER_NAME = "SolrWriter"; - static final String ENABLE_DIH_DATA_CONFIG_PARAM = "enable.dih.dataConfigParam"; - - final boolean dataConfigParam_enabled = Boolean.getBoolean(ENABLE_DIH_DATA_CONFIG_PARAM); - - public DataImporter getImporter() { - return this.importer; - } - - @Override - - public void init(@SuppressWarnings({"rawtypes"})NamedList args) { - super.init(args); - Map macro = new HashMap<>(); - macro.put("expandMacros", "false"); - defaults = SolrParams.wrapDefaults(defaults, new MapSolrParams(macro)); - log.warn("Data Import Handler is deprecated as of Solr 8.6. See SOLR-14066 for more details."); - } - - @Override - @SuppressWarnings("unchecked") - public void inform(SolrCore core) { - try { - String name = getPluginInfo().name; - if (name.startsWith("/")) { - myName = name.substring(1); - } - // some users may have '/' in the handler name. 
replace with '_' - myName = myName.replaceAll("/", "_"); - debugEnabled = StrUtils.parseBool((String)initArgs.get(ENABLE_DEBUG), true); - importer = new DataImporter(core, myName); - } catch (Exception e) { - log.error( DataImporter.MSG.LOAD_EXP, e); - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, DataImporter.MSG.LOAD_EXP, e); - } - } - - @Override - @SuppressWarnings("unchecked") - public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) - throws Exception { - rsp.setHttpCaching(false); - - //TODO: figure out why just the first one is OK... - ContentStream contentStream = null; - Iterable streams = req.getContentStreams(); - if(streams != null){ - for (ContentStream stream : streams) { - contentStream = stream; - break; - } - } - SolrParams params = req.getParams(); - @SuppressWarnings({"rawtypes"}) - NamedList defaultParams = (NamedList) initArgs.get("defaults"); - RequestInfo requestParams = new RequestInfo(req, getParamsMap(params), contentStream); - String command = requestParams.getCommand(); - - if (DataImporter.SHOW_CONF_CMD.equals(command)) { - String dataConfigFile = params.get("config"); - String dataConfig = params.get("dataConfig"); // needn't check dataConfigParam_enabled; we don't execute it - if(dataConfigFile != null) { - dataConfig = SolrWriter.getResourceAsString(req.getCore().getResourceLoader().openResource(dataConfigFile)); - } - if(dataConfig==null) { - rsp.add("status", DataImporter.MSG.NO_CONFIG_FOUND); - } else { - // Modify incoming request params to add wt=raw - ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams()); - rawParams.set(CommonParams.WT, "raw"); - req.setParams(rawParams); - ContentStreamBase content = new ContentStreamBase.StringStream(dataConfig); - rsp.add(RawResponseWriter.CONTENT, content); - } - return; - } - - if (params.get("dataConfig") != null && dataConfigParam_enabled == false) { - throw new SolrException(SolrException.ErrorCode.FORBIDDEN, - "Use of the 
dataConfig param (DIH debug mode) requires the system property " + - ENABLE_DIH_DATA_CONFIG_PARAM + " because it's a security risk."); - } - - rsp.add("initArgs", initArgs); - String message = ""; - - if (command != null) { - rsp.add("command", command); - } - // If importer is still null - if (importer == null) { - rsp.add("status", DataImporter.MSG.NO_INIT); - return; - } - - if (command != null && DataImporter.ABORT_CMD.equals(command)) { - importer.runCmd(requestParams, null); - } else if (importer.isBusy()) { - message = DataImporter.MSG.CMD_RUNNING; - } else if (command != null) { - if (DataImporter.FULL_IMPORT_CMD.equals(command) - || DataImporter.DELTA_IMPORT_CMD.equals(command) || - IMPORT_CMD.equals(command)) { - importer.maybeReloadConfiguration(requestParams, defaultParams); - UpdateRequestProcessorChain processorChain = - req.getCore().getUpdateProcessorChain(params); - UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp); - SolrResourceLoader loader = req.getCore().getResourceLoader(); - DIHWriter sw = getSolrWriter(processor, loader, requestParams, req); - - if (requestParams.isDebug()) { - if (debugEnabled) { - // Synchronous request for the debug mode - importer.runCmd(requestParams, sw); - rsp.add("mode", "debug"); - rsp.add("documents", requestParams.getDebugInfo().debugDocuments); - if (requestParams.getDebugInfo().debugVerboseOutput != null) { - rsp.add("verbose-output", requestParams.getDebugInfo().debugVerboseOutput); - } - } else { - message = DataImporter.MSG.DEBUG_NOT_ENABLED; - } - } else { - // Asynchronous request for normal mode - if(requestParams.getContentStream() == null && !requestParams.isSyncMode()){ - importer.runAsync(requestParams, sw); - } else { - importer.runCmd(requestParams, sw); - } - } - } else if (DataImporter.RELOAD_CONF_CMD.equals(command)) { - if(importer.maybeReloadConfiguration(requestParams, defaultParams)) { - message = DataImporter.MSG.CONFIG_RELOADED; - } else { - message = 
DataImporter.MSG.CONFIG_NOT_RELOADED; - } - } - } - rsp.add("status", importer.isBusy() ? "busy" : "idle"); - rsp.add("importResponse", message); - rsp.add("statusMessages", importer.getStatusMessages()); - } - - /** The value is converted to a String or {@code List} if multi-valued. */ - private Map getParamsMap(SolrParams params) { - Map result = new HashMap<>(); - for (Map.Entry pair : params){ - String s = pair.getKey(); - String[] val = pair.getValue(); - if (val == null || val.length < 1) - continue; - if (val.length == 1) - result.put(s, val[0]); - else - result.put(s, Arrays.asList(val)); - } - return result; - } - - private DIHWriter getSolrWriter(final UpdateRequestProcessor processor, - final SolrResourceLoader loader, final RequestInfo requestParams, - SolrQueryRequest req) { - SolrParams reqParams = req.getParams(); - String writerClassStr = null; - if (reqParams != null && reqParams.get(PARAM_WRITER_IMPL) != null) { - writerClassStr = reqParams.get(PARAM_WRITER_IMPL); - } - DIHWriter writer; - if (writerClassStr != null - && !writerClassStr.equals(DEFAULT_WRITER_NAME) - && !writerClassStr.equals(DocBuilder.class.getPackage().getName() + "." 
- + DEFAULT_WRITER_NAME)) { - try { - @SuppressWarnings("unchecked") - Class writerClass = DocBuilder.loadClass(writerClassStr, req.getCore()); - @SuppressWarnings({"rawtypes"}) - Constructor cnstr = writerClass.getConstructor(new Class[] { - UpdateRequestProcessor.class, SolrQueryRequest.class}); - return cnstr.newInstance((Object) processor, (Object) req); - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Unable to load Writer implementation:" + writerClassStr, e); - } - } else { - return new SolrWriter(processor, req) { - @Override - public boolean upload(SolrInputDocument document) { - try { - return super.upload(document); - } catch (RuntimeException e) { - log.error("Exception while adding: {}", document, e); - return false; - } - } - }; - } - } - - @Override - public void initializeMetrics(SolrMetricsContext parentContext, String scope) { - super.initializeMetrics(parentContext, scope); - metrics = new MetricsMap((detailed, map) -> { - if (importer != null) { - DocBuilder.Statistics cumulative = importer.cumulativeStatistics; - - map.put("Status", importer.getStatus().toString()); - - if (importer.docBuilder != null) { - DocBuilder.Statistics running = importer.docBuilder.importStatistics; - map.put("Documents Processed", running.docCount); - map.put("Requests made to DataSource", running.queryCount); - map.put("Rows Fetched", running.rowsCount); - map.put("Documents Deleted", running.deletedDocCount); - map.put("Documents Skipped", running.skipDocCount); - } - - map.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount); - map.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount); - map.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount); - map.put(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount); - map.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount); - } - }); - solrMetricsContext.gauge(metrics, true, "importer", 
getCategory().toString(), scope); - } - - // //////////////////////SolrInfoMBeans methods ////////////////////// - - @Override - public String getDescription() { - return DataImporter.MSG.JMX_DESC; - } - - public static final String ENABLE_DEBUG = "enableDebug"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java deleted file mode 100644 index e69b3fd9063..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -/** - *

Exception class for all DataImportHandler exceptions

- *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -public class DataImportHandlerException extends RuntimeException { - private int errCode; - - public boolean debugged = false; - - public static final int SEVERE = 500, WARN = 400, SKIP = 300, SKIP_ROW =301; - - public DataImportHandlerException(int err) { - super(); - errCode = err; - } - - public DataImportHandlerException(int err, String message) { - super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount())); - errCode = err; - } - - public DataImportHandlerException(int err, String message, Throwable cause) { - super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()), cause); - errCode = err; - } - - public DataImportHandlerException(int err, Throwable cause) { - super(cause); - errCode = err; - } - - public int getErrCode() { - return errCode; - } - - public static DataImportHandlerException wrapAndThrow(int err, Exception e) { - if (e instanceof DataImportHandlerException) { - throw (DataImportHandlerException) e; - } else { - throw new DataImportHandlerException(err, e); - } - } - - public static DataImportHandlerException wrapAndThrow(int err, Exception e, String msg) { - if (e instanceof DataImportHandlerException) { - throw (DataImportHandlerException) e; - } else { - throw new DataImportHandlerException(err, msg, e); - } - } - - - public static final String MSG = " Processing Document # "; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java deleted file mode 100644 index c5b2f70bf22..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java +++ /dev/null @@ -1,628 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.EmptyEntityResolver; -import org.apache.solr.common.SolrException; -import org.apache.solr.core.SolrCore; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.util.SystemIdResolver; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.XMLErrorLogger; -import org.apache.solr.handler.dataimport.config.ConfigNameConstants; -import org.apache.solr.handler.dataimport.config.ConfigParseUtil; -import org.apache.solr.handler.dataimport.config.DIHConfiguration; -import org.apache.solr.handler.dataimport.config.Entity; -import org.apache.solr.handler.dataimport.config.PropertyWriter; -import org.apache.solr.handler.dataimport.config.Script; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DocBuilder.loadClass; -import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.CLASS; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.xml.sax.InputSource; -import 
org.apache.commons.io.IOUtils; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - -import java.io.IOException; -import java.io.StringReader; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantLock; - -/** - *

Stores all configuration information for pulling and indexing data.

- *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -public class DataImporter { - - public enum Status { - IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED - } - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(log); - - private Status status = Status.IDLE; - private DIHConfiguration config; - private Date indexStartTime; - private Properties store = new Properties(); - private Map> requestLevelDataSourceProps = new HashMap<>(); - private IndexSchema schema; - public DocBuilder docBuilder; - public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics(); - private SolrCore core; - private Map coreScopeSession = new ConcurrentHashMap<>(); - private ReentrantLock importLock = new ReentrantLock(); - private boolean isDeltaImportSupported = false; - private final String handlerName; - - /** - * Only for testing purposes - */ - DataImporter() { - this.handlerName = "dataimport" ; - } - - DataImporter(SolrCore core, String handlerName) { - this.handlerName = handlerName; - this.core = core; - this.schema = core.getLatestSchema(); - } - - - - - boolean maybeReloadConfiguration(RequestInfo params, - NamedList defaultParams) throws IOException { - if (importLock.tryLock()) { - boolean success = false; - try { - if (null != params.getRequest()) { - if (schema != params.getRequest().getSchema()) { - schema = params.getRequest().getSchema(); - } - } - String dataConfigText = params.getDataConfig(); - String dataconfigFile = params.getConfigFile(); - InputSource is = null; - if(dataConfigText!=null && dataConfigText.length()>0) { - is = new InputSource(new StringReader(dataConfigText)); - } else if(dataconfigFile!=null) { - is = new InputSource(core.getResourceLoader().openResource(dataconfigFile)); - is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile)); - log.info("Loading DIH 
Configuration: {}", dataconfigFile); - } - if(is!=null) { - config = loadDataConfig(is); - success = true; - } - - Map> dsProps = new HashMap<>(); - if(defaultParams!=null) { - int position = 0; - while (position < defaultParams.size()) { - if (defaultParams.getName(position) == null) { - break; - } - String name = defaultParams.getName(position); - if (name.equals("datasource")) { - success = true; - @SuppressWarnings({"rawtypes"}) - NamedList dsConfig = (NamedList) defaultParams.getVal(position); - log.info("Getting configuration for Global Datasource..."); - Map props = new HashMap<>(); - for (int i = 0; i < dsConfig.size(); i++) { - props.put(dsConfig.getName(i), dsConfig.getVal(i).toString()); - } - log.info("Adding properties to datasource: {}", props); - dsProps.put((String) dsConfig.get("name"), props); - } - position++; - } - } - requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps); - } catch(IOException ioe) { - throw ioe; - } finally { - importLock.unlock(); - } - return success; - } else { - return false; - } - } - - - - public String getHandlerName() { - return handlerName; - } - - public IndexSchema getSchema() { - return schema; - } - - /** - * Used by tests - */ - void loadAndInit(String configStr) { - config = loadDataConfig(new InputSource(new StringReader(configStr))); - } - - void loadAndInit(InputSource configFile) { - config = loadDataConfig(configFile); - } - - public DIHConfiguration loadDataConfig(InputSource configFile) { - - DIHConfiguration dihcfg = null; - try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - dbf.setValidating(false); - - // only enable xinclude, if XML is coming from safe source (local file) - // and a a SolrCore and SystemId is present (makes no sense otherwise): - if (core != null && configFile.getSystemId() != null) { - try { - dbf.setXIncludeAware(true); - dbf.setNamespaceAware(true); - } catch( UnsupportedOperationException e ) { - log.warn( "XML parser doesn't support XInclude 
option" ); - } - } - - DocumentBuilder builder = dbf.newDocumentBuilder(); - // only enable xinclude / external entities, if XML is coming from - // safe source (local file) and a a SolrCore and SystemId is present: - if (core != null && configFile.getSystemId() != null) { - builder.setEntityResolver(new SystemIdResolver(core.getResourceLoader())); - } else { - // Don't allow external entities without having a system ID: - builder.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE); - } - builder.setErrorHandler(XMLLOG); - Document document; - try { - document = builder.parse(configFile); - } finally { - // some XML parsers are broken and don't close the byte stream (but they should according to spec) - IOUtils.closeQuietly(configFile.getByteStream()); - } - - dihcfg = readFromXml(document); - log.info("Data Configuration loaded successfully"); - } catch (Exception e) { - throw new DataImportHandlerException(SEVERE, - "Data Config problem: " + e.getMessage(), e); - } - for (Entity e : dihcfg.getEntities()) { - if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) { - isDeltaImportSupported = true; - break; - } - } - return dihcfg; - } - - public DIHConfiguration readFromXml(Document xmlDocument) { - DIHConfiguration config; - List> functions = new ArrayList<>(); - Script script = null; - Map> dataSources = new HashMap<>(); - - NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig"); - if(dataConfigTags == null || dataConfigTags.getLength() == 0) { - throw new DataImportHandlerException(SEVERE, "the root node '' is missing"); - } - Element e = (Element) dataConfigTags.item(0); - List documentTags = ConfigParseUtil.getChildNodes(e, "document"); - if (documentTags.isEmpty()) { - throw new DataImportHandlerException(SEVERE, "DataImportHandler " + - "configuration file must have one node."); - } - - List scriptTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.SCRIPT); - if (!scriptTags.isEmpty()) { - script = new 
Script(scriptTags.get(0)); - } - - // Add the provided evaluators - List functionTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.FUNCTION); - if (!functionTags.isEmpty()) { - for (Element element : functionTags) { - String func = ConfigParseUtil.getStringAttribute(element, NAME, null); - String clz = ConfigParseUtil.getStringAttribute(element, ConfigNameConstants.CLASS, null); - if (func == null || clz == null){ - throw new DataImportHandlerException( - SEVERE, - " must have a 'name' and 'class' attributes"); - } else { - functions.add(ConfigParseUtil.getAllAttributes(element)); - } - } - } - List dataSourceTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.DATA_SRC); - if (!dataSourceTags.isEmpty()) { - for (Element element : dataSourceTags) { - Map p = new HashMap<>(); - HashMap attrs = ConfigParseUtil.getAllAttributes(element); - for (Map.Entry entry : attrs.entrySet()) { - p.put(entry.getKey(), entry.getValue()); - } - dataSources.put(p.get("name"), p); - } - } - if(dataSources.get(null) == null){ - for (Map properties : dataSources.values()) { - dataSources.put(null,properties); - break; - } - } - PropertyWriter pw = null; - List propertyWriterTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.PROPERTY_WRITER); - if (propertyWriterTags.isEmpty()) { - boolean zookeeper = false; - if (this.core != null - && this.core.getCoreContainer().isZooKeeperAware()) { - zookeeper = true; - } - pw = new PropertyWriter(zookeeper ? "ZKPropertiesWriter" - : "SimplePropertiesWriter", Collections. 
emptyMap()); - } else if (propertyWriterTags.size() > 1) { - throw new DataImportHandlerException(SEVERE, "Only one " - + ConfigNameConstants.PROPERTY_WRITER + " can be configured."); - } else { - Element pwElement = propertyWriterTags.get(0); - String type = null; - Map params = new HashMap<>(); - for (Map.Entry entry : ConfigParseUtil.getAllAttributes( - pwElement).entrySet()) { - if (TYPE.equals(entry.getKey())) { - type = entry.getValue(); - } else { - params.put(entry.getKey(), entry.getValue()); - } - } - if (type == null) { - throw new DataImportHandlerException(SEVERE, "The " - + ConfigNameConstants.PROPERTY_WRITER + " element must specify " - + TYPE); - } - pw = new PropertyWriter(type, params); - } - return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources, pw); - } - - @SuppressWarnings("unchecked") - private DIHProperties createPropertyWriter() { - DIHProperties propWriter = null; - PropertyWriter configPw = config.getPropertyWriter(); - try { - Class writerClass = DocBuilder.loadClass(configPw.getType(), this.core); - propWriter = writerClass.getConstructor().newInstance(); - propWriter.init(this, configPw.getParameters()); - } catch (Exception e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Unable to PropertyWriter implementation:" + configPw.getType(), e); - } - return propWriter; - } - - public DIHConfiguration getConfig() { - return config; - } - - Date getIndexStartTime() { - return indexStartTime; - } - - void setIndexStartTime(Date indextStartTime) { - this.indexStartTime = indextStartTime; - } - - void store(Object key, Object value) { - store.put(key, value); - } - - Object retrieve(Object key) { - return store.get(key); - } - - @SuppressWarnings({"unchecked", "rawtypes"}) - public DataSource getDataSourceInstance(Entity key, String name, Context ctx) { - Map p = requestLevelDataSourceProps.get(name); - if (p == null) - p = config.getDataSources().get(name); - if (p == null) - p = 
requestLevelDataSourceProps.get(null);// for default data source - if (p == null) - p = config.getDataSources().get(null); - if (p == null) - throw new DataImportHandlerException(SEVERE, - "No dataSource :" + name + " available for entity :" + key.getName()); - String type = p.get(TYPE); - @SuppressWarnings({"rawtypes"}) - DataSource dataSrc = null; - if (type == null) { - dataSrc = new JdbcDataSource(); - } else { - try { - dataSrc = (DataSource) DocBuilder.loadClass(type, getCore()).getConstructor().newInstance(); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Invalid type for data source: " + type); - } - } - try { - Properties copyProps = new Properties(); - copyProps.putAll(p); - Map map = ctx.getRequestParameters(); - if (map.containsKey("rows")) { - int rows = Integer.parseInt((String) map.get("rows")); - if (map.containsKey("start")) { - rows += Integer.parseInt((String) map.get("start")); - } - copyProps.setProperty("maxRows", String.valueOf(rows)); - } - dataSrc.init(ctx, copyProps); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.getDataSourceName()); - } - return dataSrc; - } - - public Status getStatus() { - return status; - } - - public void setStatus(Status status) { - this.status = status; - } - - public boolean isBusy() { - return importLock.isLocked(); - } - - public void doFullImport(DIHWriter writer, RequestInfo requestParams) { - log.info("Starting Full Import"); - setStatus(Status.RUNNING_FULL_DUMP); - try { - DIHProperties dihPropWriter = createPropertyWriter(); - setIndexStartTime(dihPropWriter.getCurrentTimestamp()); - docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams); - checkWritablePersistFile(writer, dihPropWriter); - docBuilder.execute(); - if (!requestParams.isDebug()) - cumulativeStatistics.add(docBuilder.importStatistics); - } catch (Exception e) { - SolrException.log(log, "Full Import failed", e); - docBuilder.handleError("Full Import failed", e); - } finally 
{ - setStatus(Status.IDLE); - DocBuilder.INSTANCE.set(null); - } - - } - - private void checkWritablePersistFile(DIHWriter writer, DIHProperties dihPropWriter) { - if (isDeltaImportSupported && !dihPropWriter.isWritable()) { - throw new DataImportHandlerException(SEVERE, - "Properties is not writable. Delta imports are supported by data config but will not work."); - } - } - - public void doDeltaImport(DIHWriter writer, RequestInfo requestParams) { - log.info("Starting Delta Import"); - setStatus(Status.RUNNING_DELTA_DUMP); - try { - DIHProperties dihPropWriter = createPropertyWriter(); - setIndexStartTime(dihPropWriter.getCurrentTimestamp()); - docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams); - checkWritablePersistFile(writer, dihPropWriter); - docBuilder.execute(); - if (!requestParams.isDebug()) - cumulativeStatistics.add(docBuilder.importStatistics); - } catch (Exception e) { - log.error("Delta Import Failed", e); - docBuilder.handleError("Delta Import Failed", e); - } finally { - setStatus(Status.IDLE); - DocBuilder.INSTANCE.set(null); - } - - } - - public void runAsync(final RequestInfo reqParams, final DIHWriter sw) { - new Thread(() -> runCmd(reqParams, sw)).start(); - } - - void runCmd(RequestInfo reqParams, DIHWriter sw) { - String command = reqParams.getCommand(); - if (command.equals(ABORT_CMD)) { - if (docBuilder != null) { - docBuilder.abort(); - } - return; - } - if (!importLock.tryLock()){ - log.warn("Import command failed . another import is running"); - return; - } - try { - if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) { - doFullImport(sw, reqParams); - } else if (command.equals(DELTA_IMPORT_CMD)) { - doDeltaImport(sw, reqParams); - } - } finally { - importLock.unlock(); - } - } - - @SuppressWarnings("unchecked") - Map getStatusMessages() { - //this map object is a Collections.synchronizedMap(new LinkedHashMap()). 
if we - // synchronize on the object it must be safe to iterate through the map - @SuppressWarnings({"rawtypes"}) - Map statusMessages = (Map) retrieve(STATUS_MSGS); - Map result = new LinkedHashMap<>(); - if (statusMessages != null) { - synchronized (statusMessages) { - for (Object o : statusMessages.entrySet()) { - @SuppressWarnings({"rawtypes"}) - Map.Entry e = (Map.Entry) o; - //the toString is taken because some of the Objects create the data lazily when toString() is called - result.put((String) e.getKey(), e.getValue().toString()); - } - } - } - return result; - - } - - public DocBuilder getDocBuilder() { - return docBuilder; - } - - public DocBuilder getDocBuilder(DIHWriter writer, RequestInfo requestParams) { - DIHProperties dihPropWriter = createPropertyWriter(); - return new DocBuilder(this, writer, dihPropWriter, requestParams); - } - - Map getEvaluators() { - return getEvaluators(config.getFunctions()); - } - - /** - * used by tests. - */ - @SuppressWarnings({"unchecked"}) - Map getEvaluators(List> fn) { - Map evaluators = new HashMap<>(); - evaluators.put(Evaluator.DATE_FORMAT_EVALUATOR, new DateFormatEvaluator()); - evaluators.put(Evaluator.SQL_ESCAPE_EVALUATOR, new SqlEscapingEvaluator()); - evaluators.put(Evaluator.URL_ENCODE_EVALUATOR, new UrlEvaluator()); - evaluators.put(Evaluator.ESCAPE_SOLR_QUERY_CHARS, new SolrQueryEscapingEvaluator()); - SolrCore core = docBuilder == null ? 
null : docBuilder.dataImporter.getCore(); - for (Map map : fn) { - try { - evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).getConstructor().newInstance()); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS)); - } - } - return evaluators; - } - - static final ThreadLocal QUERY_COUNT = new ThreadLocal() { - @Override - protected AtomicLong initialValue() { - return new AtomicLong(); - } - }; - - - - static final class MSG { - public static final String NO_CONFIG_FOUND = "Configuration not found"; - - public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run"; - - public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid"; - - public static final String LOAD_EXP = "Exception while loading DataImporter"; - - public static final String JMX_DESC = "Manage data import from databases to Solr"; - - public static final String CMD_RUNNING = "A command is still running..."; - - public static final String DEBUG_NOT_ENABLED = "Debug not enabled. 
Add a tag true in solrconfig.xml"; - - public static final String CONFIG_RELOADED = "Configuration Re-loaded sucessfully"; - - public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy."; - - public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed"; - - public static final String TOTAL_FAILED_DOCS = "Total Documents Failed"; - - public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource"; - - public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched"; - - public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted"; - - public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped"; - } - - public SolrCore getCore() { - return core; - } - - void putToCoreScopeSession(String key, Object val) { - coreScopeSession.put(key, val); - } - Object getFromCoreScopeSession(String key) { - return coreScopeSession.get(key); - } - - public static final String COLUMN = "column"; - - public static final String TYPE = "type"; - - public static final String DATA_SRC = "dataSource"; - - public static final String MULTI_VALUED = "multiValued"; - - public static final String NAME = "name"; - - public static final String STATUS_MSGS = "status-messages"; - - public static final String FULL_IMPORT_CMD = "full-import"; - - public static final String IMPORT_CMD = "import"; - - public static final String DELTA_IMPORT_CMD = "delta-import"; - - public static final String ABORT_CMD = "abort"; - - public static final String DEBUG_MODE = "debug"; - - public static final String RELOAD_CONF_CMD = "reload-config"; - - public static final String SHOW_CONF_CMD = "show-config"; - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java deleted file mode 100644 index aeded279cbe..00000000000 --- 
a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.Closeable; -import java.util.Properties; - -/** - *

- * Provides data from a source with a given query. - *

- *

- * Implementation of this abstract class must provide a default no-arg constructor - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public abstract class DataSource implements Closeable { - - /** - * Initializes the DataSource with the Context and - * initialization properties. - *

- * This is invoked by the DataImporter after creating an - * instance of this class. - */ - public abstract void init(Context context, Properties initProps); - - /** - * Get records for the given query.The return type depends on the - * implementation . - * - * @param query The query string. It can be a SQL for JdbcDataSource or a URL - * for HttpDataSource or a file location for FileDataSource or a custom - * format for your own custom DataSource. - * @return Depends on the implementation. For instance JdbcDataSource returns - * an Iterator<Map <String,Object>> - */ - public abstract T getData(String query); - - /** - * Cleans up resources of this DataSource after use. - */ - public abstract void close(); -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java deleted file mode 100644 index f4df82080aa..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.IllformedLocaleException; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.TimeZone; - -import org.apache.solr.common.util.SuppressForbidden; -import org.apache.solr.handler.dataimport.config.EntityField; -import org.apache.solr.util.DateMathParser; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; - -/** - *

Formats values using a given date format.

- *

Pass three parameters: - *

    - *
  • An {@link EntityField} or a date expression to be parsed with - * the {@link DateMathParser} class If the value is in a String, - * then it is assumed to be a datemath expression, otherwise it - * resolved using a {@link VariableResolver} instance
  • - *
  • A date format see {@link SimpleDateFormat} for the syntax.
  • - *
  • The {@link Locale} to parse. - * (optional. Defaults to the Root Locale)
  • - *
- */ -public class DateFormatEvaluator extends Evaluator { - - public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; - protected Map availableLocales = new HashMap<>(); - protected Set availableTimezones = new HashSet<>(); - - @SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility") - public DateFormatEvaluator() { - for (Locale locale : Locale.getAvailableLocales()) { - availableLocales.put(locale.toString(), locale); - } - for (String tz : TimeZone.getAvailableIDs()) { - availableTimezones.add(tz); - } - } - - private SimpleDateFormat getDateFormat(String pattern, TimeZone timezone, Locale locale) { - final SimpleDateFormat sdf = new SimpleDateFormat(pattern, locale); - sdf.setTimeZone(timezone); - return sdf; - } - - @Override - public String evaluate(String expression, Context context) { - List l = parseParams(expression, context.getVariableResolver()); - if (l.size() < 2 || l.size() > 4) { - throw new DataImportHandlerException(SEVERE, "'formatDate()' must have two, three or four parameters "); - } - Object o = l.get(0); - Object format = l.get(1); - if (format instanceof VariableWrapper) { - VariableWrapper wrapper = (VariableWrapper) format; - o = wrapper.resolve(); - format = o.toString(); - } - Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility - if(l.size()>2) { - Object localeObj = l.get(2); - String localeStr = null; - if (localeObj instanceof VariableWrapper) { - localeStr = ((VariableWrapper) localeObj).resolve().toString(); - } else { - localeStr = localeObj.toString(); - } - locale = availableLocales.get(localeStr); - if (locale == null) try { - locale = new Locale.Builder().setLanguageTag(localeStr).build(); - } catch (IllformedLocaleException ex) { - throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex); - } - } - TimeZone tz = TimeZone.getDefault(); // DWS TODO: is this the 
right default for us? Deserves explanation if so. - if(l.size()==4) { - Object tzObj = l.get(3); - String tzStr = null; - if (tzObj instanceof VariableWrapper) { - tzStr = ((VariableWrapper) tzObj).resolve().toString(); - } else { - tzStr = tzObj.toString(); - } - if(availableTimezones.contains(tzStr)) { - tz = TimeZone.getTimeZone(tzStr); - } else { - throw new DataImportHandlerException(SEVERE, "Unsupported Timezone: " + tzStr); - } - } - String dateFmt = format.toString(); - SimpleDateFormat fmt = getDateFormat(dateFmt, tz, locale); - Date date = null; - if (o instanceof VariableWrapper) { - date = evaluateWrapper((VariableWrapper) o, locale, tz); - } else { - date = evaluateString(o.toString(), locale, tz); - } - return fmt.format(date); - } - - /** - * NOTE: declared as a method to allow for extensibility - * - * @lucene.experimental this API is experimental and subject to change - * @return the result of evaluating a string - */ - protected Date evaluateWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) { - Date date = null; - Object variableval = resolveWrapper(variableWrapper,locale,tz); - if (variableval instanceof Date) { - date = (Date) variableval; - } else { - String s = variableval.toString(); - try { - date = getDateFormat(DEFAULT_DATE_FORMAT, tz, locale).parse(s); - } catch (ParseException exp) { - wrapAndThrow(SEVERE, exp, "Invalid expression for date"); - } - } - return date; - } - - /** - * NOTE: declared as a method to allow for extensibility - * @lucene.experimental - * @return the result of evaluating a string - */ - protected Date evaluateString(String datemathfmt, Locale locale, TimeZone tz) { - // note: DMP does not use the locale but perhaps a subclass might use it, for e.g. parsing a date in a custom - // string that doesn't necessarily have date math? - //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic. 
- if (datemathfmt.startsWith("NOW")) { - datemathfmt = datemathfmt.substring("NOW".length()); - } - try { - DateMathParser parser = new DateMathParser(tz); - parser.setNow(new Date());// thus do *not* use SolrRequestInfo - return parser.parseMath(datemathfmt); - } catch (ParseException e) { - throw wrapAndThrow(SEVERE, e, "Invalid expression for date"); - } - } - - /** - * NOTE: declared as a method to allow for extensibility - * @lucene.experimental - * @return the result of resolving the variable wrapper - */ - protected Object resolveWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) { - return variableWrapper.resolve(); - } - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java deleted file mode 100644 index 61edbe61117..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.lang.invoke.MethodHandles; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.*; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - *

- * {@link Transformer} instance which creates {@link Date} instances out of {@link String}s. - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -public class DateFormatTransformer extends Transformer { - private Map fmtCache = new HashMap<>(); - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - @Override - @SuppressWarnings("unchecked") - public Object transformRow(Map aRow, Context context) { - - for (Map map : context.getAllEntityFields()) { - Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility - String customLocale = map.get(LOCALE); - if (customLocale != null) { - try { - locale = new Locale.Builder().setLanguageTag(customLocale).build(); - } catch (IllformedLocaleException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified: " + customLocale, e); - } - } - - String fmt = map.get(DATE_TIME_FMT); - if (fmt == null) - continue; - VariableResolver resolver = context.getVariableResolver(); - fmt = resolver.replaceTokens(fmt); - String column = map.get(DataImporter.COLUMN); - String srcCol = map.get(RegexTransformer.SRC_COL_NAME); - if (srcCol == null) - srcCol = column; - try { - Object o = aRow.get(srcCol); - if (o instanceof List) { - @SuppressWarnings({"rawtypes"}) - List inputs = (List) o; - List results = new ArrayList<>(); - for (Object input : inputs) { - results.add(process(input, fmt, locale)); - } - aRow.put(column, results); - } else { - if (o != null) { - aRow.put(column, process(o, fmt, locale)); - } - } - } catch (ParseException e) { - log.warn("Could not parse a Date field ", e); - } - } - return aRow; - } - - private Date process(Object value, String format, Locale locale) throws ParseException { - if (value == null) return null; - String strVal = value.toString().trim(); - if (strVal.length() == 0) - return null; - SimpleDateFormat fmt = fmtCache.get(format); - if (fmt == null) { - fmt = new SimpleDateFormat(format, locale); - fmtCache.put(format, fmt); - } - 
return fmt.parse(strVal); - } - - public static final String DATE_TIME_FMT = "dateTimeFormat"; - - public static final String LOCALE = "locale"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java deleted file mode 100644 index 623832fc9c4..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.StrUtils; - -public class DebugInfo { - - private static final class ChildRollupDocs extends AbstractList { - - private List delegate = new ArrayList<>(); - - @Override - public SolrInputDocument get(int index) { - return delegate.get(index); - } - - @Override - public int size() { - return delegate.size(); - } - - public boolean add(SolrInputDocument e) { - SolrInputDocument transformed = e.deepCopy(); - if (transformed.hasChildDocuments()) { - ChildRollupDocs childList = new ChildRollupDocs(); - childList.addAll(transformed.getChildDocuments()); - transformed.addField("_childDocuments_", childList); - transformed.getChildDocuments().clear(); - } - return delegate.add(transformed); - } - } - - public List debugDocuments = new ChildRollupDocs(); - - public NamedList debugVerboseOutput = null; - public boolean verbose; - - public DebugInfo(Map requestParams) { - verbose = StrUtils.parseBool((String) requestParams.get("verbose"), false); - debugVerboseOutput = new NamedList<>(); - } -} - diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java deleted file mode 100644 index 9de42fc6f3a..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; -import org.apache.solr.common.util.NamedList; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.text.MessageFormat; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; -import java.util.Stack; - -/** - *

- * Implements most of the interactive development functionality - *

- *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -class DebugLogger { - private Stack debugStack; - - @SuppressWarnings({"rawtypes"}) - NamedList output; -// private final SolrWriter writer1; - - private static final String LINE = "---------------------------------------------"; - - private MessageFormat fmt = new MessageFormat( - "----------- row #{0}-------------", Locale.ROOT); - - boolean enabled = true; - - @SuppressWarnings({"rawtypes"}) - public DebugLogger() { -// writer = solrWriter; - output = new NamedList(); - debugStack = new Stack() { - - @Override - public DebugInfo pop() { - if (size() == 1) - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, "Stack is becoming empty"); - return super.pop(); - } - }; - debugStack.push(new DebugInfo(null, DIHLogLevels.NONE, null)); - output = debugStack.peek().lst; - } - - private DebugInfo peekStack() { - return debugStack.isEmpty() ? null : debugStack.peek(); - } - - @SuppressWarnings({"unchecked"}) - public void log(DIHLogLevels event, String name, Object row) { - if (event == DIHLogLevels.DISABLE_LOGGING) { - enabled = false; - return; - } else if (event == DIHLogLevels.ENABLE_LOGGING) { - enabled = true; - return; - } - - if (!enabled && event != DIHLogLevels.START_ENTITY - && event != DIHLogLevels.END_ENTITY) { - return; - } - - if (event == DIHLogLevels.START_DOC) { - debugStack.push(new DebugInfo(null, DIHLogLevels.START_DOC, peekStack())); - } else if (DIHLogLevels.START_ENTITY == event) { - debugStack - .push(new DebugInfo(name, DIHLogLevels.START_ENTITY, peekStack())); - } else if (DIHLogLevels.ENTITY_OUT == event - || DIHLogLevels.PRE_TRANSFORMER_ROW == event) { - if (debugStack.peek().type == DIHLogLevels.START_ENTITY - || debugStack.peek().type == DIHLogLevels.START_DOC) { - debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack - .peek().rowCount})); - addToNamedList(debugStack.peek().lst, row); - debugStack.peek().lst.add(null, 
LINE); - } - } else if (event == DIHLogLevels.ROW_END) { - popAllTransformers(); - } else if (DIHLogLevels.END_ENTITY == event) { - while (debugStack.pop().type != DIHLogLevels.START_ENTITY) - ; - } else if (DIHLogLevels.END_DOC == event) { - while (debugStack.pop().type != DIHLogLevels.START_DOC) - ; - } else if (event == DIHLogLevels.TRANSFORMER_EXCEPTION) { - debugStack.push(new DebugInfo(name, event, peekStack())); - debugStack.peek().lst.add("EXCEPTION", - getStacktraceString((Exception) row)); - } else if (DIHLogLevels.TRANSFORMED_ROW == event) { - debugStack.push(new DebugInfo(name, event, peekStack())); - debugStack.peek().lst.add(null, LINE); - addToNamedList(debugStack.peek().lst, row); - debugStack.peek().lst.add(null, LINE); - if (row instanceof DataImportHandlerException) { - DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row; - dataImportHandlerException.debugged = true; - } - } else if (DIHLogLevels.ENTITY_META == event) { - popAllTransformers(); - debugStack.peek().lst.add(name, row); - } else if (DIHLogLevels.ENTITY_EXCEPTION == event) { - if (row instanceof DataImportHandlerException) { - DataImportHandlerException dihe = (DataImportHandlerException) row; - if (dihe.debugged) - return; - dihe.debugged = true; - } - - popAllTransformers(); - debugStack.peek().lst.add("EXCEPTION", - getStacktraceString((Exception) row)); - } - } - - private void popAllTransformers() { - while (true) { - DIHLogLevels type = debugStack.peek().type; - if (type == DIHLogLevels.START_DOC || type == DIHLogLevels.START_ENTITY) - break; - debugStack.pop(); - } - } - - @SuppressWarnings({"unchecked"}) - private void addToNamedList(@SuppressWarnings({"rawtypes"})NamedList nl, Object row) { - if (row instanceof List) { - @SuppressWarnings({"rawtypes"}) - List list = (List) row; - @SuppressWarnings({"rawtypes"}) - NamedList l = new NamedList(); - nl.add(null, l); - for (Object o : list) { - Map map = (Map) o; - for (Map.Entry entry : 
map.entrySet()) - nl.add(entry.getKey(), entry.getValue()); - } - } else if (row instanceof Map) { - Map map = (Map) row; - for (Map.Entry entry : map.entrySet()) - nl.add(entry.getKey(), entry.getValue()); - } - } - - @SuppressWarnings({"rawtypes"}) - DataSource wrapDs(final DataSource ds) { - return new DataSource() { - @Override - public void init(Context context, Properties initProps) { - ds.init(context, initProps); - } - - @Override - public void close() { - ds.close(); - } - - @Override - public Object getData(String query) { - log(DIHLogLevels.ENTITY_META, "query", query); - long start = System.nanoTime(); - try { - return ds.getData(query); - } catch (DataImportHandlerException de) { - log(DIHLogLevels.ENTITY_EXCEPTION, - null, de); - throw de; - } catch (Exception e) { - log(DIHLogLevels.ENTITY_EXCEPTION, - null, e); - DataImportHandlerException de = new DataImportHandlerException( - DataImportHandlerException.SEVERE, "", e); - de.debugged = true; - throw de; - } finally { - log(DIHLogLevels.ENTITY_META, "time-taken", DocBuilder - .getTimeElapsedSince(start)); - } - } - }; - } - - Transformer wrapTransformer(final Transformer t) { - return new Transformer() { - @Override - public Object transformRow(Map row, Context context) { - log(DIHLogLevels.PRE_TRANSFORMER_ROW, null, row); - String tName = getTransformerName(t); - Object result = null; - try { - result = t.transformRow(row, context); - log(DIHLogLevels.TRANSFORMED_ROW, tName, result); - } catch (DataImportHandlerException de) { - log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, de); - de.debugged = true; - throw de; - } catch (Exception e) { - log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, e); - DataImportHandlerException de = new DataImportHandlerException(DataImportHandlerException.SEVERE, "", e); - de.debugged = true; - throw de; - } - return result; - } - }; - } - - public static String getStacktraceString(Exception e) { - StringWriter sw = new StringWriter(); - e.printStackTrace(new 
PrintWriter(sw)); - return sw.toString(); - } - - static String getTransformerName(Transformer t) { - @SuppressWarnings({"rawtypes"}) - Class transClass = t.getClass(); - if (t instanceof EntityProcessorWrapper.ReflectionTransformer) { - return ((EntityProcessorWrapper.ReflectionTransformer) t).trans; - } - if (t instanceof ScriptTransformer) { - ScriptTransformer scriptTransformer = (ScriptTransformer) t; - return "script:" + scriptTransformer.getFunctionName(); - } - if (transClass.getPackage().equals(DebugLogger.class.getPackage())) { - return transClass.getSimpleName(); - } else { - return transClass.getName(); - } - } - - private static class DebugInfo { - String name; - - int tCount, rowCount; - - @SuppressWarnings({"rawtypes"}) - NamedList lst; - - DIHLogLevels type; - - DebugInfo parent; - - @SuppressWarnings({"unchecked", "rawtypes"}) - public DebugInfo(String name, DIHLogLevels type, DebugInfo parent) { - this.name = name; - this.type = type; - this.parent = parent; - lst = new NamedList(); - if (parent != null) { - String displayName = null; - if (type == DIHLogLevels.START_ENTITY) { - displayName = "entity:" + name; - } else if (type == DIHLogLevels.TRANSFORMED_ROW - || type == DIHLogLevels.TRANSFORMER_EXCEPTION) { - displayName = "transformer:" + name; - } else if (type == DIHLogLevels.START_DOC) { - this.name = displayName = "document#" + SolrWriter.getDocCount(); - } - parent.lst.add(displayName, lst); - } - } - } - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java deleted file mode 100644 index 8115695d40a..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java +++ /dev/null @@ -1,1020 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.util.IOUtils; -import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.dataimport.config.ConfigNameConstants; -import org.apache.solr.handler.dataimport.config.DIHConfiguration; -import org.apache.solr.handler.dataimport.config.Entity; -import org.apache.solr.handler.dataimport.config.EntityField; - -import static org.apache.solr.handler.dataimport.SolrWriter.LAST_INDEX_KEY; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; - -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.schema.SchemaField; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.invoke.MethodHandles; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; - -/** - *

{@link DocBuilder} is responsible for creating Solr documents out of the given configuration. It also maintains - * statistics information. It depends on the {@link EntityProcessor} implementations to fetch data.

- *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -public class DocBuilder { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean(); - - private static final Date EPOCH = new Date(0); - public static final String DELETE_DOC_BY_ID = "$deleteDocById"; - public static final String DELETE_DOC_BY_QUERY = "$deleteDocByQuery"; - public static final String DOC_BOOST = "$docBoost"; - public static final String SKIP_DOC = "$skipDoc"; - public static final String SKIP_ROW = "$skipRow"; - - DataImporter dataImporter; - - private DIHConfiguration config; - - private EntityProcessorWrapper currentEntityProcessorWrapper; - - @SuppressWarnings({"unchecked", "rawtypes"}) - private Map statusMessages = Collections.synchronizedMap(new LinkedHashMap()); - - public Statistics importStatistics = new Statistics(); - - DIHWriter writer; - - boolean verboseDebug = false; - - Map session = new HashMap<>(); - - static final ThreadLocal INSTANCE = new ThreadLocal<>(); - private Map persistedProperties; - - private DIHProperties propWriter; - private DebugLogger debugLogger; - private final RequestInfo reqParams; - - public DocBuilder(DataImporter dataImporter, DIHWriter solrWriter, DIHProperties propWriter, RequestInfo reqParams) { - INSTANCE.set(this); - this.dataImporter = dataImporter; - this.reqParams = reqParams; - this.propWriter = propWriter; - DataImporter.QUERY_COUNT.set(importStatistics.queryCount); - verboseDebug = reqParams.isDebug() && reqParams.getDebugInfo().verbose; - persistedProperties = propWriter.readIndexerProperties(); - - writer = solrWriter; - ContextImpl ctx = new ContextImpl(null, null, null, null, reqParams.getRawParams(), null, this); - if (writer != null) { - writer.init(ctx); - } - } - - - DebugLogger getDebugLogger(){ - if (debugLogger == null) { - debugLogger = new DebugLogger(); - } - return 
debugLogger; - } - - private VariableResolver getVariableResolver() { - try { - VariableResolver resolver = null; - String epoch = propWriter.convertDateToString(EPOCH); - if(dataImporter != null && dataImporter.getCore() != null - && dataImporter.getCore().getCoreDescriptor().getSubstitutableProperties() != null){ - resolver = new VariableResolver(dataImporter.getCore().getCoreDescriptor().getSubstitutableProperties()); - } else { - resolver = new VariableResolver(); - } - resolver.setEvaluators(dataImporter.getEvaluators()); - Map indexerNamespace = new HashMap<>(); - if (persistedProperties.get(LAST_INDEX_TIME) != null) { - indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.get(LAST_INDEX_TIME)); - } else { - // set epoch - indexerNamespace.put(LAST_INDEX_TIME, epoch); - } - indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime()); - indexerNamespace.put("request", new HashMap<>(reqParams.getRawParams())); - indexerNamespace.put("handlerName", dataImporter.getHandlerName()); - for (Entity entity : dataImporter.getConfig().getEntities()) { - Map entityNamespace = new HashMap<>(); - String key = SolrWriter.LAST_INDEX_KEY; - Object lastIndex = persistedProperties.get(entity.getName() + "." 
+ key); - if (lastIndex != null) { - entityNamespace.put(SolrWriter.LAST_INDEX_KEY, lastIndex); - } else { - entityNamespace.put(SolrWriter.LAST_INDEX_KEY, epoch); - } - indexerNamespace.put(entity.getName(), entityNamespace); - } - resolver.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT, indexerNamespace); - resolver.addNamespace(ConfigNameConstants.IMPORTER_NS, indexerNamespace); - return resolver; - } catch (Exception e) { - wrapAndThrow(SEVERE, e); - // unreachable statement - return null; - } - } - - private void invokeEventListener(String className) { - invokeEventListener(className, null); - } - - - private void invokeEventListener(String className, Exception lastException) { - try { - @SuppressWarnings({"unchecked"}) - EventListener listener = (EventListener) loadClass(className, dataImporter.getCore()).getConstructor().newInstance(); - notifyListener(listener, lastException); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Unable to load class : " + className); - } - } - - private void notifyListener(EventListener listener, Exception lastException) { - String currentProcess; - if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) { - currentProcess = Context.DELTA_DUMP; - } else { - currentProcess = Context.FULL_DUMP; - } - ContextImpl ctx = new ContextImpl(null, getVariableResolver(), null, currentProcess, session, null, this); - ctx.setLastException(lastException); - listener.onEvent(ctx); - } - - @SuppressWarnings("unchecked") - public void execute() { - List epwList = null; - try { - dataImporter.store(DataImporter.STATUS_MSGS, statusMessages); - config = dataImporter.getConfig(); - final AtomicLong startTime = new AtomicLong(System.nanoTime()); - statusMessages.put(TIME_ELAPSED, new Object() { - @Override - public String toString() { - return getTimeElapsedSince(startTime.get()); - } - }); - - statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, - importStatistics.queryCount); - 
statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, - importStatistics.rowsCount); - statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, - importStatistics.docCount); - statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, - importStatistics.skipDocCount); - - List entities = reqParams.getEntitiesToRun(); - - // Trigger onImportStart - if (config.getOnImportStart() != null) { - invokeEventListener(config.getOnImportStart()); - } - AtomicBoolean fullCleanDone = new AtomicBoolean(false); - //we must not do a delete of *:* multiple times if there are multiple root entities to be run - Map lastIndexTimeProps = new HashMap<>(); - lastIndexTimeProps.put(LAST_INDEX_KEY, dataImporter.getIndexStartTime()); - - epwList = new ArrayList<>(config.getEntities().size()); - for (Entity e : config.getEntities()) { - epwList.add(getEntityProcessorWrapper(e)); - } - for (EntityProcessorWrapper epw : epwList) { - if (entities != null && !entities.contains(epw.getEntity().getName())) - continue; - lastIndexTimeProps.put(epw.getEntity().getName() + "." 
+ LAST_INDEX_KEY, propWriter.getCurrentTimestamp()); - currentEntityProcessorWrapper = epw; - String delQuery = epw.getEntity().getAllAttributes().get("preImportDeleteQuery"); - if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) { - cleanByQuery(delQuery, fullCleanDone); - doDelta(); - delQuery = epw.getEntity().getAllAttributes().get("postImportDeleteQuery"); - if (delQuery != null) { - fullCleanDone.set(false); - cleanByQuery(delQuery, fullCleanDone); - } - } else { - cleanByQuery(delQuery, fullCleanDone); - doFullDump(); - delQuery = epw.getEntity().getAllAttributes().get("postImportDeleteQuery"); - if (delQuery != null) { - fullCleanDone.set(false); - cleanByQuery(delQuery, fullCleanDone); - } - } - } - - if (stop.get()) { - // Dont commit if aborted using command=abort - statusMessages.put("Aborted", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date())); - handleError("Aborted", null); - } else { - // Do not commit unnecessarily if this is a delta-import and no documents were created or deleted - if (!reqParams.isClean()) { - if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) { - finish(lastIndexTimeProps); - } - } else { - // Finished operation normally, commit now - finish(lastIndexTimeProps); - } - - if (config.getOnImportEnd() != null) { - invokeEventListener(config.getOnImportEnd()); - } - } - - statusMessages.remove(TIME_ELAPSED); - statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, ""+ importStatistics.docCount.get()); - if(importStatistics.failedDocCount.get() > 0) - statusMessages.put(DataImporter.MSG.TOTAL_FAILED_DOCS, ""+ importStatistics.failedDocCount.get()); - - statusMessages.put("Time taken", getTimeElapsedSince(startTime.get())); - if (log.isInfoEnabled()) { - log.info("Time taken = {}", getTimeElapsedSince(startTime.get())); - } - } catch(Exception e) - { - throw new RuntimeException(e); - } finally { - // Cannot use IOUtils.closeQuietly since DIH relies on 
exceptions bubbling out of writer.close() to indicate - // success/failure of the run. - RuntimeException raisedDuringClose = null; - try { - if (writer != null) { - writer.close(); - } - } catch (RuntimeException e) { - if (log.isWarnEnabled()) { - log.warn("Exception encountered while closing DIHWriter " + writer + "; temporarily suppressing to ensure other DocBuilder elements are closed", e); // logOk - } - raisedDuringClose = e; - } - - if (epwList != null) { - closeEntityProcessorWrappers(epwList); - } - if(reqParams.isDebug()) { - reqParams.getDebugInfo().debugVerboseOutput = getDebugLogger().output; - } - - if (raisedDuringClose != null) { - throw raisedDuringClose; - } - } - } - private void closeEntityProcessorWrappers(List epwList) { - for(EntityProcessorWrapper epw : epwList) { - IOUtils.closeQuietly(epw); - - if(epw.getDatasource() != null) { - IOUtils.closeQuietly(epw.getDatasource()); - } - closeEntityProcessorWrappers(epw.getChildren()); - } - } - - @SuppressWarnings("unchecked") - private void finish(Map lastIndexTimeProps) { - log.info("Import completed successfully"); - statusMessages.put("", "Indexing completed. Added/Updated: " - + importStatistics.docCount + " documents. Deleted " - + importStatistics.deletedDocCount + " documents."); - if(reqParams.isCommit()) { - writer.commit(reqParams.isOptimize()); - addStatusMessage("Committed"); - if (reqParams.isOptimize()) - addStatusMessage("Optimized"); - } - try { - propWriter.persist(lastIndexTimeProps); - } catch (Exception e) { - log.error("Could not write property file", e); - statusMessages.put("error", "Could not write property file. Delta imports will not work. 
" + - "Make sure your conf directory is writable"); - } - } - - @SuppressWarnings({"unchecked"}) - void handleError(String message, Exception e) { - if (!dataImporter.getCore().getCoreContainer().isZooKeeperAware()) { - writer.rollback(); - } - - statusMessages.put(message, "Indexing error"); - addStatusMessage(message); - if ((config != null) && (config.getOnError() != null)) { - invokeEventListener(config.getOnError(), e); - } - } - - private void doFullDump() { - addStatusMessage("Full Dump Started"); - buildDocument(getVariableResolver(), null, null, currentEntityProcessorWrapper, true, null); - } - - @SuppressWarnings("unchecked") - private void doDelta() { - addStatusMessage("Delta Dump started"); - VariableResolver resolver = getVariableResolver(); - - if (config.getDeleteQuery() != null) { - writer.deleteByQuery(config.getDeleteQuery()); - } - - addStatusMessage("Identifying Delta"); - log.info("Starting delta collection."); - Set> deletedKeys = new HashSet<>(); - Set> allPks = collectDelta(currentEntityProcessorWrapper, resolver, deletedKeys); - if (stop.get()) - return; - addStatusMessage("Deltas Obtained"); - addStatusMessage("Building documents"); - if (!deletedKeys.isEmpty()) { - allPks.removeAll(deletedKeys); - deleteAll(deletedKeys); - // Make sure that documents are not re-created - } - deletedKeys = null; - writer.setDeltaKeys(allPks); - - statusMessages.put("Total Changed Documents", allPks.size()); - VariableResolver vri = getVariableResolver(); - Iterator> pkIter = allPks.iterator(); - while (pkIter.hasNext()) { - Map map = pkIter.next(); - vri.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT + ".delta", map); - buildDocument(vri, null, map, currentEntityProcessorWrapper, true, null); - pkIter.remove(); - // check for abort - if (stop.get()) - break; - } - - if (!stop.get()) { - log.info("Delta Import completed successfully"); - } - } - - private void deleteAll(Set> deletedKeys) { - log.info("Deleting stale documents "); - Iterator> iter = 
deletedKeys.iterator(); - while (iter.hasNext()) { - Map map = iter.next(); - String keyName = currentEntityProcessorWrapper.getEntity().isDocRoot() ? currentEntityProcessorWrapper.getEntity().getPk() : currentEntityProcessorWrapper.getEntity().getSchemaPk(); - Object key = map.get(keyName); - if(key == null) { - keyName = findMatchingPkColumn(keyName, map); - key = map.get(keyName); - } - if(key == null) { - log.warn("no key was available for deleted pk query. keyName = {}", keyName); - continue; - } - writer.deleteDoc(key); - importStatistics.deletedDocCount.incrementAndGet(); - iter.remove(); - } - } - - @SuppressWarnings("unchecked") - public void addStatusMessage(String msg) { - statusMessages.put(msg, new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date())); - } - - private void resetEntity(EntityProcessorWrapper epw) { - epw.setInitialized(false); - for (EntityProcessorWrapper child : epw.getChildren()) { - resetEntity(child); - } - - } - - private void buildDocument(VariableResolver vr, DocWrapper doc, - Map pk, EntityProcessorWrapper epw, boolean isRoot, - ContextImpl parentCtx) { - List entitiesToDestroy = new ArrayList<>(); - try { - buildDocument(vr, doc, pk, epw, isRoot, parentCtx, entitiesToDestroy); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - for (EntityProcessorWrapper entityWrapper : entitiesToDestroy) { - entityWrapper.destroy(); - } - resetEntity(epw); - } - } - - @SuppressWarnings("unchecked") - private void buildDocument(VariableResolver vr, DocWrapper doc, - Map pk, EntityProcessorWrapper epw, boolean isRoot, - ContextImpl parentCtx, List entitiesToDestroy) { - - ContextImpl ctx = new ContextImpl(epw, vr, null, - pk == null ? 
Context.FULL_DUMP : Context.DELTA_DUMP, - session, parentCtx, this); - epw.init(ctx); - if (!epw.isInitialized()) { - entitiesToDestroy.add(epw); - epw.setInitialized(true); - } - - if (reqParams.getStart() > 0) { - getDebugLogger().log(DIHLogLevels.DISABLE_LOGGING, null, null); - } - - if (verboseDebug) { - getDebugLogger().log(DIHLogLevels.START_ENTITY, epw.getEntity().getName(), null); - } - - int seenDocCount = 0; - - try { - while (true) { - if (stop.get()) - return; - if(importStatistics.docCount.get() > (reqParams.getStart() + reqParams.getRows())) break; - try { - seenDocCount++; - - if (seenDocCount > reqParams.getStart()) { - getDebugLogger().log(DIHLogLevels.ENABLE_LOGGING, null, null); - } - - if (verboseDebug && epw.getEntity().isDocRoot()) { - getDebugLogger().log(DIHLogLevels.START_DOC, epw.getEntity().getName(), null); - } - if (doc == null && epw.getEntity().isDocRoot()) { - doc = new DocWrapper(); - ctx.setDoc(doc); - Entity e = epw.getEntity(); - while (e.getParentEntity() != null) { - addFields(e.getParentEntity(), doc, (Map) vr - .resolve(e.getParentEntity().getName()), vr); - e = e.getParentEntity(); - } - } - - Map arow = epw.nextRow(); - if (arow == null) { - break; - } - - // Support for start parameter in debug mode - if (epw.getEntity().isDocRoot()) { - if (seenDocCount <= reqParams.getStart()) - continue; - if (seenDocCount > reqParams.getStart() + reqParams.getRows()) { - log.info("Indexing stopped at docCount = {}", importStatistics.docCount); - break; - } - } - - if (verboseDebug) { - getDebugLogger().log(DIHLogLevels.ENTITY_OUT, epw.getEntity().getName(), arow); - } - importStatistics.rowsCount.incrementAndGet(); - - DocWrapper childDoc = null; - if (doc != null) { - if (epw.getEntity().isChild()) { - childDoc = new DocWrapper(); - handleSpecialCommands(arow, childDoc); - addFields(epw.getEntity(), childDoc, arow, vr); - doc.addChildDocument(childDoc); - } else { - handleSpecialCommands(arow, doc); - 
vr.addNamespace(epw.getEntity().getName(), arow); - addFields(epw.getEntity(), doc, arow, vr); - vr.removeNamespace(epw.getEntity().getName()); - } - } - if (epw.getEntity().getChildren() != null) { - vr.addNamespace(epw.getEntity().getName(), arow); - for (EntityProcessorWrapper child : epw.getChildren()) { - if (childDoc != null) { - buildDocument(vr, childDoc, - child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy); - } else { - buildDocument(vr, doc, - child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy); - } - } - vr.removeNamespace(epw.getEntity().getName()); - } - if (epw.getEntity().isDocRoot()) { - if (stop.get()) - return; - if (!doc.isEmpty()) { - boolean result = writer.upload(doc); - if(reqParams.isDebug()) { - reqParams.getDebugInfo().debugDocuments.add(doc); - } - doc = null; - if (result){ - importStatistics.docCount.incrementAndGet(); - } else { - importStatistics.failedDocCount.incrementAndGet(); - } - } - } - } catch (DataImportHandlerException e) { - if (verboseDebug) { - getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), e); - } - if(e.getErrCode() == DataImportHandlerException.SKIP_ROW){ - continue; - } - if (isRoot) { - if (e.getErrCode() == DataImportHandlerException.SKIP) { - importStatistics.skipDocCount.getAndIncrement(); - doc = null; - } else { - SolrException.log(log, "Exception while processing: " - + epw.getEntity().getName() + " document : " + doc, e); - } - if (e.getErrCode() == DataImportHandlerException.SEVERE) - throw e; - } else - throw e; - } catch (Exception t) { - if (verboseDebug) { - getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), t); - } - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t); - } finally { - if (verboseDebug) { - getDebugLogger().log(DIHLogLevels.ROW_END, epw.getEntity().getName(), null); - if (epw.getEntity().isDocRoot()) - getDebugLogger().log(DIHLogLevels.END_DOC, 
null, null); - } - } - } - } finally { - if (verboseDebug) { - getDebugLogger().log(DIHLogLevels.END_ENTITY, null, null); - } - } - } - - static class DocWrapper extends SolrInputDocument { - //final SolrInputDocument solrDocument = new SolrInputDocument(); - Map session; - - public void setSessionAttribute(String key, Object val){ - if(session == null) session = new HashMap<>(); - session.put(key, val); - } - - public Object getSessionAttribute(String key) { - return session == null ? null : session.get(key); - } - } - - private void handleSpecialCommands(Map arow, DocWrapper doc) { - Object value = arow.get(DELETE_DOC_BY_ID); - if (value != null) { - if (value instanceof Collection) { - @SuppressWarnings({"rawtypes"}) - Collection collection = (Collection) value; - for (Object o : collection) { - writer.deleteDoc(o.toString()); - importStatistics.deletedDocCount.incrementAndGet(); - } - } else { - writer.deleteDoc(value); - importStatistics.deletedDocCount.incrementAndGet(); - } - } - value = arow.get(DELETE_DOC_BY_QUERY); - if (value != null) { - if (value instanceof Collection) { - @SuppressWarnings({"rawtypes"}) - Collection collection = (Collection) value; - for (Object o : collection) { - writer.deleteByQuery(o.toString()); - importStatistics.deletedDocCount.incrementAndGet(); - } - } else { - writer.deleteByQuery(value.toString()); - importStatistics.deletedDocCount.incrementAndGet(); - } - } - value = arow.get(DOC_BOOST); - if (value != null) { - String message = "Ignoring document boost: " + value + " as index-time boosts are not supported anymore"; - if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) { - log.warn(message); - } else { - log.debug(message); - } - } - - value = arow.get(SKIP_DOC); - if (value != null) { - if (Boolean.parseBoolean(value.toString())) { - throw new DataImportHandlerException(DataImportHandlerException.SKIP, - "Document skipped :" + arow); - } - } - - value = arow.get(SKIP_ROW); - if (value != null) { - if 
(Boolean.parseBoolean(value.toString())) { - throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW); - } - } - } - - @SuppressWarnings("unchecked") - private void addFields(Entity entity, DocWrapper doc, - Map arow, VariableResolver vr) { - for (Map.Entry entry : arow.entrySet()) { - String key = entry.getKey(); - Object value = entry.getValue(); - if (value == null) continue; - if (key.startsWith("$")) continue; - Set field = entity.getColNameVsField().get(key); - IndexSchema schema = null == reqParams.getRequest() ? null : reqParams.getRequest().getSchema(); - if (field == null && schema != null) { - // This can be a dynamic field or a field which does not have an entry in data-config ( an implicit field) - SchemaField sf = schema.getFieldOrNull(key); - if (sf == null) { - sf = config.getSchemaField(key); - } - if (sf != null) { - addFieldToDoc(entry.getValue(), sf.getName(), sf.multiValued(), doc); - } - //else do nothing. if we add it it may fail - } else { - if (field != null) { - for (EntityField f : field) { - String name = f.getName(); - boolean multiValued = f.isMultiValued(); - boolean toWrite = f.isToWrite(); - if(f.isDynamicName()){ - name = vr.replaceTokens(name); - SchemaField schemaField = config.getSchemaField(name); - if(schemaField == null) { - toWrite = false; - } else { - multiValued = schemaField.multiValued(); - toWrite = true; - } - } - if (toWrite) { - addFieldToDoc(entry.getValue(), name, multiValued, doc); - } - } - } - } - } - } - - private void addFieldToDoc(Object value, String name, boolean multiValued, DocWrapper doc) { - if (value instanceof Collection) { - @SuppressWarnings({"rawtypes"}) - Collection collection = (Collection) value; - if (multiValued) { - for (Object o : collection) { - if (o != null) - doc.addField(name, o); - } - } else { - if (doc.getField(name) == null) - for (Object o : collection) { - if (o != null) { - doc.addField(name, o); - break; - } - } - } - } else if (multiValued) { - if (value != 
null) { - doc.addField(name, value); - } - } else { - if (doc.getField(name) == null && value != null) - doc.addField(name, value); - } - } - - @SuppressWarnings({"unchecked"}) - public EntityProcessorWrapper getEntityProcessorWrapper(Entity entity) { - EntityProcessor entityProcessor = null; - if (entity.getProcessorName() == null) { - entityProcessor = new SqlEntityProcessor(); - } else { - try { - entityProcessor = (EntityProcessor) loadClass(entity.getProcessorName(), dataImporter.getCore()) - .getConstructor().newInstance(); - } catch (Exception e) { - wrapAndThrow (SEVERE,e, - "Unable to load EntityProcessor implementation for entity:" + entity.getName()); - } - } - EntityProcessorWrapper epw = new EntityProcessorWrapper(entityProcessor, entity, this); - for(Entity e1 : entity.getChildren()) { - epw.getChildren().add(getEntityProcessorWrapper(e1)); - } - - return epw; - } - - private String findMatchingPkColumn(String pk, Map row) { - if (row.containsKey(pk)) { - throw new IllegalArgumentException(String.format(Locale.ROOT, - "deltaQuery returned a row with null for primary key %s", pk)); - } - String resolvedPk = null; - for (String columnName : row.keySet()) { - if (columnName.endsWith("." + pk) || pk.endsWith("." + columnName)) { - if (resolvedPk != null) - throw new IllegalArgumentException( - String.format(Locale.ROOT, - "deltaQuery has more than one column (%s and %s) that might resolve to declared primary key pk='%s'", - resolvedPk, columnName, pk)); - resolvedPk = columnName; - } - } - if (resolvedPk == null) { - throw new IllegalArgumentException( - String - .format( - Locale.ROOT, - "deltaQuery has no column to resolve to declared primary key pk='%s'", - pk)); - } - if (log.isInfoEnabled()) { - log.info(String.format(Locale.ROOT, - "Resolving deltaQuery column '%s' to match entity's declared pk '%s'", - resolvedPk, pk)); - } - return resolvedPk; - } - - /** - *

Collects unique keys of all Solr documents for whom one or more source tables have been changed since the last - * indexed time.

Note: In our definition, unique key of Solr document is the primary key of the top level - * entity (unless skipped using docRoot=false) in the Solr document in data-config.xml

- * - * @return an iterator to the list of keys for which Solr documents should be updated. - */ - @SuppressWarnings({"unchecked", "rawtypes"}) - public Set> collectDelta(EntityProcessorWrapper epw, VariableResolver resolver, - Set> deletedRows) { - //someone called abort - if (stop.get()) - return new HashSet(); - - ContextImpl context1 = new ContextImpl(epw, resolver, null, Context.FIND_DELTA, session, null, this); - epw.init(context1); - - Set> myModifiedPks = new HashSet<>(); - - - - for (EntityProcessorWrapper childEpw : epw.getChildren()) { - //this ensures that we start from the leaf nodes - myModifiedPks.addAll(collectDelta(childEpw, resolver, deletedRows)); - //someone called abort - if (stop.get()) - return new HashSet(); - } - - // identifying the modified rows for this entity - Map> deltaSet = new HashMap<>(); - if (log.isInfoEnabled()) { - log.info("Running ModifiedRowKey() for Entity: {}", epw.getEntity().getName()); - } - //get the modified rows in this entity - String pk = epw.getEntity().getPk(); - while (true) { - Map row = epw.nextModifiedRowKey(); - - if (row == null) - break; - - Object pkValue = row.get(pk); - if (pkValue == null) { - pk = findMatchingPkColumn(pk, row); - pkValue = row.get(pk); - } - - deltaSet.put(pkValue.toString(), row); - importStatistics.rowsCount.incrementAndGet(); - // check for abort - if (stop.get()) - return new HashSet(); - } - //get the deleted rows for this entity - Set> deletedSet = new HashSet<>(); - while (true) { - Map row = epw.nextDeletedRowKey(); - if (row == null) - break; - - deletedSet.add(row); - - Object pkValue = row.get(pk); - if (pkValue == null) { - pk = findMatchingPkColumn(pk, row); - pkValue = row.get(pk); - } - - // Remove deleted rows from the delta rows - String deletedRowPk = pkValue.toString(); - if (deltaSet.containsKey(deletedRowPk)) { - deltaSet.remove(deletedRowPk); - } - - importStatistics.rowsCount.incrementAndGet(); - // check for abort - if (stop.get()) - return new HashSet(); - } - 
- if (log.isInfoEnabled()) { - log.info("Completed ModifiedRowKey for Entity: {} rows obtained: {}", epw.getEntity().getName(), deltaSet.size()); - log.info("Completed DeletedRowKey for Entity: {} rows obtained : {}", epw.getEntity().getName(), deletedSet.size()); // logOk - } - - myModifiedPks.addAll(deltaSet.values()); - Set> parentKeyList = new HashSet<>(); - //all that we have captured is useless (in a sub-entity) if no rows in the parent is modified because of these - //propogate up the changes in the chain - if (epw.getEntity().getParentEntity() != null) { - // identifying deleted rows with deltas - - for (Map row : myModifiedPks) { - resolver.addNamespace(epw.getEntity().getName(), row); - getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList); - // check for abort - if (stop.get()) - return new HashSet(); - } - // running the same for deletedrows - for (Map row : deletedSet) { - resolver.addNamespace(epw.getEntity().getName(), row); - getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList); - // check for abort - if (stop.get()) - return new HashSet(); - } - } - if (log.isInfoEnabled()) { - log.info("Completed parentDeltaQuery for Entity: {}", epw.getEntity().getName()); - } - if (epw.getEntity().isDocRoot()) - deletedRows.addAll(deletedSet); - - // Do not use entity.isDocRoot here because one of descendant entities may set rootEntity="true" - return epw.getEntity().getParentEntity() == null ? 
- myModifiedPks : new HashSet<>(parentKeyList); - } - - private void getModifiedParentRows(VariableResolver resolver, - String entity, EntityProcessor entityProcessor, - Set> parentKeyList) { - try { - while (true) { - Map parentRow = entityProcessor - .nextModifiedParentRowKey(); - if (parentRow == null) - break; - - parentKeyList.add(parentRow); - importStatistics.rowsCount.incrementAndGet(); - // check for abort - if (stop.get()) - return; - } - - } finally { - resolver.removeNamespace(entity); - } - } - - public void abort() { - stop.set(true); - } - - private AtomicBoolean stop = new AtomicBoolean(false); - - public static final String TIME_ELAPSED = "Time Elapsed"; - - static String getTimeElapsedSince(long l) { - l = TimeUnit.MILLISECONDS.convert(System.nanoTime() - l, TimeUnit.NANOSECONDS); - return (l / (60000 * 60)) + ":" + (l / 60000) % 60 + ":" + (l / 1000) - % 60 + "." + l % 1000; - } - - public RequestInfo getReqParams() { - return reqParams; - } - - @SuppressWarnings({"unchecked", "rawtypes"}) - static Class loadClass(String name, SolrCore core) throws ClassNotFoundException { - try { - return core != null ? - core.getResourceLoader().findClass(name, Object.class) : - Class.forName(name); - } catch (Exception e) { - try { - String n = DocBuilder.class.getPackage().getName() + "." + name; - return core != null ? - core.getResourceLoader().findClass(n, Object.class) : - Class.forName(n); - } catch (Exception e1) { - throw new ClassNotFoundException("Unable to load " + name + " or " + DocBuilder.class.getPackage().getName() + "." 
+ name, e); - } - } - } - - public static class Statistics { - public AtomicLong docCount = new AtomicLong(); - - public AtomicLong deletedDocCount = new AtomicLong(); - - public AtomicLong failedDocCount = new AtomicLong(); - - public AtomicLong rowsCount = new AtomicLong(); - - public AtomicLong queryCount = new AtomicLong(); - - public AtomicLong skipDocCount = new AtomicLong(); - - public Statistics add(Statistics stats) { - this.docCount.addAndGet(stats.docCount.get()); - this.deletedDocCount.addAndGet(stats.deletedDocCount.get()); - this.rowsCount.addAndGet(stats.rowsCount.get()); - this.queryCount.addAndGet(stats.queryCount.get()); - - return this; - } - - public Map getStatsSnapshot() { - Map result = new HashMap<>(); - result.put("docCount", docCount.get()); - result.put("deletedDocCount", deletedDocCount.get()); - result.put("rowCount", rowsCount.get()); - result.put("queryCount", rowsCount.get()); - result.put("skipDocCount", skipDocCount.get()); - return result; - } - - } - - private void cleanByQuery(String delQuery, AtomicBoolean completeCleanDone) { - delQuery = getVariableResolver().replaceTokens(delQuery); - if (reqParams.isClean()) { - if (delQuery == null && !completeCleanDone.get()) { - writer.doDeleteAll(); - completeCleanDone.set(true); - } else if (delQuery != null) { - writer.deleteByQuery(delQuery); - } - } - } - - public static final String LAST_INDEX_TIME = "last_index_time"; - public static final String INDEX_START_TIME = "index_start_time"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java deleted file mode 100644 index 7ded623486e..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.Closeable; -import java.util.Map; - -/** - *

- * An instance of entity processor serves an entity. It is reused throughout the - * import process. - *

- *

- * Implementations of this abstract class must provide a public no-args constructor. - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public abstract class EntityProcessor implements Closeable { - - /** - * This method is called when it starts processing an entity. When it comes - * back to the entity it is called again. So it can reset anything at that point. - * For a rootmost entity this is called only once for an ingestion. For sub-entities , this - * is called multiple once for each row from its parent entity - * - * @param context The current context - */ - public abstract void init(Context context); - - /** - * This method helps streaming the data for each row . The implementation - * would fetch as many rows as needed and gives one 'row' at a time. Only this - * method is used during a full import - * - * @return A 'row'. The 'key' for the map is the column name and the 'value' - * is the value of that column. If there are no more rows to be - * returned, return 'null' - */ - public abstract Map nextRow(); - - /** - * This is used for delta-import. It gives the pks of the changed rows in this - * entity - * - * @return the pk vs value of all changed rows - */ - public abstract Map nextModifiedRowKey(); - - /** - * This is used during delta-import. It gives the primary keys of the rows - * that are deleted from this entity. If this entity is the root entity, solr - * document is deleted. If this is a sub-entity, the Solr document is - * considered as 'changed' and will be recreated - * - * @return the pk vs value of all changed rows - */ - public abstract Map nextDeletedRowKey(); - - /** - * This is used during delta-import. This gives the primary keys and their - * values of all the rows changed in a parent entity due to changes in this - * entity. - * - * @return the pk vs value of all changed rows in the parent entity - */ - public abstract Map nextModifiedParentRowKey(); - - /** - * Invoked for each entity at the very end of the import to do any needed cleanup tasks. 
- * - */ - public abstract void destroy(); - - /** - * Invoked after the transformers are invoked. EntityProcessors can add, remove or modify values - * added by Transformers in this method. - * - * @param r The transformed row - * @since solr 1.4 - */ - public void postTransform(Map r) { - } - - /** - * Invoked when the Entity processor is destroyed towards the end of import. - * - * @since solr 1.4 - */ - public void close() { - //no-op - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java deleted file mode 100644 index 8311f362840..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.SolrException; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.invoke.MethodHandles; -import java.util.*; - -/** - *

Base class for all implementations of {@link EntityProcessor}

Most implementations of {@link EntityProcessor} - * extend this base class which provides common functionality.

- *

- * This API is experimental and subject to change - * - * @since solr 1.3 - */ -public class EntityProcessorBase extends EntityProcessor { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - protected boolean isFirstInit = true; - - protected String entityName; - - protected Context context; - - protected Iterator> rowIterator; - - protected String query; - - protected String onError = ABORT; - - protected DIHCacheSupport cacheSupport = null; - - private Zipper zipper; - - - @Override - public void init(Context context) { - this.context = context; - if (isFirstInit) { - firstInit(context); - } - if(zipper!=null){ - zipper.onNewParent(context); - }else{ - if(cacheSupport!=null) { - cacheSupport.initNewParent(context); - } - } - } - - /** - * first time init call. do one-time operations here - * it's necessary to call it from the overridden method, - * otherwise it throws NPE on accessing zipper from nextRow() - */ - protected void firstInit(Context context) { - entityName = context.getEntityAttribute("name"); - String s = context.getEntityAttribute(ON_ERROR); - if (s != null) onError = s; - - zipper = Zipper.createOrNull(context); - - if(zipper==null){ - initCache(context); - } - isFirstInit = false; - } - - protected void initCache(Context context) { - String cacheImplName = context - .getResolvedEntityAttribute(DIHCacheSupport.CACHE_IMPL); - - if (cacheImplName != null ) { - cacheSupport = new DIHCacheSupport(context, cacheImplName); - } - } - - @Override - public Map nextModifiedRowKey() { - return null; - } - - @Override - public Map nextDeletedRowKey() { - return null; - } - - @Override - public Map nextModifiedParentRowKey() { - return null; - } - - /** - * For a simple implementation, this is the only method that the sub-class should implement. This is intended to - * stream rows one-by-one. 
Return null to signal end of rows - * - * @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return - * null to signal end of rows - */ - @Override - public Map nextRow() { - return null;// do not do anything - } - - protected Map getNext() { - if(zipper!=null){ - return zipper.supplyNextChild(rowIterator); - }else{ - if(cacheSupport==null) { - try { - if (rowIterator == null) - return null; - if (rowIterator.hasNext()) - return rowIterator.next(); - query = null; - rowIterator = null; - return null; - } catch (Exception e) { - SolrException.log(log, "getNext() failed for query '" + query + "'", e); - query = null; - rowIterator = null; - wrapAndThrow(DataImportHandlerException.WARN, e); - return null; - } - } else { - return cacheSupport.getCacheData(context, query, rowIterator); - } - } - } - - - @Override - public void destroy() { - query = null; - if(cacheSupport!=null){ - cacheSupport.destroyAll(); - } - cacheSupport = null; - } - - - - public static final String TRANSFORMER = "transformer"; - - public static final String TRANSFORM_ROW = "transformRow"; - - public static final String ON_ERROR = "onError"; - - public static final String ABORT = "abort"; - - public static final String CONTINUE = "continue"; - - public static final String SKIP = "skip"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java deleted file mode 100644 index 6c106bd3617..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.dataimport.config.ConfigNameConstants; -import org.apache.solr.handler.dataimport.config.Entity; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.*; -import static org.apache.solr.handler.dataimport.EntityProcessorBase.*; -import static org.apache.solr.handler.dataimport.EntityProcessorBase.SKIP; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.invoke.MethodHandles; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * A Wrapper over {@link EntityProcessor} instance which performs transforms and handles multi-row outputs correctly. 
- * - * @since solr 1.4 - */ -public class EntityProcessorWrapper extends EntityProcessor { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private EntityProcessor delegate; - private Entity entity; - @SuppressWarnings({"rawtypes"}) - private DataSource datasource; - private List children = new ArrayList<>(); - private DocBuilder docBuilder; - private boolean initialized; - private String onError; - private Context context; - private VariableResolver resolver; - private String entityName; - - protected List transformers; - - protected List> rowcache; - - public EntityProcessorWrapper(EntityProcessor delegate, Entity entity, DocBuilder docBuilder) { - this.delegate = delegate; - this.entity = entity; - this.docBuilder = docBuilder; - } - - @Override - public void init(Context context) { - rowcache = null; - this.context = context; - resolver = context.getVariableResolver(); - if (entityName == null) { - onError = resolver.replaceTokens(context.getEntityAttribute(ON_ERROR)); - if (onError == null) onError = ABORT; - entityName = context.getEntityAttribute(ConfigNameConstants.NAME); - } - delegate.init(context); - - } - - @SuppressWarnings({"unchecked"}) - void loadTransformers() { - String transClasses = context.getEntityAttribute(TRANSFORMER); - - if (transClasses == null) { - transformers = Collections.emptyList(); - return; - } - - String[] transArr = transClasses.split(","); - transformers = new ArrayList() { - @Override - public boolean add(Transformer transformer) { - if (docBuilder != null && docBuilder.verboseDebug) { - transformer = docBuilder.getDebugLogger().wrapTransformer(transformer); - } - return super.add(transformer); - } - }; - for (String aTransArr : transArr) { - String trans = aTransArr.trim(); - if (trans.startsWith("script:")) { - // The script transformer is a potential vulnerability, esp. when the script is - // provided from an untrusted source. 
Check and don't proceed if source is untrusted. - checkIfTrusted(trans); - String functionName = trans.substring("script:".length()); - ScriptTransformer scriptTransformer = new ScriptTransformer(); - scriptTransformer.setFunctionName(functionName); - transformers.add(scriptTransformer); - continue; - } - try { - @SuppressWarnings({"rawtypes"}) - Class clazz = DocBuilder.loadClass(trans, context.getSolrCore()); - if (Transformer.class.isAssignableFrom(clazz)) { - transformers.add((Transformer) clazz.getConstructor().newInstance()); - } else { - Method meth = clazz.getMethod(TRANSFORM_ROW, Map.class); - transformers.add(new ReflectionTransformer(meth, clazz, trans)); - } - } catch (NoSuchMethodException nsme){ - String msg = "Transformer :" - + trans - + "does not implement Transformer interface or does not have a transformRow(Map m)method"; - log.error(msg); - wrapAndThrow(SEVERE, nsme,msg); - } catch (Exception e) { - log.error("Unable to load Transformer: {}", aTransArr, e); - wrapAndThrow(SEVERE, e,"Unable to load Transformer: " + trans); - } - } - - } - - private void checkIfTrusted(String trans) { - if (docBuilder != null) { - SolrCore core = docBuilder.dataImporter.getCore(); - boolean trusted = (core != null)? core.getCoreDescriptor().isConfigSetTrusted(): true; - if (!trusted) { - Exception ex = new SolrException(ErrorCode.UNAUTHORIZED, "The configset for this collection was uploaded " - + "without any authentication in place," - + " and this transformer is not available for collections with untrusted configsets. To use this transformer," - + " re-upload the configset after enabling authentication and authorization."); - String msg = "Transformer: " - + trans - + ". 
" + ex.getMessage(); - log.error(msg); - wrapAndThrow(SEVERE, ex, msg); - } - } - } - - @SuppressWarnings("unchecked") - static class ReflectionTransformer extends Transformer { - final Method meth; - - @SuppressWarnings({"rawtypes"}) - final Class clazz; - - final String trans; - - final Object o; - - public ReflectionTransformer(Method meth, @SuppressWarnings({"rawtypes"})Class clazz, String trans) - throws Exception { - this.meth = meth; - this.clazz = clazz; - this.trans = trans; - o = clazz.getConstructor().newInstance(); - } - - @Override - public Object transformRow(Map aRow, Context context) { - try { - return meth.invoke(o, aRow); - } catch (Exception e) { - log.warn("method invocation failed on transformer : {}", trans, e); - throw new DataImportHandlerException(WARN, e); - } - } - } - - protected Map getFromRowCache() { - Map r = rowcache.remove(0); - if (rowcache.isEmpty()) - rowcache = null; - return r; - } - - @SuppressWarnings("unchecked") - protected Map applyTransformer(Map row) { - if(row == null) return null; - if (transformers == null) - loadTransformers(); - if (transformers == Collections.EMPTY_LIST) - return row; - Map transformedRow = row; - List> rows = null; - boolean stopTransform = checkStopTransform(row); - VariableResolver resolver = context.getVariableResolver(); - for (Transformer t : transformers) { - if (stopTransform) break; - try { - if (rows != null) { - List> tmpRows = new ArrayList<>(); - for (Map map : rows) { - resolver.addNamespace(entityName, map); - Object o = t.transformRow(map, context); - if (o == null) - continue; - if (o instanceof Map) { - @SuppressWarnings({"rawtypes"}) - Map oMap = (Map) o; - stopTransform = checkStopTransform(oMap); - tmpRows.add((Map) o); - } else if (o instanceof List) { - tmpRows.addAll((List) o); - } else { - log.error("Transformer must return Map or a List>"); - } - } - rows = tmpRows; - } else { - resolver.addNamespace(entityName, transformedRow); - Object o = t.transformRow(transformedRow, 
context); - if (o == null) - return null; - if (o instanceof Map) { - @SuppressWarnings({"rawtypes"}) - Map oMap = (Map) o; - stopTransform = checkStopTransform(oMap); - transformedRow = (Map) o; - } else if (o instanceof List) { - rows = (List) o; - } else { - log.error("Transformer must return Map or a List>"); - } - } - } catch (Exception e) { - log.warn("transformer threw error", e); - if (ABORT.equals(onError)) { - wrapAndThrow(SEVERE, e); - } else if (SKIP.equals(onError)) { - wrapAndThrow(DataImportHandlerException.SKIP, e); - } - // onError = continue - } - } - if (rows == null) { - return transformedRow; - } else { - rowcache = rows; - return getFromRowCache(); - } - - } - - private boolean checkStopTransform(@SuppressWarnings({"rawtypes"})Map oMap) { - return oMap.get("$stopTransform") != null - && Boolean.parseBoolean(oMap.get("$stopTransform").toString()); - } - - @Override - public Map nextRow() { - if (rowcache != null) { - return getFromRowCache(); - } - while (true) { - Map arow = null; - try { - arow = delegate.nextRow(); - } catch (Exception e) { - if(ABORT.equals(onError)){ - wrapAndThrow(SEVERE, e); - } else { - //SKIP is not really possible. 
If this calls the nextRow() again the Entityprocessor would be in an inconisttent state - SolrException.log(log, "Exception in entity : "+ entityName, e); - return null; - } - } - if (arow == null) { - return null; - } else { - arow = applyTransformer(arow); - if (arow != null) { - delegate.postTransform(arow); - return arow; - } - } - } - } - - @Override - public Map nextModifiedRowKey() { - Map row = delegate.nextModifiedRowKey(); - row = applyTransformer(row); - rowcache = null; - return row; - } - - @Override - public Map nextDeletedRowKey() { - Map row = delegate.nextDeletedRowKey(); - row = applyTransformer(row); - rowcache = null; - return row; - } - - @Override - public Map nextModifiedParentRowKey() { - return delegate.nextModifiedParentRowKey(); - } - - @Override - public void destroy() { - delegate.destroy(); - } - - public VariableResolver getVariableResolver() { - return context.getVariableResolver(); - } - - public Context getContext() { - return context; - } - - @Override - public void close() { - delegate.close(); - } - - public Entity getEntity() { - return entity; - } - - public List getChildren() { - return children; - } - - @SuppressWarnings({"rawtypes"}) - public DataSource getDatasource() { - return datasource; - } - - public void setDatasource(@SuppressWarnings({"rawtypes"})DataSource datasource) { - this.datasource = datasource; - } - - public boolean isInitialized() { - return initialized; - } - - public void setInitialized(boolean initialized) { - this.initialized = initialized; - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java deleted file mode 100644 index 22282b9cf8b..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; - -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; - -/** - *

- * Pluggable functions for resolving variables - *

- *

- * Implementations of this abstract class must provide a public no-arg constructor. - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public abstract class Evaluator { - - /** - * Return a String after processing an expression and a {@link VariableResolver} - * - * @see VariableResolver - * @param expression string to be evaluated - * @param context instance - * @return the value of the given expression evaluated using the resolver - */ - public abstract String evaluate(String expression, Context context); - - /** - * Parses a string of expression into separate params. The values are separated by commas. each value will be - * translated into one of the following: - * <ol> - * <li>If it is in single quotes the value will be translated to a String</li> - * <li>If is is not in quotes and is a number a it will be translated into a Double</li> - * <li>else it is a variable which can be resolved and it will be put in as an instance of VariableWrapper</li> - * </ol> - * - * @param expression the expression to be parsed - * @param vr the VariableResolver instance for resolving variables - * - * @return a List of objects which can either be a string, number or a variable wrapper - */ - protected List parseParams(String expression, VariableResolver vr) { - List result = new ArrayList<>(); - expression = expression.trim(); - String[] ss = expression.split(","); - for (int i = 0; i < ss.length; i++) { - ss[i] = ss[i].trim(); - if (ss[i].startsWith("'")) {//a string param has started - StringBuilder sb = new StringBuilder(); - while (true) { - sb.append(ss[i]); - if (ss[i].endsWith("'")) break; - i++; - if (i >= ss.length) - throw new DataImportHandlerException(SEVERE, "invalid string at " + ss[i - 1] + " in function params: " + expression); - sb.append(","); - } - String s = sb.substring(1, sb.length() - 1); - s = s.replaceAll("\\\\'", "'"); - result.add(s); - } else { - if (Character.isDigit(ss[i].charAt(0))) { - try { - Double doub = Double.parseDouble(ss[i]); - result.add(doub); - } catch (NumberFormatException e) { 
- if (vr.resolve(ss[i]) == null) { - wrapAndThrow( - SEVERE, e, "Invalid number :" + ss[i] + - "in parameters " + expression); - } - } - } else { - result.add(getVariableWrapper(ss[i], vr)); - } - } - } - return result; - } - - protected VariableWrapper getVariableWrapper(String s, VariableResolver vr) { - return new VariableWrapper(s,vr); - } - - static protected class VariableWrapper { - public final String varName; - public final VariableResolver vr; - - public VariableWrapper(String s, VariableResolver vr) { - this.varName = s; - this.vr = vr; - } - - public Object resolve() { - return vr.resolve(varName); - } - - @Override - public String toString() { - Object o = vr.resolve(varName); - return o == null ? null : o.toString(); - } - } - - static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$"); - - public static final String DATE_FORMAT_EVALUATOR = "formatDate"; - - public static final String URL_ENCODE_EVALUATOR = "encodeUrl"; - - public static final String ESCAPE_SOLR_QUERY_CHARS = "escapeQueryChars"; - - public static final String SQL_ESCAPE_EVALUATOR = "escapeSql"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EventListener.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EventListener.java deleted file mode 100644 index 0c43a0bbaaa..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EventListener.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -/** - * Event listener for DataImportHandler - * - * This API is experimental and subject to change - * - * @since solr 1.4 - */ -public interface EventListener { - - /** - * Event callback - * - * @param ctx the Context in which this event was called - */ - void onEvent(Context ctx); - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java deleted file mode 100644 index 571c280e395..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.*; -import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; -import java.sql.Blob; -import java.sql.Clob; -import java.sql.SQLException; -import java.util.Properties; - -/** - * This can be useful for users who have a DB field containing xml and wish to use a nested {@link XPathEntityProcessor} - *

- * The datasouce may be configured as follows - *

- * <datasource name="f1" type="FieldReaderDataSource" /> - *

- * The entity which uses this datasource must keep the url value as the variable name url="field-name" - *

- * The fieldname must be resolvable from {@link VariableResolver} - *

- * This may be used with any {@link EntityProcessor} which uses a {@link DataSource}<{@link Reader}> eg: {@link XPathEntityProcessor} - *

- * Supports String, BLOB, CLOB data types and there is an extra field (in the entity) 'encoding' for BLOB types - * - * @since 1.4 - */ -public class FieldReaderDataSource extends DataSource { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - protected VariableResolver vr; - protected String dataField; - private String encoding; - private EntityProcessorWrapper entityProcessor; - - @Override - public void init(Context context, Properties initProps) { - dataField = context.getEntityAttribute("dataField"); - encoding = context.getEntityAttribute("encoding"); - entityProcessor = (EntityProcessorWrapper) context.getEntityProcessor(); - /*no op*/ - } - - @Override - public Reader getData(String query) { - Object o = entityProcessor.getVariableResolver().resolve(dataField); - if (o == null) { - throw new DataImportHandlerException (SEVERE, "No field available for name : " +dataField); - } - if (o instanceof String) { - return new StringReader((String) o); - } else if (o instanceof Clob) { - Clob clob = (Clob) o; - try { - //Most of the JDBC drivers have getCharacterStream defined as public - // so let us just check it - return readCharStream(clob); - } catch (Exception e) { - log.info("Unable to get data from CLOB"); - return null; - - } - - } else if (o instanceof Blob) { - Blob blob = (Blob) o; - try { - return getReader(blob); - } catch (Exception e) { - log.info("Unable to get data from BLOB"); - return null; - - } - } else { - return new StringReader(o.toString()); - } - - } - - static Reader readCharStream(Clob clob) { - try { - return clob.getCharacterStream(); - } catch (Exception e) { - wrapAndThrow(SEVERE, e,"Unable to get reader from clob"); - return null;//unreachable - } - } - - private Reader getReader(Blob blob) - throws SQLException, UnsupportedEncodingException { - if (encoding == null) { - return (new InputStreamReader(blob.getBinaryStream(), StandardCharsets.UTF_8)); - } else { - return (new 
InputStreamReader(blob.getBinaryStream(), encoding)); - } - } - - @Override - public void close() { - - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java deleted file mode 100644 index ba7ca5d319e..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.lang.invoke.MethodHandles; -import java.sql.Blob; -import java.sql.SQLException; -import java.util.Properties; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * This can be useful for users who have a DB field containing BLOBs which may be Rich documents - *

- * The datasource may be configured as follows - *

- * <dataSource name="f1" type="FieldStreamDataSource" /> - *

- * The entity which uses this datasource must keep and attribute dataField - *

- * The fieldname must be resolvable from {@link VariableResolver} - *

- * This may be used with any {@link EntityProcessor} which uses a {@link DataSource}<{@link InputStream}> eg: TikaEntityProcessor - * - * @since 3.1 - */ -public class FieldStreamDataSource extends DataSource { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - protected VariableResolver vr; - protected String dataField; - private EntityProcessorWrapper wrapper; - - @Override - public void init(Context context, Properties initProps) { - dataField = context.getEntityAttribute("dataField"); - wrapper = (EntityProcessorWrapper) context.getEntityProcessor(); - /*no op*/ - } - - @Override - public InputStream getData(String query) { - Object o = wrapper.getVariableResolver().resolve(dataField); - if (o == null) { - throw new DataImportHandlerException(SEVERE, "No field available for name : " + dataField); - } else if (o instanceof Blob) { - Blob blob = (Blob) o; - try { - return blob.getBinaryStream(); - } catch (SQLException sqle) { - log.info("Unable to get data from BLOB"); - return null; - } - } else if (o instanceof byte[]) { - byte[] bytes = (byte[]) o; - return new ByteArrayInputStream(bytes); - } else { - throw new RuntimeException("unsupported type : " + o.getClass()); - } - - } - - @Override - public void close() { - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java deleted file mode 100644 index 34df122687c..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.*; -import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; -import java.util.Properties; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -/** - *

- * A {@link DataSource} which reads from local files - *

- *

- * The file is read with the default platform encoding. It can be overriden by - * specifying the encoding in solrconfig.xml - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public class FileDataSource extends DataSource { - public static final String BASE_PATH = "basePath"; - - /** - * The basePath for this data source - */ - protected String basePath; - - /** - * The encoding using which the given file should be read - */ - protected String encoding = null; - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - @Override - public void init(Context context, Properties initProps) { - basePath = initProps.getProperty(BASE_PATH); - if (initProps.get(URLDataSource.ENCODING) != null) - encoding = initProps.getProperty(URLDataSource.ENCODING); - } - - /** - *

- * Returns a reader for the given file. - *

- *

- * If the given file is not absolute, we try to construct an absolute path - * using basePath configuration. If that fails, then the relative path is - * tried. If file is not found a RuntimeException is thrown. - *

- *

- * It is the responsibility of the calling method to properly close the - * returned Reader - *

- */ - @Override - public Reader getData(String query) { - File f = getFile(basePath,query); - try { - return openStream(f); - } catch (Exception e) { - wrapAndThrow(SEVERE,e,"Unable to open File : "+f.getAbsolutePath()); - return null; - } - } - - static File getFile(String basePath, String query) { - try { - File file = new File(query); - - // If it's not an absolute path, try relative from basePath. - if (!file.isAbsolute()) { - // Resolve and correct basePath. - File basePathFile; - if (basePath == null) { - basePathFile = new File(".").getAbsoluteFile(); - log.warn("FileDataSource.basePath is empty. Resolving to: {}" - , basePathFile.getAbsolutePath()); - } else { - basePathFile = new File(basePath); - if (!basePathFile.isAbsolute()) { - basePathFile = basePathFile.getAbsoluteFile(); - log.warn("FileDataSource.basePath is not absolute. Resolving to: {}" - , basePathFile.getAbsolutePath()); - } - } - - file = new File(basePathFile, query).getAbsoluteFile(); - } - - if (file.isFile() && file.canRead()) { - if (log.isDebugEnabled()) { - log.debug("Accessing File: {}", file.getAbsolutePath()); - } - return file; - } else { - throw new FileNotFoundException("Could not find file: " + query + - " (resolved to: " + file.getAbsolutePath()); - } - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - } - - /** - * Open a {@link java.io.Reader} for the given file name - * - * @param file a {@link java.io.File} instance - * @return a Reader on the given file - * @throws FileNotFoundException if the File does not exist - * @throws UnsupportedEncodingException if the encoding is unsupported - * @since solr 1.4 - */ - protected Reader openStream(File file) throws FileNotFoundException, - UnsupportedEncodingException { - if (encoding == null) { - return new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8); - } else { - return new InputStreamReader(new FileInputStream(file), encoding); - } - } - - @Override - public void close() { - - } 
-} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java deleted file mode 100644 index a03354f2d2f..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.File; -import java.io.FilenameFilter; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.TimeZone; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.solr.util.DateMathParser; - -/** - *

- * An {@link EntityProcessor} instance which can stream file names found in a given base - * directory matching patterns and returning rows containing file information. - *

- *

- * It supports querying a give base directory by matching: - *

    - *
  • regular expressions to file names
  • - *
  • excluding certain files based on regular expression
  • - *
  • last modification date (newer or older than a given date or time)
  • - *
  • size (bigger or smaller than size given in bytes)
  • - *
  • recursively iterating through sub-directories
  • - *
- * Its output can be used along with {@link FileDataSource} to read from files in file - * systems. - *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - * @see Pattern - */ -public class FileListEntityProcessor extends EntityProcessorBase { - /** - * A regex pattern to identify files given in data-config.xml after resolving any variables - */ - protected String fileName; - - /** - * The baseDir given in data-config.xml after resolving any variables - */ - protected String baseDir; - - /** - * A Regex pattern of excluded file names as given in data-config.xml after resolving any variables - */ - protected String excludes; - - /** - * The newerThan given in data-config as a {@link java.util.Date} - *

- * Note: This variable is resolved just-in-time in the {@link #nextRow()} method. - *

- */ - protected Date newerThan; - - /** - * The newerThan given in data-config as a {@link java.util.Date} - */ - protected Date olderThan; - - /** - * The biggerThan given in data-config as a long value - *

- * Note: This variable is resolved just-in-time in the {@link #nextRow()} method. - *

- */ - protected long biggerThan = -1; - - /** - * The smallerThan given in data-config as a long value - *

- * Note: This variable is resolved just-in-time in the {@link #nextRow()} method. - *

- */ - protected long smallerThan = -1; - - /** - * The recursive given in data-config. Default value is false. - */ - protected boolean recursive = false; - - private Pattern fileNamePattern, excludesPattern; - - @Override - public void init(Context context) { - super.init(context); - fileName = context.getEntityAttribute(FILE_NAME); - if (fileName != null) { - fileName = context.replaceTokens(fileName); - fileNamePattern = Pattern.compile(fileName); - } - baseDir = context.getEntityAttribute(BASE_DIR); - if (baseDir == null) - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'baseDir' is a required attribute"); - baseDir = context.replaceTokens(baseDir); - File dir = new File(baseDir); - if (!dir.isDirectory()) - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'baseDir' value: " + baseDir + " is not a directory"); - - String r = context.getEntityAttribute(RECURSIVE); - if (r != null) - recursive = Boolean.parseBoolean(r); - excludes = context.getEntityAttribute(EXCLUDES); - if (excludes != null) { - excludes = context.replaceTokens(excludes); - excludesPattern = Pattern.compile(excludes); - } - } - - /** - * Get the Date object corresponding to the given string. - * - * @param dateStr the date string. It can be a DateMath string or it may have a evaluator function - * @return a Date instance corresponding to the input string - */ - private Date getDate(String dateStr) { - if (dateStr == null) - return null; - - Matcher m = PLACE_HOLDER_PATTERN.matcher(dateStr); - if (m.find()) { - Object o = context.resolve(m.group(1)); - if (o instanceof Date) return (Date)o; - dateStr = (String) o; - } else { - dateStr = context.replaceTokens(dateStr); - } - m = Evaluator.IN_SINGLE_QUOTES.matcher(dateStr); - if (m.find()) { - String expr = m.group(1); - //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic. 
- if (expr.startsWith("NOW")) { - expr = expr.substring("NOW".length()); - } - try { - // DWS TODO: is this TimeZone the right default for us? Deserves explanation if so. - return new DateMathParser(TimeZone.getDefault()).parseMath(expr); - } catch (ParseException exp) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Invalid expression for date", exp); - } - } - try { - return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).parse(dateStr); - } catch (ParseException exp) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Invalid expression for date", exp); - } - } - - /** - * Get the Long value for the given string after resolving any evaluator or variable. - * - * @param sizeStr the size as a string - * @return the Long value corresponding to the given string - */ - private Long getSize(String sizeStr) { - if (sizeStr == null) - return null; - - Matcher m = PLACE_HOLDER_PATTERN.matcher(sizeStr); - if (m.find()) { - Object o = context.resolve(m.group(1)); - if (o instanceof Number) { - Number number = (Number) o; - return number.longValue(); - } - sizeStr = (String) o; - } else { - sizeStr = context.replaceTokens(sizeStr); - } - - return Long.parseLong(sizeStr); - } - - @Override - public Map nextRow() { - if (rowIterator != null) - return getNext(); - List> fileDetails = new ArrayList<>(); - File dir = new File(baseDir); - - String dateStr = context.getEntityAttribute(NEWER_THAN); - newerThan = getDate(dateStr); - dateStr = context.getEntityAttribute(OLDER_THAN); - olderThan = getDate(dateStr); - String biggerThanStr = context.getEntityAttribute(BIGGER_THAN); - if (biggerThanStr != null) - biggerThan = getSize(biggerThanStr); - String smallerThanStr = context.getEntityAttribute(SMALLER_THAN); - if (smallerThanStr != null) - smallerThan = getSize(smallerThanStr); - - getFolderFiles(dir, fileDetails); - rowIterator = fileDetails.iterator(); - return getNext(); - } - - private void getFolderFiles(File 
dir, final List> fileDetails) { - // Fetch an array of file objects that pass the filter, however the - // returned array is never populated; accept() always returns false. - // Rather we make use of the fileDetails array which is populated as - // a side affect of the accept method. - dir.list(new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - File fileObj = new File(dir, name); - if (fileObj.isDirectory()) { - if (recursive) getFolderFiles(fileObj, fileDetails); - } else if (fileNamePattern == null) { - addDetails(fileDetails, dir, name); - } else if (fileNamePattern.matcher(name).find()) { - if (excludesPattern != null && excludesPattern.matcher(name).find()) - return false; - addDetails(fileDetails, dir, name); - } - return false; - } - }); - } - - private void addDetails(List> files, File dir, String name) { - Map details = new HashMap<>(); - File aFile = new File(dir, name); - if (aFile.isDirectory()) return; - long sz = aFile.length(); - Date lastModified = new Date(aFile.lastModified()); - if (biggerThan != -1 && sz <= biggerThan) - return; - if (smallerThan != -1 && sz >= smallerThan) - return; - if (olderThan != null && lastModified.after(olderThan)) - return; - if (newerThan != null && lastModified.before(newerThan)) - return; - details.put(DIR, dir.getAbsolutePath()); - details.put(FILE, name); - details.put(ABSOLUTE_FILE, aFile.getAbsolutePath()); - details.put(SIZE, sz); - details.put(LAST_MODIFIED, lastModified); - files.add(details); - } - - public static final Pattern PLACE_HOLDER_PATTERN = Pattern - .compile("\\$\\{(.*?)\\}"); - - public static final String DIR = "fileDir"; - - public static final String FILE = "file"; - - public static final String ABSOLUTE_FILE = "fileAbsolutePath"; - - public static final String SIZE = "fileSize"; - - public static final String LAST_MODIFIED = "fileLastModified"; - - public static final String FILE_NAME = "fileName"; - - public static final String BASE_DIR = "baseDir"; - - 
public static final String EXCLUDES = "excludes"; - - public static final String NEWER_THAN = "newerThan"; - - public static final String OLDER_THAN = "olderThan"; - - public static final String BIGGER_THAN = "biggerThan"; - - public static final String SMALLER_THAN = "smallerThan"; - - public static final String RECURSIVE = "recursive"; - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java deleted file mode 100644 index 7ef4d9362a9..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; - -import java.io.IOException; -import java.io.StringReader; -import java.io.BufferedReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * A {@link Transformer} implementation which strip off HTML tags using {@link HTMLStripCharFilter} This is useful - * in case you don't need this HTML anyway. - * - * @see HTMLStripCharFilter - * @since solr 1.4 - */ -public class HTMLStripTransformer extends Transformer { - - @Override - @SuppressWarnings("unchecked") - public Object transformRow(Map row, Context context) { - List> fields = context.getAllEntityFields(); - for (Map field : fields) { - String col = field.get(DataImporter.COLUMN); - String splitHTML = context.replaceTokens(field.get(STRIP_HTML)); - if (!TRUE.equals(splitHTML)) - continue; - Object tmpVal = row.get(col); - if (tmpVal == null) - continue; - - if (tmpVal instanceof List) { - List inputs = (List) tmpVal; - @SuppressWarnings({"rawtypes"}) - List results = new ArrayList(); - for (String input : inputs) { - if (input == null) - continue; - Object o = stripHTML(input, col); - if (o != null) - results.add(o); - } - row.put(col, results); - } else { - String value = tmpVal.toString(); - Object o = stripHTML(value, col); - if (o != null) - row.put(col, o); - } - } - return row; - } - - private Object stripHTML(String value, String column) { - StringBuilder out = new StringBuilder(); - StringReader strReader = new StringReader(value); - try { - HTMLStripCharFilter html = new HTMLStripCharFilter(strReader.markSupported() ? 
strReader : new BufferedReader(strReader)); - char[] cbuf = new char[1024 * 10]; - while (true) { - int count = html.read(cbuf); - if (count == -1) - break; // end of stream mark is -1 - if (count > 0) - out.append(cbuf, 0, count); - } - html.close(); - } catch (IOException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Failed stripping HTML for column: " + column, e); - } - return out.toString(); - } - - public static final String STRIP_HTML = "stripHTML"; - - public static final String TRUE = "true"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java deleted file mode 100644 index 87f38f49f68..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java +++ /dev/null @@ -1,583 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -import org.apache.solr.common.SolrException; -import org.apache.solr.util.CryptoKeys; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.naming.InitialContext; -import javax.naming.NamingException; - -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Reader; -import java.lang.invoke.MethodHandles; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.sql.*; -import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.TimeUnit; - -/** - *

A DataSource implementation which can fetch data using JDBC.

Refer to http://wiki.apache.org/solr/DataImportHandler for more - * details.

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public class JdbcDataSource extends - DataSource>> { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - protected Callable factory; - - private long connLastUsed = 0; - - private Connection conn; - - private ResultSetIterator resultSetIterator; - - private Map fieldNameVsType = new HashMap<>(); - - private boolean convertType = false; - - private int batchSize = FETCH_SIZE; - - private int maxRows = 0; - - @Override - public void init(Context context, Properties initProps) { - resolveVariables(context, initProps); - initProps = decryptPwd(context, initProps); - Object o = initProps.get(CONVERT_TYPE); - if (o != null) - convertType = Boolean.parseBoolean(o.toString()); - - factory = createConnectionFactory(context, initProps); - - String bsz = initProps.getProperty("batchSize"); - if (bsz != null) { - bsz = context.replaceTokens(bsz); - try { - batchSize = Integer.parseInt(bsz); - if (batchSize == -1) - batchSize = Integer.MIN_VALUE; - } catch (NumberFormatException e) { - log.warn("Invalid batch size: {}", bsz); - } - } - - for (Map map : context.getAllEntityFields()) { - String n = map.get(DataImporter.COLUMN); - String t = map.get(DataImporter.TYPE); - if ("sint".equals(t) || "integer".equals(t)) - fieldNameVsType.put(n, Types.INTEGER); - else if ("slong".equals(t) || "long".equals(t)) - fieldNameVsType.put(n, Types.BIGINT); - else if ("float".equals(t) || "sfloat".equals(t)) - fieldNameVsType.put(n, Types.FLOAT); - else if ("double".equals(t) || "sdouble".equals(t)) - fieldNameVsType.put(n, Types.DOUBLE); - else if ("date".equals(t)) - fieldNameVsType.put(n, Types.DATE); - else if ("boolean".equals(t)) - fieldNameVsType.put(n, Types.BOOLEAN); - else if ("binary".equals(t)) - fieldNameVsType.put(n, Types.BLOB); - else - fieldNameVsType.put(n, Types.VARCHAR); - } - } - - private Properties decryptPwd(Context context, 
Properties initProps) { - String encryptionKey = initProps.getProperty("encryptKeyFile"); - if (initProps.getProperty("password") != null && encryptionKey != null) { - // this means the password is encrypted and use the file to decode it - try { - try (Reader fr = new InputStreamReader(new FileInputStream(encryptionKey), UTF_8)) { - char[] chars = new char[100];//max 100 char password - int len = fr.read(chars); - if (len < 6) - throw new DataImportHandlerException(SEVERE, "There should be a password of length 6 atleast " + encryptionKey); - Properties props = new Properties(); - props.putAll(initProps); - String password = null; - try { - password = CryptoKeys.decodeAES(initProps.getProperty("password"), new String(chars, 0, len)).trim(); - } catch (SolrException se) { - throw new DataImportHandlerException(SEVERE, "Error decoding password", se.getCause()); - } - props.put("password", password); - initProps = props; - } - } catch (IOException e) { - throw new DataImportHandlerException(SEVERE, "Could not load encryptKeyFile " + encryptionKey); - } - } - return initProps; - } - - protected Callable createConnectionFactory(final Context context, - final Properties initProps) { -// final VariableResolver resolver = context.getVariableResolver(); - final String jndiName = initProps.getProperty(JNDI_NAME); - final String url = initProps.getProperty(URL); - final String driver = initProps.getProperty(DRIVER); - - if (url == null && jndiName == null) - throw new DataImportHandlerException(SEVERE, - "JDBC URL or JNDI name has to be specified"); - - if (driver != null) { - try { - DocBuilder.loadClass(driver, context.getSolrCore()); - } catch (ClassNotFoundException e) { - wrapAndThrow(SEVERE, e, "Could not load driver: " + driver); - } - } else { - if(jndiName == null){ - throw new DataImportHandlerException(SEVERE, "One of driver or jndiName must be specified in the data source"); - } - } - - String s = initProps.getProperty("maxRows"); - if (s != null) { - maxRows = 
Integer.parseInt(s); - } - - return factory = new Callable() { - @Override - public Connection call() throws Exception { - if (log.isInfoEnabled()) { - log.info("Creating a connection for entity {} with URL: {}" - , context.getEntityAttribute(DataImporter.NAME), url); - } - long start = System.nanoTime(); - Connection c = null; - - if (jndiName != null) { - c = getFromJndi(initProps, jndiName); - } else if (url != null) { - try { - c = DriverManager.getConnection(url, initProps); - } catch (SQLException e) { - // DriverManager does not allow you to use a driver which is not loaded through - // the class loader of the class which is trying to make the connection. - // This is a workaround for cases where the user puts the driver jar in the - // solr.home/lib or solr.home/core/lib directories. - @SuppressWarnings({"unchecked"}) - Driver d = (Driver) DocBuilder.loadClass(driver, context.getSolrCore()).getConstructor().newInstance(); - c = d.connect(url, initProps); - } - } - if (c != null) { - try { - initializeConnection(c, initProps); - } catch (SQLException e) { - try { - c.close(); - } catch (SQLException e2) { - log.warn("Exception closing connection during cleanup", e2); - } - - throw new DataImportHandlerException(SEVERE, "Exception initializing SQL connection", e); - } - } - log.info("Time taken for getConnection(): {}" - , TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS)); - return c; - } - - private void initializeConnection(Connection c, final Properties initProps) - throws SQLException { - if (Boolean.parseBoolean(initProps.getProperty("readOnly"))) { - c.setReadOnly(true); - // Add other sane defaults - c.setAutoCommit(true); - c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED); - c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT); - } - if (!Boolean.parseBoolean(initProps.getProperty("autoCommit"))) { - c.setAutoCommit(false); - } - String transactionIsolation = initProps.getProperty("transactionIsolation"); 
- if ("TRANSACTION_READ_UNCOMMITTED".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED); - } else if ("TRANSACTION_READ_COMMITTED".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED); - } else if ("TRANSACTION_REPEATABLE_READ".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_REPEATABLE_READ); - } else if ("TRANSACTION_SERIALIZABLE".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE); - } else if ("TRANSACTION_NONE".equals(transactionIsolation)) { - c.setTransactionIsolation(Connection.TRANSACTION_NONE); - } - String holdability = initProps.getProperty("holdability"); - if ("CLOSE_CURSORS_AT_COMMIT".equals(holdability)) { - c.setHoldability(ResultSet.CLOSE_CURSORS_AT_COMMIT); - } else if ("HOLD_CURSORS_OVER_COMMIT".equals(holdability)) { - c.setHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT); - } - } - - private Connection getFromJndi(final Properties initProps, final String jndiName) throws NamingException, - SQLException { - - Connection c = null; - InitialContext ctx = new InitialContext(); - Object jndival = ctx.lookup(jndiName); - if (jndival instanceof javax.sql.DataSource) { - javax.sql.DataSource dataSource = (javax.sql.DataSource) jndival; - String user = (String) initProps.get("user"); - String pass = (String) initProps.get("password"); - if(user == null || user.trim().equals("")){ - c = dataSource.getConnection(); - } else { - c = dataSource.getConnection(user, pass); - } - } else { - throw new DataImportHandlerException(SEVERE, - "the jndi name : '"+jndiName +"' is not a valid javax.sql.DataSource"); - } - return c; - } - }; - } - - private void resolveVariables(Context ctx, Properties initProps) { - for (Map.Entry entry : initProps.entrySet()) { - if (entry.getValue() != null) { - entry.setValue(ctx.replaceTokens((String) entry.getValue())); - } - } - } - - @Override - public 
Iterator> getData(String query) { - if (resultSetIterator != null) { - resultSetIterator.close(); - resultSetIterator = null; - } - resultSetIterator = createResultSetIterator(query); - return resultSetIterator.getIterator(); - } - - protected ResultSetIterator createResultSetIterator(String query) { - return new ResultSetIterator(query); - } - - private void logError(String msg, Exception e) { - log.warn(msg, e); - } - - protected List readFieldNames(ResultSetMetaData metaData) - throws SQLException { - List colNames = new ArrayList<>(); - int count = metaData.getColumnCount(); - for (int i = 0; i < count; i++) { - colNames.add(metaData.getColumnLabel(i + 1)); - } - return colNames; - } - - protected class ResultSetIterator { - private ResultSet resultSet; - - private Statement stmt = null; - - private List colNames; - - private Iterator> rSetIterator; - - public ResultSetIterator(String query) { - - try { - Connection c = getConnection(); - stmt = createStatement(c, batchSize, maxRows); - log.debug("Executing SQL: {}", query); - long start = System.nanoTime(); - resultSet = executeStatement(stmt, query); - log.trace("Time taken for sql : {}" - , TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS)); - setColNames(resultSet); - } catch (Exception e) { - close(); - wrapAndThrow(SEVERE, e, "Unable to execute query: " + query); - return; - } - if (resultSet == null) { - close(); - rSetIterator = new ArrayList>().iterator(); - return; - } - - rSetIterator = createIterator(convertType, fieldNameVsType); - } - - - protected Statement createStatement(final Connection c, final int batchSize, final int maxRows) - throws SQLException { - Statement statement = c.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - statement.setFetchSize(batchSize); - statement.setMaxRows(maxRows); - return statement; - } - - protected ResultSet executeStatement(Statement statement, String query) throws SQLException { - boolean 
resultSetReturned = statement.execute(query); - return getNextResultSet(resultSetReturned, statement); - } - - protected ResultSet getNextResultSet(final boolean initialResultSetAvailable, final Statement statement) throws SQLException { - boolean resultSetAvailable = initialResultSetAvailable; - while (!resultSetAvailable && statement.getUpdateCount() != -1) { - resultSetAvailable = statement.getMoreResults(); - } - if (resultSetAvailable) { - return statement.getResultSet(); - } - return null; - } - - protected void setColNames(final ResultSet resultSet) throws SQLException { - if (resultSet != null) { - colNames = readFieldNames(resultSet.getMetaData()); - } else { - colNames = Collections.emptyList(); - } - } - - protected Iterator> createIterator(final boolean convertType, - final Map fieldNameVsType) { - return new Iterator>() { - @Override - public boolean hasNext() { - return hasnext(); - } - - @Override - public Map next() { - return getARow(convertType, fieldNameVsType); - } - - @Override - public void remove() {/* do nothing */ - } - }; - } - - - - protected Map getARow(boolean convertType, Map fieldNameVsType) { - if (getResultSet() == null) - return null; - Map result = new HashMap<>(); - for (String colName : getColNames()) { - try { - if (!convertType) { - // Use underlying database's type information except for BigDecimal and BigInteger - // which cannot be serialized by JavaBin/XML. 
See SOLR-6165 - Object value = getResultSet().getObject(colName); - if (value instanceof BigDecimal || value instanceof BigInteger) { - result.put(colName, value.toString()); - } else { - result.put(colName, value); - } - continue; - } - - Integer type = fieldNameVsType.get(colName); - if (type == null) - type = Types.VARCHAR; - switch (type) { - case Types.INTEGER: - result.put(colName, getResultSet().getInt(colName)); - break; - case Types.FLOAT: - result.put(colName, getResultSet().getFloat(colName)); - break; - case Types.BIGINT: - result.put(colName, getResultSet().getLong(colName)); - break; - case Types.DOUBLE: - result.put(colName, getResultSet().getDouble(colName)); - break; - case Types.DATE: - result.put(colName, getResultSet().getTimestamp(colName)); - break; - case Types.BOOLEAN: - result.put(colName, getResultSet().getBoolean(colName)); - break; - case Types.BLOB: - result.put(colName, getResultSet().getBytes(colName)); - break; - default: - result.put(colName, getResultSet().getString(colName)); - break; - } - } catch (SQLException e) { - logError("Error reading data ", e); - wrapAndThrow(SEVERE, e, "Error reading data from database"); - } - } - return result; - } - - protected boolean hasnext() { - if (getResultSet() == null) { - close(); - return false; - } - try { - if (getResultSet().next()) { - return true; - } else { - closeResultSet(); - setResultSet(getNextResultSet(getStatement().getMoreResults(), getStatement())); - setColNames(getResultSet()); - return hasnext(); - } - } catch (SQLException e) { - close(); - wrapAndThrow(SEVERE,e); - return false; - } - } - - protected void close() { - closeResultSet(); - try { - if (getStatement() != null) - getStatement().close(); - } catch (Exception e) { - logError("Exception while closing statement", e); - } finally { - setStatement(null); - } - } - - protected void closeResultSet() { - try { - if (getResultSet() != null) { - getResultSet().close(); - } - } catch (Exception e) { - logError("Exception 
while closing result set", e); - } finally { - setResultSet(null); - } - } - - protected final Iterator> getIterator() { - return rSetIterator; - } - - - protected final Statement getStatement() { - return stmt; - } - - protected final void setStatement(Statement stmt) { - this.stmt = stmt; - } - - protected final ResultSet getResultSet() { - return resultSet; - } - - protected final void setResultSet(ResultSet resultSet) { - this.resultSet = resultSet; - } - - protected final List getColNames() { - return colNames; - } - - protected final void setColNames(List colNames) { - this.colNames = colNames; - } - - } - - protected Connection getConnection() throws Exception { - long currTime = System.nanoTime(); - if (currTime - connLastUsed > CONN_TIME_OUT) { - synchronized (this) { - Connection tmpConn = factory.call(); - closeConnection(); - connLastUsed = System.nanoTime(); - return conn = tmpConn; - } - - } else { - connLastUsed = currTime; - return conn; - } - } - - private boolean isClosed = false; - - @Override - public void close() { - if (resultSetIterator != null) { - resultSetIterator.close(); - } - try { - closeConnection(); - } finally { - isClosed = true; - } - } - - private void closeConnection() { - try { - if (conn != null) { - try { - //SOLR-2045 - conn.commit(); - } catch(Exception ex) { - //ignore. 
- } - conn.close(); - } - } catch (Exception e) { - log.error("Ignoring Error when closing connection", e); - } - } - - private static final long CONN_TIME_OUT = TimeUnit.NANOSECONDS.convert(10, TimeUnit.SECONDS); - - private static final int FETCH_SIZE = 500; - - public static final String URL = "url"; - - public static final String JNDI_NAME = "jndiName"; - - public static final String DRIVER = "driver"; - - public static final String CONVERT_TYPE = "convertType"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java deleted file mode 100644 index 0940cbd4cf4..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.*; -import java.util.*; -import java.util.regex.Pattern; - -import org.apache.commons.io.IOUtils; - - -/** - *

- * An {@link EntityProcessor} instance which can stream lines of text read from a - * datasource. Options allow lines to be explicitly skipped or included in the index. - *

- *

- * Attribute summary - *

    - *
  • url is the required location of the input file. If this value is - * relative, it assumed to be relative to baseLoc.
  • - *
  • acceptLineRegex is an optional attribute that if present discards any - * line which does not match the regExp.
  • - *
  • skipLineRegex is an optional attribute that is applied after any - * acceptLineRegex and discards any line which matches this regExp.
  • - *
- *

- * Although envisioned for reading lines from a file or url, LineEntityProcessor may also be useful - * for dealing with change lists, where each line contains filenames which can be used by subsequent entities - * to parse content from those files. - *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.4 - * @see Pattern - */ -public class LineEntityProcessor extends EntityProcessorBase { - private Pattern acceptLineRegex, skipLineRegex; - private String url; - private BufferedReader reader; - - /** - * Parses each of the entity attributes. - */ - @Override - public void init(Context context) { - super.init(context); - String s; - - // init a regex to locate files from the input we want to index - s = context.getResolvedEntityAttribute(ACCEPT_LINE_REGEX); - if (s != null) { - acceptLineRegex = Pattern.compile(s); - } - - // init a regex to locate files from the input to be skipped - s = context.getResolvedEntityAttribute(SKIP_LINE_REGEX); - if (s != null) { - skipLineRegex = Pattern.compile(s); - } - - // the FileName is required. - url = context.getResolvedEntityAttribute(URL); - if (url == null) throw - new DataImportHandlerException(DataImportHandlerException.SEVERE, - "'"+ URL +"' is a required attribute"); - } - - - /** - * Reads lines from the url till it finds a lines that matches the - * optional acceptLineRegex and does not match the optional skipLineRegex. - * - * @return A row containing a minimum of one field "rawLine" or null to signal - * end of file. The rawLine is the as line as returned by readLine() - * from the url. However transformers can be used to create as - * many other fields as required. - */ - @Override - public Map nextRow() { - if (reader == null) { - reader = new BufferedReader((Reader) context.getDataSource().getData(url)); - } - - String line; - - while ( true ) { - // read a line from the input file - try { - line = reader.readLine(); - } - catch (IOException exp) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Problem reading from input", exp); - } - - // end of input - if (line == null) { - closeResources(); - return null; - } - - // First scan whole line to see if we want it - if (acceptLineRegex != null && ! 
acceptLineRegex.matcher(line).find()) continue; - if (skipLineRegex != null && skipLineRegex.matcher(line).find()) continue; - // Contruct the 'row' of fields - Map row = new HashMap<>(); - row.put("rawLine", line); - return row; - } - } - - public void closeResources() { - if (reader != null) { - IOUtils.closeQuietly(reader); - } - reader= null; - } - - @Override - public void destroy() { - closeResources(); - super.destroy(); - } - - /** - * Holds the name of entity attribute that will be parsed to obtain - * the filename containing the changelist. - */ - public static final String URL = "url"; - - /** - * Holds the name of entity attribute that will be parsed to obtain - * the pattern to be used when checking to see if a line should - * be returned. - */ - public static final String ACCEPT_LINE_REGEX = "acceptLineRegex"; - - /** - * Holds the name of entity attribute that will be parsed to obtain - * the pattern to be used when checking to see if a line should - * be ignored. - */ - public static final String SKIP_LINE_REGEX = "skipLineRegex"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/LogTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/LogTransformer.java deleted file mode 100644 index 66c525e6218..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/LogTransformer.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.invoke.MethodHandles; -import java.util.Map; - -/** - * A {@link Transformer} implementation which logs messages in a given template format. - *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- * This API is experimental and may change in the future. - * - * @since solr 1.4 - */ -public class LogTransformer extends Transformer { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - @Override - public Object transformRow(Map row, Context ctx) { - String expr = ctx.getEntityAttribute(LOG_TEMPLATE); - String level = ctx.replaceTokens(ctx.getEntityAttribute(LOG_LEVEL)); - - if (expr == null || level == null) return row; - - if ("info".equals(level)) { - if (log.isInfoEnabled()) - log.info(ctx.replaceTokens(expr)); - } else if ("trace".equals(level)) { - if (log.isTraceEnabled()) - log.trace(ctx.replaceTokens(expr)); - } else if ("warn".equals(level)) { - if (log.isWarnEnabled()) - log.warn(ctx.replaceTokens(expr)); - } else if ("error".equals(level)) { - if (log.isErrorEnabled()) - log.error(ctx.replaceTokens(expr)); - } else if ("debug".equals(level)) { - if (log.isDebugEnabled()) - log.debug(ctx.replaceTokens(expr)); - } - - return row; - } - - public static final String LOG_TEMPLATE = "logTemplate"; - public static final String LOG_LEVEL = "logLevel"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/MockDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/MockDataSource.java deleted file mode 100644 index 8989ea2d07e..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/MockDataSource.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; - -/** - *

- * A mock DataSource implementation which can be used for testing. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public class MockDataSource extends - DataSource>> { - - private static Map>> cache = new HashMap<>(); - - public static void setIterator(String query, - Iterator> iter) { - cache.put(query, iter); - } - - public static void clearCache() { - cache.clear(); - } - - @Override - public void init(Context context, Properties initProps) { - } - - @Override - public Iterator> getData(String query) { - return cache.get(query); - } - - @Override - public void close() { - cache.clear(); - - } -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java deleted file mode 100644 index f693aecce4a..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; -import java.text.NumberFormat; -import java.text.ParseException; -import java.text.ParsePosition; -import java.util.ArrayList; -import java.util.IllformedLocaleException; -import java.util.List; -import java.util.Locale; -import java.util.Map; - -/** - *

- * A {@link Transformer} instance which can extract numbers out of strings. It uses - * {@link NumberFormat} class to parse strings and supports - * Number, Integer, Currency and Percent styles as supported by - * {@link NumberFormat} with configurable locales. - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public class NumberFormatTransformer extends Transformer { - - @Override - @SuppressWarnings("unchecked") - public Object transformRow(Map row, Context context) { - for (Map fld : context.getAllEntityFields()) { - String style = context.replaceTokens(fld.get(FORMAT_STYLE)); - if (style != null) { - String column = fld.get(DataImporter.COLUMN); - String srcCol = fld.get(RegexTransformer.SRC_COL_NAME); - String localeStr = context.replaceTokens(fld.get(LOCALE)); - if (srcCol == null) - srcCol = column; - Locale locale = Locale.ROOT; - if (localeStr != null) { - try { - locale = new Locale.Builder().setLanguageTag(localeStr).build(); - } catch (IllformedLocaleException e) { - throw new DataImportHandlerException(DataImportHandlerException.SEVERE, - "Invalid Locale '" + localeStr + "' specified for field: " + fld, e); - } - } - - Object val = row.get(srcCol); - String styleSmall = style.toLowerCase(Locale.ROOT); - - if (val instanceof List) { - List inputs = (List) val; - @SuppressWarnings({"rawtypes"}) - List results = new ArrayList(); - for (String input : inputs) { - try { - results.add(process(input, styleSmall, locale)); - } catch (ParseException e) { - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, - "Failed to apply NumberFormat on column: " + column, e); - } - } - row.put(column, results); - } else { - if (val == null || val.toString().trim().equals("")) - continue; - try { - row.put(column, process(val.toString(), styleSmall, locale)); - } catch (ParseException e) { - throw new DataImportHandlerException( - DataImportHandlerException.SEVERE, - "Failed to apply NumberFormat on column: " + column, e); - } - } - } - } - return row; - } - - private Number process(String val, String style, Locale locale) throws ParseException { - if (INTEGER.equals(style)) { - return parseNumber(val, NumberFormat.getIntegerInstance(locale)); - } else if 
(NUMBER.equals(style)) { - return parseNumber(val, NumberFormat.getNumberInstance(locale)); - } else if (CURRENCY.equals(style)) { - return parseNumber(val, NumberFormat.getCurrencyInstance(locale)); - } else if (PERCENT.equals(style)) { - return parseNumber(val, NumberFormat.getPercentInstance(locale)); - } - - return null; - } - - private Number parseNumber(String val, NumberFormat numFormat) throws ParseException { - ParsePosition parsePos = new ParsePosition(0); - Number num = numFormat.parse(val, parsePos); - if (parsePos.getIndex() != val.length()) { - throw new ParseException("illegal number format", parsePos.getIndex()); - } - return num; - } - - public static final String FORMAT_STYLE = "formatStyle"; - - public static final String LOCALE = "locale"; - - public static final String NUMBER = "number"; - - public static final String PERCENT = "percent"; - - public static final String INTEGER = "integer"; - - public static final String CURRENCY = "currency"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java deleted file mode 100644 index 4b8771af26e..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL; -import org.apache.commons.io.IOUtils; - -import java.io.IOException; -import java.io.Reader; -import java.io.StringWriter; -import java.util.HashMap; -import java.util.Map; - -/** - *

An implementation of {@link EntityProcessor} which reads data from a url/file and give out a row which contains one String - * value. The name of the field is 'plainText'. - * - * @since solr 1.4 - */ -public class PlainTextEntityProcessor extends EntityProcessorBase { - private boolean ended = false; - - @Override - public void init(Context context) { - super.init(context); - ended = false; - } - - @Override - public Map nextRow() { - if (ended) return null; - @SuppressWarnings({"unchecked"}) - DataSource ds = context.getDataSource(); - String url = context.replaceTokens(context.getEntityAttribute(URL)); - Reader r = null; - try { - r = ds.getData(url); - } catch (Exception e) { - wrapAndThrow(SEVERE, e, "Exception reading url : " + url); - } - StringWriter sw = new StringWriter(); - char[] buf = new char[1024]; - while (true) { - int len = 0; - try { - len = r.read(buf); - } catch (IOException e) { - IOUtils.closeQuietly(r); - wrapAndThrow(SEVERE, e, "Exception reading url : " + url); - } - if (len <= 0) break; - sw.append(new String(buf, 0, len)); - } - Map row = new HashMap<>(); - row.put(PLAIN_TEXT, sw.toString()); - ended = true; - IOUtils.closeQuietly(r); - return row; - } - - public static final String PLAIN_TEXT = "plainText"; -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/RegexTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/RegexTransformer.java deleted file mode 100644 index f5934163b00..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/RegexTransformer.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.lang.invoke.MethodHandles; -import java.util.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - *

- * A {@link Transformer} implementation which uses Regular Expressions to extract, split - * and replace data in fields. - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - * @see Pattern - */ -public class RegexTransformer extends Transformer { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - @Override - @SuppressWarnings({"unchecked", "rawtypes"}) - public Map transformRow(Map row, - Context ctx) { - List> fields = ctx.getAllEntityFields(); - for (Map field : fields) { - String col = field.get(DataImporter.COLUMN); - String reStr = ctx.replaceTokens(field.get(REGEX)); - String splitBy = ctx.replaceTokens(field.get(SPLIT_BY)); - String replaceWith = ctx.replaceTokens(field.get(REPLACE_WITH)); - String groupNames = ctx.replaceTokens(field.get(GROUP_NAMES)); - if (reStr != null || splitBy != null) { - String srcColName = field.get(SRC_COL_NAME); - if (srcColName == null) { - srcColName = col; - } - Object tmpVal = row.get(srcColName); - if (tmpVal == null) - continue; - - if (tmpVal instanceof List) { - List inputs = (List) tmpVal; - List results = new ArrayList(); - Map otherVars= null; - for (String input : inputs) { - Object o = process(col, reStr, splitBy, replaceWith, input, groupNames); - if (o != null){ - if (o instanceof Map) { - Map map = (Map) o; - for (Object e : map.entrySet()) { - Map.Entry entry = (Map.Entry) e; - List l = results; - if(!col.equals(entry.getKey())){ - if(otherVars == null) otherVars = new HashMap<>(); - l = otherVars.get(entry.getKey()); - if(l == null){ - l = new ArrayList(); - otherVars.put(entry.getKey(), l); - } - } - if (entry.getValue() instanceof Collection) { - l.addAll((Collection) entry.getValue()); - } else { - l.add(entry.getValue()); - } - } - } else { - if (o instanceof Collection) { - results.addAll((Collection) o); - } else { - results.add(o); - } - } - } - } - row.put(col, results); - if(otherVars != null) row.putAll(otherVars); - } else { - String value = tmpVal.toString(); - Object o = process(col, reStr, splitBy, replaceWith, value, groupNames); - 
if (o != null){ - if (o instanceof Map) { - row.putAll((Map) o); - } else{ - row.put(col, o); - } - } - } - } - } - return row; - } - - private Object process(String col, String reStr, String splitBy, - String replaceWith, String value, String groupNames) { - if (splitBy != null) { - return readBySplit(splitBy, value); - } else if (replaceWith != null) { - Pattern p = getPattern(reStr); - Matcher m = p.matcher(value); - return m.find() ? m.replaceAll(replaceWith) : value; - } else { - return readfromRegExp(reStr, value, col, groupNames); - } - } - - @SuppressWarnings("unchecked") - private List readBySplit(String splitBy, String value) { - String[] vals = value.split(splitBy); - List l = new ArrayList<>(Arrays.asList(vals)); - return l; - } - - @SuppressWarnings({"unchecked", "rawtypes"}) - private Object readfromRegExp(String reStr, String value, String columnName, String gNames) { - String[] groupNames = null; - if(gNames != null && gNames.trim().length() >0){ - groupNames = gNames.split(","); - } - Pattern regexp = getPattern(reStr); - Matcher m = regexp.matcher(value); - if (m.find() && m.groupCount() > 0) { - if (m.groupCount() > 1) { - List l = null; - Map map = null; - if(groupNames == null){ - l = new ArrayList(); - } else { - map = new HashMap<>(); - } - for (int i = 1; i <= m.groupCount(); i++) { - try { - if(l != null){ - l.add(m.group(i)); - } else if (map != null ){ - if(i <= groupNames.length){ - String nameOfGroup = groupNames[i-1]; - if(nameOfGroup != null && nameOfGroup.trim().length() >0){ - map.put(nameOfGroup, m.group(i)); - } - } - } - } catch (Exception e) { - log.warn("Parsing failed for field : {}", columnName, e); - } - } - return l == null ? 
map: l; - } else { - return m.group(1); - } - } - - return null; - } - - private Pattern getPattern(String reStr) { - Pattern result = PATTERN_CACHE.get(reStr); - if (result == null) { - PATTERN_CACHE.put(reStr, result = Pattern.compile(reStr)); - } - return result; - } - - private HashMap PATTERN_CACHE = new HashMap<>(); - - public static final String REGEX = "regex"; - - public static final String REPLACE_WITH = "replaceWith"; - - public static final String SPLIT_BY = "splitBy"; - - public static final String SRC_COL_NAME = "sourceColName"; - - public static final String GROUP_NAMES = "groupNames"; - -} diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/RequestInfo.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/RequestInfo.java deleted file mode 100644 index d3f1a56d078..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/RequestInfo.java +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.StrUtils; -import org.apache.solr.request.SolrQueryRequest; - -public class RequestInfo { - private final String command; - private final boolean debug; - private final boolean syncMode; - private final boolean commit; - private final boolean optimize; - private final int start; - private final long rows; - private final boolean clean; - private final List entitiesToRun; - private final Map rawParams; - private final String configFile; - private final String dataConfig; - private final SolrQueryRequest request; - - //TODO: find a different home for these two... - private final ContentStream contentStream; - private final DebugInfo debugInfo; - - public RequestInfo(SolrQueryRequest request, Map requestParams, ContentStream stream) { - this.request = request; - this.contentStream = stream; - if (requestParams.containsKey("command")) { - command = (String) requestParams.get("command"); - } else { - command = null; - } - boolean debugMode = StrUtils.parseBool((String) requestParams.get("debug"), false); - if (debugMode) { - debug = true; - debugInfo = new DebugInfo(requestParams); - } else { - debug = false; - debugInfo = null; - } - if (requestParams.containsKey("clean")) { - clean = StrUtils.parseBool( (String) requestParams.get("clean"), true); - } else if (DataImporter.DELTA_IMPORT_CMD.equals(command) || DataImporter.IMPORT_CMD.equals(command)) { - clean = false; - } else { - clean = debug ? false : true; - } - optimize = StrUtils.parseBool((String) requestParams.get("optimize"), false); - if(optimize) { - commit = true; - } else { - commit = StrUtils.parseBool( (String) requestParams.get("commit"), (debug ? 
false : true)); - } - if (requestParams.containsKey("rows")) { - rows = Integer.parseInt((String) requestParams.get("rows")); - } else { - rows = debug ? 10 : Long.MAX_VALUE; - } - - if (requestParams.containsKey("start")) { - start = Integer.parseInt((String) requestParams.get("start")); - } else { - start = 0; - } - syncMode = StrUtils.parseBool((String) requestParams.get("synchronous"), false); - - Object o = requestParams.get("entity"); - List modifiableEntities = null; - if(o != null) { - if (o instanceof String) { - modifiableEntities = new ArrayList<>(); - modifiableEntities.add((String) o); - } else if (o instanceof List) { - @SuppressWarnings("unchecked") - List modifiableEntities1 = new ArrayList<>((List) o); - modifiableEntities = modifiableEntities1; - } - entitiesToRun = Collections.unmodifiableList(modifiableEntities); - } else { - entitiesToRun = null; - } - String configFileParam = (String) requestParams.get("config"); - configFile = configFileParam; - String dataConfigParam = (String) requestParams.get("dataConfig"); - if (dataConfigParam != null && dataConfigParam.trim().length() == 0) { - // Empty data-config param is not valid, change it to null - dataConfigParam = null; - } - dataConfig = dataConfigParam; - this.rawParams = Collections.unmodifiableMap(new HashMap<>(requestParams)); - } - - public String getCommand() { - return command; - } - - public boolean isDebug() { - return debug; - } - - public boolean isSyncMode() { - return syncMode; - } - - public boolean isCommit() { - return commit; - } - - public boolean isOptimize() { - return optimize; - } - - public int getStart() { - return start; - } - - public long getRows() { - return rows; - } - - public boolean isClean() { - return clean; - } - /** - * Returns null if we are to run all entities, otherwise just run the entities named in the list. 
- */ - public List getEntitiesToRun() { - return entitiesToRun; - } - - public String getDataConfig() { - return dataConfig; - } - - public Map getRawParams() { - return rawParams; - } - - public ContentStream getContentStream() { - return contentStream; - } - - public DebugInfo getDebugInfo() { - return debugInfo; - } - - public String getConfigFile() { - return configFile; - } - - public SolrQueryRequest getRequest() { - return request; - } -} \ No newline at end of file diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ScriptTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ScriptTransformer.java deleted file mode 100644 index fe848b1c0ad..00000000000 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ScriptTransformer.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; - -import java.security.AccessControlContext; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.security.PrivilegedActionException; -import java.security.PrivilegedExceptionAction; -import java.security.ProtectionDomain; -import java.util.Map; - -import javax.script.Invocable; -import javax.script.ScriptEngine; -import javax.script.ScriptEngineManager; -import javax.script.ScriptException; - -/** - *

- * A {@link Transformer} instance capable of executing functions written in scripting - * languages as a {@link Transformer} instance. - *

- *

- * Refer to http://wiki.apache.org/solr/DataImportHandler - * for more details. - *

- *

- * This API is experimental and may change in the future. - * - * @since solr 1.3 - */ -public class ScriptTransformer extends Transformer { - private Invocable engine; - private String functionName; - - @Override - public Object transformRow(Map row, Context context) { - return AccessController.doPrivileged(new PrivilegedAction() { - @Override - public Object run() { - return transformRowUnsafe(row, context); - } - }, SCRIPT_SANDBOX); - } - - public Object transformRowUnsafe(Map row, Context context) { - try { - if (engine == null) - initEngine(context); - if (engine == null) - return row; - return engine.invokeFunction(functionName, new Object[]{row, context}); - } catch (DataImportHandlerException e) { - throw e; - } catch (Exception e) { - wrapAndThrow(SEVERE,e, "Error invoking script for entity " + context.getEntityAttribute("name")); - } - //will not reach here - return null; - } - - private void initEngine(Context context) { - String scriptText = context.getScript(); - String scriptLang = context.getScriptLanguage(); - if (scriptText == null) { - throw new DataImportHandlerException(SEVERE, - "\t\n" - + "\t\t\n" - + "\n" + "\t\t\t\n" - + "\n" + "\t\t\n" + "\t\n" + ""; -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSimplePropertiesWriter.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSimplePropertiesWriter.java deleted file mode 100644 index 74e04c934a3..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSimplePropertiesWriter.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.lang.invoke.MethodHandles; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.Statement; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; - -import org.apache.solr.common.util.SuppressForbidden; -import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -public class TestSimplePropertiesWriter extends AbstractDIHJdbcTestCase { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private boolean useJdbcEscapeSyntax; - private String dateFormat; - private String fileLocation; - private String fileName; - - @Before - public void spwBefore() throws Exception { - fileLocation = createTempDir().toFile().getAbsolutePath(); - fileName = "the.properties"; - } - - @SuppressForbidden(reason = "Needs currentTimeMillis to construct date stamps") - @Test - public void testSimplePropertiesWriter() throws Exception { - - SimpleDateFormat errMsgFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSSSS", Locale.ROOT); - - String[] d = { - "{'ts' ''yyyy-MM-dd HH:mm:ss.SSSSSS''}", - "{'ts' ''yyyy-MM-dd HH:mm:ss''}", - "yyyy-MM-dd HH:mm:ss", - "yyyy-MM-dd HH:mm:ss.SSSSSS" - }; - for(int i=0 ; i init = new HashMap<>(); - 
init.put("dateFormat", dateFormat); - init.put("filename", fileName); - init.put("directory", fileLocation); - SimplePropertiesWriter spw = new SimplePropertiesWriter(); - spw.init(new DataImporter(), init); - Map props = new HashMap<>(); - props.put("SomeDates.last_index_time", oneSecondAgo); - props.put("last_index_time", oneSecondAgo); - spw.persist(props); - - h.query("/dataimport", generateRequest()); - props = spw.readIndexerProperties(); - Date entityDate = df.parse((String) props.get("SomeDates.last_index_time")); - Date docDate= df.parse((String) props.get("last_index_time")); - int year = currentYearFromDatabase(); - - assertTrue("This date: " + errMsgFormat.format(oneSecondAgo) + " should be prior to the document date: " + errMsgFormat.format(docDate), docDate.getTime() - oneSecondAgo.getTime() > 0); - assertTrue("This date: " + errMsgFormat.format(oneSecondAgo) + " should be prior to the entity date: " + errMsgFormat.format(entityDate), entityDate.getTime() - oneSecondAgo.getTime() > 0); - assertQ(req("*:*"), "//*[@numFound='1']", "//doc/str[@name=\"ayear_s\"]=\"" + year + "\""); - } - } - - private int currentYearFromDatabase() throws Exception { - try ( - Connection conn = newConnection(); - Statement s = conn.createStatement(); - ResultSet rs = s.executeQuery("select year(current_timestamp) from sysibm.sysdummy1"); - ){ - if (rs.next()) { - return rs.getInt(1); - } - fail("We should have gotten a row from the db."); - } - return 0; - } - - @Override - protected Database setAllowedDatabases() { - return Database.DERBY; - } - @Override - protected String generateConfig() { - StringBuilder sb = new StringBuilder(); - String q = useJdbcEscapeSyntax ? 
"" : "'"; - sb.append(" \n"); - sb.append("\n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append("\n"); - sb.append(" \n"); - sb.append("\n"); - sb.append(" \n"); - sb.append(" \n"); - String config = sb.toString(); - log.debug(config); - return config; - } - -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java deleted file mode 100644 index b552d01808a..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.io.File; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import org.apache.commons.io.FileUtils; -import org.apache.lucene.util.IOUtils; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.embedded.JettySolrRunner; -import org.apache.solr.client.solrj.impl.HttpSolrClient; -import org.apache.solr.common.SolrInputDocument; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * End-to-end test of SolrEntityProcessor. "Real" test using embedded Solr - */ -public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTestCase { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private static final String SOLR_CONFIG = "dataimport-solrconfig.xml"; - private static final String SOLR_SCHEMA = "dataimport-schema.xml"; - private static final String SOURCE_CONF_DIR = "dih" + File.separator + "solr" + File.separator + "collection1" + File.separator + "conf" + File.separator; - private static final String ROOT_DIR = "dih" + File.separator + "solr" + File.separator; - - private static final String DEAD_SOLR_SERVER = "http://" + DEAD_HOST_1 + "/solr"; - - private static final List> DB_DOCS = new ArrayList<>(); - private static final List> SOLR_DOCS = new ArrayList<>(); - - static { - // dynamic fields in the destination schema - Map dbDoc = new HashMap<>(); - dbDoc.put("dbid_s", "1"); - dbDoc.put("dbdesc_s", "DbDescription"); - DB_DOCS.add(dbDoc); - - Map solrDoc = new HashMap<>(); - solrDoc.put("id", "1"); - 
solrDoc.put("desc", "SolrDescription"); - SOLR_DOCS.add(solrDoc); - } - - - private SolrInstance instance = null; - private JettySolrRunner jetty; - - private String getDihConfigTagsInnerEntity() { - return "\r\n" - + " \r\n" - + " \r\n" - + " \r\n" - + " \r\n" - + " \r\n" - + " \r\n" - + " \r\n" - + " \r\n" + " \r\n" - + " \r\n" + " \r\n" + "\r\n"; - } - - private String generateDIHConfig(String options, boolean useDeadServer) { - return "\r\n" + " \r\n" - + " \r\n" + " \r\n" - + "\r\n"; - } - - private String getSourceUrl() { - return buildUrl(jetty.getLocalPort(), "/solr/collection1"); - } - - //TODO: fix this test to close its directories - static String savedFactory; - @BeforeClass - public static void beforeClass() { - savedFactory = System.getProperty("solr.DirectoryFactory"); - System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory"); - } - - @AfterClass - public static void afterClass() { - if (savedFactory == null) { - System.clearProperty("solr.directoryFactory"); - } else { - System.setProperty("solr.directoryFactory", savedFactory); - } - } - - @Override - @Before - public void setUp() throws Exception { - super.setUp(); - // destination solr core - initCore(SOLR_CONFIG, SOLR_SCHEMA); - // data source solr instance - instance = new SolrInstance(); - instance.setUp(); - jetty = createAndStartJetty(instance); - } - - @Override - @After - public void tearDown() throws Exception { - try { - deleteCore(); - } catch (Exception e) { - log.error("Error deleting core", e); - } - if (null != jetty) { - jetty.stop(); - jetty = null; - } - if (null != instance) { - instance.tearDown(); - instance = null; - } - super.tearDown(); - } - - //commented 23-AUG-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 20-Jul-2018 - public void testFullImport() { - assertQ(req("*:*"), "//result[@numFound='0']"); - - try { - addDocumentsToSolr(SOLR_DOCS); - runFullImport(generateDIHConfig("query='*:*' rows='2' fl='id,desc' 
onError='skip'", false)); - } catch (Exception e) { - log.error("Exception running full import", e); - fail(e.getMessage()); - } - - assertQ(req("*:*"), "//result[@numFound='1']"); - assertQ(req("id:1"), "//result/doc/str[@name='id'][.='1']", - "//result/doc/arr[@name='desc'][.='SolrDescription']"); - } - - public void testFullImportFqParam() { - assertQ(req("*:*"), "//result[@numFound='0']"); - - try { - addDocumentsToSolr(generateSolrDocuments(30)); - Map map = new HashMap<>(); - map.put("rows", "50"); - runFullImport(generateDIHConfig("query='*:*' fq='desc:Description1*,desc:Description*2' rows='2'", false), map); - } catch (Exception e) { - log.error("Exception running full import", e); - fail(e.getMessage()); - } - - assertQ(req("*:*"), "//result[@numFound='1']"); - assertQ(req("id:12"), "//result[@numFound='1']", "//result/doc/arr[@name='desc'][.='Description12']"); - } - - public void testFullImportFieldsParam() { - assertQ(req("*:*"), "//result[@numFound='0']"); - - try { - addDocumentsToSolr(generateSolrDocuments(7)); - runFullImport(generateDIHConfig("query='*:*' fl='id' rows='2'"+(random().nextBoolean() ?" 
cursorMark='true' sort='id asc'":""), false)); - } catch (Exception e) { - log.error("Exception running full import", e); - fail(e.getMessage()); - } - - assertQ(req("*:*"), "//result[@numFound='7']"); - assertQ(req("id:1"), "//result[@numFound='1']"); - assertQ(req("id:1"), "count(//result/doc/arr[@name='desc'])=0"); - } - - /** - * Receive a row from SQL (Mock) and fetch a row from Solr - */ - public void testFullImportInnerEntity() { - assertQ(req("*:*"), "//result[@numFound='0']"); - - try { - List> DOCS = new ArrayList<>(DB_DOCS); - Map doc = new HashMap<>(); - doc.put("dbid_s", "2"); - doc.put("dbdesc_s", "DbDescription2"); - DOCS.add(doc); - MockDataSource.setIterator("select * from x", DOCS.iterator()); - - DOCS = new ArrayList<>(SOLR_DOCS); - Map solrDoc = new HashMap<>(); - solrDoc.put("id", "2"); - solrDoc.put("desc", "SolrDescription2"); - DOCS.add(solrDoc); - addDocumentsToSolr(DOCS); - runFullImport(getDihConfigTagsInnerEntity()); - } catch (Exception e) { - log.error("Exception running full import", e); - fail(e.getMessage()); - } finally { - MockDataSource.clearCache(); - } - - assertQ(req("*:*"), "//result[@numFound='2']"); - assertQ(req("id:1"), "//result/doc/str[@name='id'][.='1']", - "//result/doc/str[@name='dbdesc_s'][.='DbDescription']", - "//result/doc/str[@name='dbid_s'][.='1']", - "//result/doc/arr[@name='desc'][.='SolrDescription']"); - assertQ(req("id:2"), "//result/doc/str[@name='id'][.='2']", - "//result/doc/str[@name='dbdesc_s'][.='DbDescription2']", - "//result/doc/str[@name='dbid_s'][.='2']", - "//result/doc/arr[@name='desc'][.='SolrDescription2']"); - } - - public void testFullImportWrongSolrUrl() { - assertQ(req("*:*"), "//result[@numFound='0']"); - - try { - runFullImport(generateDIHConfig("query='*:*' rows='2' fl='id,desc' onError='skip'", true /* use dead server */)); - } catch (Exception e) { - log.error("Exception running full import", e); - fail(e.getMessage()); - } - - assertQ(req("*:*"), "//result[@numFound='0']"); - } - - 
public void testFullImportBadConfig() { - assertQ(req("*:*"), "//result[@numFound='0']"); - - try { - runFullImport(generateDIHConfig("query='bogus:3' rows='2' fl='id,desc' onError='"+ - (random().nextBoolean() ? "abort" : "justtogetcoverage")+"'", false)); - } catch (Exception e) { - log.error("Exception running full import", e); - fail(e.getMessage()); - } - - assertQ(req("*:*"), "//result[@numFound='0']"); - } - - public void testCursorMarkNoSort() throws SolrServerException, IOException { - assertQ(req("*:*"), "//result[@numFound='0']"); - addDocumentsToSolr(generateSolrDocuments(7)); - try { - List errors = Arrays.asList("sort='id'", //wrong sort spec - "", //no sort spec - "sort='id asc' timeout='12345'"); // sort is fine, but set timeout - Collections.shuffle(errors, random()); - String attrs = "query='*:*' rows='2' fl='id,desc' cursorMark='true' " - + errors.get(0); - runFullImport(generateDIHConfig(attrs, - false)); - } catch (Exception e) { - log.error("Exception running full import", e); - fail(e.getMessage()); - } - - assertQ(req("*:*"), "//result[@numFound='0']"); - } - - private static List> generateSolrDocuments(int num) { - List> docList = new ArrayList<>(); - for (int i = 1; i <= num; i++) { - Map map = new HashMap<>(); - map.put("id", i); - map.put("desc", "Description" + i); - docList.add(map); - } - return docList; - } - - private void addDocumentsToSolr(List> docs) throws SolrServerException, IOException { - List sidl = new ArrayList<>(); - for (Map doc : docs) { - SolrInputDocument sd = new SolrInputDocument(); - for (Entry entry : doc.entrySet()) { - sd.addField(entry.getKey(), entry.getValue()); - } - sidl.add(sd); - } - - try (HttpSolrClient solrServer = getHttpSolrClient(getSourceUrl(), 15000, 30000)) { - solrServer.add(sidl); - solrServer.commit(true, true); - } - } - - private static class SolrInstance { - File homeDir; - File confDir; - File dataDir; - - public String getHomeDir() { - return homeDir.toString(); - } - - public String 
getSchemaFile() { - return SOURCE_CONF_DIR + "dataimport-schema.xml"; - } - - public String getDataDir() { - return dataDir.toString(); - } - - public String getSolrConfigFile() { - return SOURCE_CONF_DIR + "dataimport-solrconfig.xml"; - } - - public String getSolrXmlFile() { - return ROOT_DIR + "solr.xml"; - } - - public void setUp() throws Exception { - homeDir = createTempDir().toFile(); - dataDir = new File(homeDir + "/collection1", "data"); - confDir = new File(homeDir + "/collection1", "conf"); - - homeDir.mkdirs(); - dataDir.mkdirs(); - confDir.mkdirs(); - - FileUtils.copyFile(getFile(getSolrXmlFile()), new File(homeDir, "solr.xml")); - File f = new File(confDir, "solrconfig.xml"); - FileUtils.copyFile(getFile(getSolrConfigFile()), f); - f = new File(confDir, "schema.xml"); - - FileUtils.copyFile(getFile(getSchemaFile()), f); - f = new File(confDir, "data-config.xml"); - FileUtils.copyFile(getFile(SOURCE_CONF_DIR + "dataconfig-contentstream.xml"), f); - - Files.createFile(confDir.toPath().resolve("../core.properties")); - } - - public void tearDown() throws Exception { - IOUtils.rm(homeDir.toPath()); - } - } - - private JettySolrRunner createAndStartJetty(SolrInstance instance) throws Exception { - Properties nodeProperties = new Properties(); - nodeProperties.setProperty("solr.data.dir", instance.getDataDir()); - JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), nodeProperties, buildJettyConfig("/solr")); - jetty.start(); - return jetty; - } - -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java deleted file mode 100644 index 1753b81de04..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or 
more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.*; - -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.CursorMarkParams; -import org.apache.solr.handler.dataimport.SolrEntityProcessor.SolrDocumentListIterator; -import org.junit.Test; - -/** - * Unit test of SolrEntityProcessor. A very basic test outside of the DIH. 
- */ -public class TestSolrEntityProcessorUnit extends AbstractDataImportHandlerTestCase { - - private static final class NoNextMockProcessor extends SolrEntityProcessor { - @Override - protected void nextPage() { - } - } - - private static final String ID = "id"; - - public void testQuery() { - List docs = generateUniqueDocs(2); - - MockSolrEntityProcessor processor = createAndInit(docs); - try { - assertExpectedDocs(docs, processor); - assertEquals(1, processor.getQueryCount()); - } finally { - processor.destroy(); - } - } - - private MockSolrEntityProcessor createAndInit(List docs) { - return createAndInit(docs, SolrEntityProcessor.ROWS_DEFAULT); - } - - public void testNumDocsGreaterThanRows() { - List docs = generateUniqueDocs(44); - - int rowsNum = 10; - MockSolrEntityProcessor processor = createAndInit(docs, rowsNum); - try { - assertExpectedDocs(docs, processor); - assertEquals(5, processor.getQueryCount()); - } finally { - processor.destroy(); - } - } - - private MockSolrEntityProcessor createAndInit(List docs, int rowsNum) { - MockSolrEntityProcessor processor = new MockSolrEntityProcessor(docs, rowsNum); - HashMap entityAttrs = new HashMap(){{put(SolrEntityProcessor.SOLR_SERVER,"http://route:66/no");}}; - processor.init(getContext(null, null, null, null, Collections.emptyList(), - entityAttrs)); - return processor; - } - - public void testMultiValuedFields() { - List docs = new ArrayList<>(); - List types = new ArrayList<>(); - types.add(new FldType(ID, ONE_ONE, new SVal('A', 'Z', 4, 4))); - types.add(new FldType("description", new IRange(3, 3), new SVal('a', 'c', 1, 1))); - Doc testDoc = createDoc(types); - docs.add(testDoc); - - MockSolrEntityProcessor processor = createAndInit(docs); - try { - Map next = processor.nextRow(); - assertNotNull(next); - - @SuppressWarnings({"unchecked", "rawtypes"}) - List multiField = (List) next.get("description"); - assertEquals(testDoc.getValues("description").size(), multiField.size()); - 
assertEquals(testDoc.getValues("description"), multiField); - assertEquals(1, processor.getQueryCount()); - assertNull(processor.nextRow()); - } finally { - processor.destroy(); - } - } - @Test (expected = DataImportHandlerException.class) - public void testNoQuery() { - SolrEntityProcessor processor = new SolrEntityProcessor(); - - HashMap entityAttrs = new HashMap(){{put(SolrEntityProcessor.SOLR_SERVER,"http://route:66/no");}}; - processor.init(getContext(null, null, null, null, Collections.emptyList(), - entityAttrs)); - try { - processor.buildIterator(); - }finally { - processor.destroy(); - } - } - - public void testPagingQuery() { - SolrEntityProcessor processor = new NoNextMockProcessor() ; - - HashMap entityAttrs = new HashMap(){{ - put(SolrEntityProcessor.SOLR_SERVER,"http://route:66/no"); - if (random().nextBoolean()) { - List noCursor = Arrays.asList("","false",CursorMarkParams.CURSOR_MARK_START);//only 'true' not '*' - Collections.shuffle(noCursor, random()); - put(CursorMarkParams.CURSOR_MARK_PARAM, noCursor.get(0)); - }}}; - processor.init(getContext(null, null, null, null, Collections.emptyList(), - entityAttrs)); - try { - processor.buildIterator(); - SolrQuery query = new SolrQuery(); - ((SolrDocumentListIterator) processor.rowIterator).passNextPage(query); - assertEquals("0", query.get(CommonParams.START)); - assertNull( query.get(CursorMarkParams.CURSOR_MARK_PARAM)); - assertNotNull( query.get(CommonParams.TIME_ALLOWED)); - }finally { - processor.destroy(); - } - } - - public void testCursorQuery() { - SolrEntityProcessor processor = new NoNextMockProcessor() ; - - HashMap entityAttrs = new HashMap(){{ - put(SolrEntityProcessor.SOLR_SERVER,"http://route:66/no"); - put(CursorMarkParams.CURSOR_MARK_PARAM,"true"); - }}; - processor.init(getContext(null, null, null, null, Collections.emptyList(), - entityAttrs)); - try { - processor.buildIterator(); - SolrQuery query = new SolrQuery(); - ((SolrDocumentListIterator) 
processor.rowIterator).passNextPage(query); - assertNull(query.get(CommonParams.START)); - assertEquals(CursorMarkParams.CURSOR_MARK_START, query.get(CursorMarkParams.CURSOR_MARK_PARAM)); - assertNull( query.get(CommonParams.TIME_ALLOWED)); - }finally { - processor.destroy(); - } - } - - private List generateUniqueDocs(int numDocs) { - List types = new ArrayList<>(); - types.add(new FldType(ID, ONE_ONE, new SVal('A', 'Z', 4, 40))); - types.add(new FldType("description", new IRange(1, 3), new SVal('a', 'c', 1, 1))); - - @SuppressWarnings({"rawtypes"}) - Set previousIds = new HashSet<>(); - List docs = new ArrayList<>(numDocs); - for (int i = 0; i < numDocs; i++) { - Doc doc = createDoc(types); - while (previousIds.contains(doc.id)) { - doc = createDoc(types); - } - previousIds.add(doc.id); - docs.add(doc); - } - return docs; - } - - private static void assertExpectedDocs(List expectedDocs, SolrEntityProcessor processor) { - for (Doc expectedDoc : expectedDocs) { - Map next = processor.nextRow(); - assertNotNull(next); - assertEquals(expectedDoc.id, next.get("id")); - assertEquals(expectedDoc.getValues("description"), next.get("description")); - } - assertNull(processor.nextRow()); - } - -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSortedMapBackedCache.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSortedMapBackedCache.java deleted file mode 100644 index 8dd1b552e56..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSortedMapBackedCache.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.lang.invoke.MethodHandles; - -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import org.junit.Assert; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestSortedMapBackedCache extends AbstractDIHCacheTestCase { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - @Test - public void testCacheWithKeyLookup() { - DIHCache cache = null; - try { - cache = new SortedMapBackedCache(); - cache.open(getContext(new HashMap())); - loadData(cache, data, fieldNames, true); - List testData = extractDataByKeyLookup(cache, fieldNames); - compareData(data, testData); - } catch (Exception e) { - log.warn("Exception thrown: {}", e); - Assert.fail(); - } finally { - try { - cache.destroy(); - } catch (Exception ex) { - } - } - } - - @Test - public void testCacheWithOrderedLookup() { - DIHCache cache = null; - try { - cache = new SortedMapBackedCache(); - cache.open(getContext(new HashMap())); - loadData(cache, data, fieldNames, true); - List testData = extractDataInKeyOrder(cache, fieldNames); - compareData(data, testData); - } catch (Exception e) { - log.warn("Exception thrown: {}", e); - Assert.fail(); - } finally { - try { - 
cache.destroy(); - } catch (Exception ex) { - } - } - } - - @Test - public void testNullKeys() throws Exception { - //A null key should just be ignored, but not throw an exception - DIHCache cache = null; - try { - cache = new SortedMapBackedCache(); - Map cacheProps = new HashMap<>(); - cacheProps.put(DIHCacheSupport.CACHE_PRIMARY_KEY, "a_id"); - cache.open(getContext(cacheProps)); - - Map data = new HashMap<>(); - data.put("a_id", null); - data.put("bogus", "data"); - cache.add(data); - - Iterator> cacheIter = cache.iterator(); - while (cacheIter.hasNext()) { - Assert.fail("cache should be empty."); - } - Assert.assertNull(cache.iterator(null)); - cache.delete(null); - } catch (Exception e) { - throw e; - } finally { - try { - cache.destroy(); - } catch (Exception ex) { - } - } - } - - @Test - public void testCacheReopensWithUpdate() { - DIHCache cache = null; - try { - Map cacheProps = new HashMap<>(); - cacheProps.put(DIHCacheSupport.CACHE_PRIMARY_KEY, "a_id"); - - cache = new SortedMapBackedCache(); - cache.open(getContext(cacheProps)); - // We can let the data hit the cache with the fields out of order because - // we've identified the pk up-front. - loadData(cache, data, fieldNames, false); - - // Close the cache. - cache.close(); - - List newControlData = new ArrayList<>(); - Object[] newIdEqualsThree = null; - int j = 0; - for (int i = 0; i < data.size(); i++) { - // We'll be deleting a_id=1 so remove it from the control data. - if (data.get(i).data[0].equals(1)) { - continue; - } - - // We'll be changing "Cookie" to "Carrot" in a_id=3 so change it in the control data. - if (data.get(i).data[0].equals(3)) { - newIdEqualsThree = new Object[data.get(i).data.length]; - System.arraycopy(data.get(i).data, 0, newIdEqualsThree, 0, newIdEqualsThree.length); - newIdEqualsThree[3] = "Carrot"; - newControlData.add(new ControlData(newIdEqualsThree)); - } - // Everything else can just be copied over. 
- else { - newControlData.add(data.get(i)); - } - - j++; - } - - // These new rows of data will get added to the cache, so add them to the control data too. - Object[] newDataRow1 = new Object[] {99, new BigDecimal(Math.PI), "Z", "Zebra", 99.99f, Feb21_2011, null }; - Object[] newDataRow2 = new Object[] {2, new BigDecimal(Math.PI), "B", "Ballerina", 2.22f, Feb21_2011, null }; - - newControlData.add(new ControlData(newDataRow1)); - newControlData.add(new ControlData(newDataRow2)); - - // Re-open the cache - cache.open(getContext(new HashMap())); - - // Delete a_id=1 from the cache. - cache.delete(1); - - // Because the cache allows duplicates, the only way to update is to - // delete first then add. - cache.delete(3); - cache.add(controlDataToMap(new ControlData(newIdEqualsThree), fieldNames, false)); - - // Add this row with a new Primary key. - cache.add(controlDataToMap(new ControlData(newDataRow1), fieldNames, false)); - - // Add this row, creating two records in the cache with a_id=2. - cache.add(controlDataToMap(new ControlData(newDataRow2), fieldNames, false)); - - // Read the cache back and compare to the newControlData - List testData = extractDataInKeyOrder(cache, fieldNames); - compareData(newControlData, testData); - - // Now try reading the cache read-only. 
- cache.close(); - cache.open(getContext(new HashMap())); - testData = extractDataInKeyOrder(cache, fieldNames); - compareData(newControlData, testData); - - } catch (Exception e) { - log.warn("Exception thrown: {}", e); - Assert.fail(); - } finally { - try { - cache.destroy(); - } catch (Exception ex) { - } - } - } -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java deleted file mode 100644 index f1277c91f30..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Ignore; -import org.junit.Test; - -/** - * Test with various combinations of parameters, child entities, caches, transformers. 
- */ -public class TestSqlEntityProcessor extends AbstractSqlEntityProcessorTestCase { - - @Test - public void testSingleEntity() throws Exception { - singleEntity(1); - } - @Test - public void testWithSimpleTransformer() throws Exception { - simpleTransform(1); - } - @Test - public void testWithComplexTransformer() throws Exception { - complexTransform(1, 0); - } - @Test - public void testChildEntities() throws Exception { - withChildEntities(false, true); - } - @Test - public void testCachedChildEntities() throws Exception { - withChildEntities(true, true); - } - - @Test - public void testSportZipperChildEntities() throws Exception { - sportsZipper = true; - withChildEntities(true, true); - } - - @Test - public void testCountryZipperChildEntities() throws Exception { - countryZipper = true; - withChildEntities(true, true); - } - - @Test - public void testBothZipperChildEntities() throws Exception { - countryZipper = true; - sportsZipper = true; - withChildEntities(true, true); - } - - @Test(expected=RuntimeException.class /* DIH exceptions are not propagated, here we capturing assertQ exceptions */) - public void testSportZipperChildEntitiesWrongOrder() throws Exception { - if(random().nextBoolean()){ - wrongPeopleOrder = true; - }else{ - wrongSportsOrder = true; - } - testSportZipperChildEntities(); - } - - @Test(expected=RuntimeException.class ) - public void testCountryZipperChildEntitiesWrongOrder() throws Exception { - if(random().nextBoolean()){ - wrongPeopleOrder = true; - }else{ - wrongCountryOrder = true; - } - testCountryZipperChildEntities(); - } - - @Test(expected=RuntimeException.class) - public void testBothZipperChildEntitiesWrongOrder() throws Exception { - if(random().nextBoolean()){ - wrongPeopleOrder = true; - }else{ - if(random().nextBoolean()){ - wrongSportsOrder = true; - }else{ - wrongCountryOrder = true; - } - } - testBothZipperChildEntities(); - } - - @Test - @Ignore("broken see SOLR-3857") - public void testSimpleCacheChildEntities() 
throws Exception { - simpleCacheChildEntities(true); - } - - @Override - protected String deltaQueriesCountryTable() { - return ""; - } - @Override - protected String deltaQueriesPersonTable() { - return ""; - } -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java deleted file mode 100644 index 9708cdcff34..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.lang.invoke.MethodHandles; - -import org.apache.solr.request.LocalSolrQueryRequest; -import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Test with various combinations of parameters, child entities, transformers. 
- */ -public class TestSqlEntityProcessorDelta extends AbstractSqlEntityProcessorTestCase { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private boolean delta = false; - private boolean useParentDeltaQueryParam = false; - private IntChanges personChanges = null; - private String[] countryChanges = null; - - @Before - public void setupDeltaTest() { - delta = false; - personChanges = null; - countryChanges = null; - } - @Test - public void testSingleEntity() throws Exception { - log.debug("testSingleEntity full-import..."); - singleEntity(1); - logPropertiesFile(); - changeStuff(); - int c = calculateDatabaseCalls(); - log.debug("testSingleEntity delta-import ({} database calls expected)...", c); - singleEntity(c); - validateChanges(); - } - - @Test - public void testDeltaImportWithoutInitialFullImport() throws Exception { - log.debug("testDeltaImportWithoutInitialFullImport delta-import..."); - countryEntity = false; - delta = true; - /* - * We need to add 2 in total: - * +1 for deltaQuery i.e identifying id of items to update, - * +1 for deletedPkQuery i.e delete query - */ - singleEntity(totalPeople() + 2); - validateChanges(); - } - - @Test - public void testWithSimpleTransformer() throws Exception { - log.debug("testWithSimpleTransformer full-import..."); - simpleTransform(1); - logPropertiesFile(); - changeStuff(); - int c = calculateDatabaseCalls(); - simpleTransform(c); - log.debug("testWithSimpleTransformer delta-import ({} database calls expected)...", c); - validateChanges(); - } - @Test - public void testWithComplexTransformer() throws Exception { - log.debug("testWithComplexTransformer full-import..."); - complexTransform(1, 0); - logPropertiesFile(); - changeStuff(); - int c = calculateDatabaseCalls(); - log.debug("testWithComplexTransformer delta-import ({} database calls expected)...", c); - complexTransform(c, personChanges.deletedKeys.length); - validateChanges(); - } - @Test - public void 
testChildEntities() throws Exception { - log.debug("testChildEntities full-import..."); - useParentDeltaQueryParam = random().nextBoolean(); - log.debug("using parent delta? {}", useParentDeltaQueryParam); - withChildEntities(false, true); - logPropertiesFile(); - changeStuff(); - log.debug("testChildEntities delta-import..."); - withChildEntities(false, false); - validateChanges(); - } - - - private int calculateDatabaseCalls() { - //The main query generates 1 - //Deletes generate 1 - //Each add/mod generate 1 - int c = 1; - if (countryChanges != null) { - c += countryChanges.length + 1; - } - if (personChanges != null) { - c += personChanges.addedKeys.length + personChanges.changedKeys.length + 1; - } - return c; - } - private void validateChanges() throws Exception - { - if(personChanges!=null) { - for(int id : personChanges.addedKeys) { - assertQ(req("id:" + id), "//*[@numFound='1']"); - } - for(int id : personChanges.deletedKeys) { - assertQ(req("id:" + id), "//*[@numFound='0']"); - } - for(int id : personChanges.changedKeys) { - assertQ(req("id:" + id), "//*[@numFound='1']", "substring(//doc/arr[@name='NAME_mult_s']/str[1], 1, 8)='MODIFIED'"); - } - } - if(countryChanges!=null) { - for(String code : countryChanges) { - assertQ(req("COUNTRY_CODE_s:" + code), "//*[@numFound='" + numberPeopleByCountryCode(code) + "']", "substring((//doc/str[@name='COUNTRY_NAME_s'])[1], 1, 8)='MODIFIED'"); - } - } - } - private void changeStuff() throws Exception { - if(countryEntity) - { - int n = random().nextInt(2); - switch(n) { - case 0: - personChanges = modifySomePeople(); - break; - case 1: - countryChanges = modifySomeCountries(); - break; - case 2: - personChanges = modifySomePeople(); - countryChanges = modifySomeCountries(); - break; - } - } else { - personChanges = modifySomePeople(); - } - countryChangesLog(); - personChangesLog(); - delta = true; - } - private void countryChangesLog() - { - if(countryChanges!=null) { - StringBuilder sb = new StringBuilder(); - 
sb.append("country changes { "); - for(String s : countryChanges) { - sb.append(s).append(" "); - } - sb.append(" }"); - log.debug("{}", sb); - } - } - private void personChangesLog() - { - if(personChanges!=null) { - log.debug("person changes [ {} ] ", personChanges); - } - } - @Override - protected LocalSolrQueryRequest generateRequest() { - return lrf.makeRequest("command", (delta ? "delta-import" : "full-import"), "dataConfig", generateConfig(), - "clean", (delta ? "false" : "true"), "commit", "true", "synchronous", "true", "indent", "true"); - } - @Override - protected String deltaQueriesPersonTable() { - return - "deletedPkQuery=''SELECT ID FROM PEOPLE WHERE DELETED='Y' AND last_modified >='${dih.People.last_index_time}' '' " + - "deltaImportQuery=''SELECT ID, NAME, COUNTRY_CODE FROM PEOPLE where ID=${dih.delta.ID} '' " + - "deltaQuery=''" + - "SELECT ID FROM PEOPLE WHERE DELETED!='Y' AND last_modified >='${dih.People.last_index_time}' " + - (useParentDeltaQueryParam ? "" : - "UNION DISTINCT " + - "SELECT ID FROM PEOPLE WHERE DELETED!='Y' AND COUNTRY_CODE IN (SELECT CODE FROM COUNTRIES WHERE last_modified >='${dih.People.last_index_time}') " - ) + "'' " - ; - } - @Override - protected String deltaQueriesCountryTable() { - if(useParentDeltaQueryParam) { - return - "deltaQuery=''SELECT CODE FROM COUNTRIES WHERE DELETED != 'Y' AND last_modified >='${dih.last_index_time}' '' " + - "parentDeltaQuery=''SELECT ID FROM PEOPLE WHERE DELETED != 'Y' AND COUNTRY_CODE='${Countries.CODE}' '' " - ; - - } - return ""; - } -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestTemplateTransformer.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestTemplateTransformer.java deleted file mode 100644 index 11ea30be4f3..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestTemplateTransformer.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Arrays; - -/** - *

- * Test for TemplateTransformer - *

- * - * - * @since solr 1.3 - */ -public class TestTemplateTransformer extends AbstractDataImportHandlerTestCase { - - @Test - @SuppressWarnings("unchecked") - public void testTransformRow() { - @SuppressWarnings({"rawtypes"}) - List fields = new ArrayList(); - fields.add(createMap("column", "firstName")); - fields.add(createMap("column", "lastName")); - fields.add(createMap("column", "middleName")); - fields.add(createMap("column", "name", - TemplateTransformer.TEMPLATE, - "${e.lastName}, ${e.firstName} ${e.middleName}")); - fields.add(createMap("column", "emails", - TemplateTransformer.TEMPLATE, - "${e.mail}")); - - // test reuse of template output in another template - fields.add(createMap("column", "mrname", - TemplateTransformer.TEMPLATE,"Mr ${e.name}")); - - List mails = Arrays.asList("a@b.com", "c@d.com"); - @SuppressWarnings({"rawtypes"}) - Map row = createMap( - "firstName", "Shalin", - "middleName", "Shekhar", - "lastName", "Mangar", - "mail", mails); - - VariableResolver resolver = new VariableResolver(); - resolver.addNamespace("e", row); - Map entityAttrs = createMap("name", "e"); - - Context context = getContext(null, resolver, - null, Context.FULL_DUMP, fields, entityAttrs); - new TemplateTransformer().transformRow(row, context); - assertEquals("Mangar, Shalin Shekhar", row.get("name")); - assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname")); - assertEquals(mails,row.get("emails")); - } - - @Test - @SuppressWarnings("unchecked") - public void testTransformRowMultiValue() { - @SuppressWarnings({"rawtypes"}) - List fields = new ArrayList(); - fields.add(createMap("column", "year")); - fields.add(createMap("column", "month")); - fields.add(createMap("column", "day")); - - // create three variations of date format - fields.add(createMap( "column", "date", - TemplateTransformer.TEMPLATE, - "${e.day} ${e.month}, ${e.year}" )); - fields.add(createMap( "column", "date", - TemplateTransformer.TEMPLATE, - "${e.month} ${e.day}, ${e.year}" )); - 
fields.add(createMap("column", "date", - TemplateTransformer.TEMPLATE, - "${e.year}-${e.month}-${e.day}" )); - - @SuppressWarnings({"rawtypes"}) - Map row = createMap( "year", "2016", - "month", "Apr", - "day", "30" ); - VariableResolver resolver = new VariableResolver(); - resolver.addNamespace("e", row); - Map entityAttrs = createMap("date", "e"); - - Context context = getContext(null, resolver, - null, Context.FULL_DUMP, fields, entityAttrs); - new TemplateTransformer().transformRow(row, context); - assertTrue( row.get( "date" ) instanceof List ); - - List dates = (List)row.get( "date" ); - assertEquals( dates.size(), 3 ); - assertEquals( dates.get(0).toString(), "30 Apr, 2016" ); - assertEquals( dates.get(1).toString(), "Apr 30, 2016" ); - assertEquals( dates.get(2).toString(), "2016-Apr-30" ); - } - -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestURLDataSource.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestURLDataSource.java deleted file mode 100644 index c1acc5405a1..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestURLDataSource.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.junit.Test; - -public class TestURLDataSource extends AbstractDataImportHandlerTestCase { - private List> fields = new ArrayList<>(); - private URLDataSource dataSource = new URLDataSource(); - private VariableResolver variableResolver = new VariableResolver(); - private Context context = AbstractDataImportHandlerTestCase.getContext(null, variableResolver, - dataSource, Context.FULL_DUMP, fields, null); - private Properties initProps = new Properties(); - - @Test - public void substitutionsOnBaseUrl() throws Exception { - String url = "http://example.com/"; - - variableResolver.addNamespace("dataimporter.request", Collections.singletonMap("baseurl", url)); - - initProps.setProperty(URLDataSource.BASE_URL, "${dataimporter.request.baseurl}"); - dataSource.init(context, initProps); - assertEquals(url, dataSource.getBaseUrl()); - } -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java deleted file mode 100644 index ef88fffb7d4..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; -import java.util.TimeZone; - -import org.apache.solr.util.DateMathParser; -import org.junit.Test; - -/** - *

- * Test for VariableResolver - *

- * - * - * @since solr 1.3 - */ -public class TestVariableResolver extends AbstractDataImportHandlerTestCase { - - @Test - public void testSimpleNamespace() { - VariableResolver vri = new VariableResolver(); - Map ns = new HashMap<>(); - ns.put("world", "WORLD"); - vri.addNamespace("hello", ns); - assertEquals("WORLD", vri.resolve("hello.world")); - } - - @Test - public void testDefaults() { - // System.out.println(System.setProperty(TestVariableResolver.class.getName(),"hello")); - System.setProperty(TestVariableResolver.class.getName(), "hello"); - // System.out.println("s.gP()"+ - // System.getProperty(TestVariableResolver.class.getName())); - - Properties p = new Properties(); - p.put("hello", "world"); - VariableResolver vri = new VariableResolver(p); - Object val = vri.resolve(TestVariableResolver.class.getName()); - // System.out.println("val = " + val); - assertEquals("hello", val); - assertEquals("world", vri.resolve("hello")); - } - - @Test - public void testNestedNamespace() { - VariableResolver vri = new VariableResolver(); - Map ns = new HashMap<>(); - ns.put("world", "WORLD"); - vri.addNamespace("hello", ns); - ns = new HashMap<>(); - ns.put("world1", "WORLD1"); - vri.addNamespace("hello.my", ns); - assertEquals("WORLD1", vri.resolve("hello.my.world1")); - } - - @Test - public void test3LevelNestedNamespace() { - VariableResolver vri = new VariableResolver(); - Map ns = new HashMap<>(); - ns.put("world", "WORLD"); - vri.addNamespace("hello", ns); - ns = new HashMap<>(); - ns.put("world1", "WORLD1"); - vri.addNamespace("hello.my.new", ns); - assertEquals("WORLD1", vri.resolve("hello.my.new.world1")); - } - - @Test - public void dateNamespaceWithValue() { - VariableResolver vri = new VariableResolver(); - vri.setEvaluators(new DataImporter().getEvaluators(Collections - .> emptyList())); - Map ns = new HashMap<>(); - Date d = new Date(); - ns.put("dt", d); - vri.addNamespace("A", ns); - assertEquals( - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", 
Locale.ROOT).format(d), - vri.replaceTokens("${dataimporter.functions.formatDate(A.dt,'yyyy-MM-dd HH:mm:ss')}")); - } - - @Test - public void dateNamespaceWithExpr() throws Exception { - VariableResolver vri = new VariableResolver(); - vri.setEvaluators(new DataImporter().getEvaluators(Collections - .> emptyList())); - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); - format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(TimeZone.getDefault()); - - String s = vri - .replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}"); - assertEquals( - new SimpleDateFormat("yyyy-MM-dd HH:mm", Locale.ROOT).format(dmp.parseMath("/DAY")), - s); - } - - @Test - public void testDefaultNamespace() { - VariableResolver vri = new VariableResolver(); - Map ns = new HashMap<>(); - ns.put("world", "WORLD"); - vri.addNamespace(null, ns); - assertEquals("WORLD", vri.resolve("world")); - } - - @Test - public void testDefaultNamespace1() { - VariableResolver vri = new VariableResolver(); - Map ns = new HashMap<>(); - ns.put("world", "WORLD"); - vri.addNamespace(null, ns); - assertEquals("WORLD", vri.resolve("world")); - } - - @Test - public void testFunctionNamespace1() throws Exception { - VariableResolver resolver = new VariableResolver(); - final List> l = new ArrayList<>(); - Map m = new HashMap<>(); - m.put("name", "test"); - m.put("class", E.class.getName()); - l.add(m); - resolver.setEvaluators(new DataImporter().getEvaluators(l)); - @SuppressWarnings({"unchecked"}) - ContextImpl context = new ContextImpl(null, resolver, null, - Context.FULL_DUMP, Collections.EMPTY_MAP, null, null); - - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); - format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(TimeZone.getDefault()); - - String s = resolver - 
.replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}"); - assertEquals( - new SimpleDateFormat("yyyy-MM-dd HH:mm", Locale.ROOT).format(dmp.parseMath("/DAY")), - s); - assertEquals("Hello World", - resolver.replaceTokens("${dataimporter.functions.test('TEST')}")); - } - - public static class E extends Evaluator { - @Override - public String evaluate(String expression, Context context) { - return "Hello World"; - } - } - -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolverEndToEnd.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolverEndToEnd.java deleted file mode 100644 index 8ee6878c0ee..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolverEndToEnd.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.handler.dataimport; - -import java.lang.invoke.MethodHandles; -import java.sql.Connection; -import java.sql.Statement; -import java.util.Locale; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import junit.framework.Assert; - -import org.apache.solr.request.SolrQueryRequest; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestVariableResolverEndToEnd extends AbstractDIHJdbcTestCase { - - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - @Test - public void test() throws Exception { - h.query("/dataimport", generateRequest()); - SolrQueryRequest req = null; - try { - req = req("q", "*:*", "wt", "json", "indent", "true"); - String response = h.query(req); - log.debug(response); - response = response.replaceAll("\\s",""); - Assert.assertTrue(response.contains("\"numFound\":1")); - Pattern p = Pattern.compile("[\"]second1_s[\"][:][\"](.*?)[\"]"); - Matcher m = p.matcher(response); - Assert.assertTrue(m.find()); - String yearStr = m.group(1); - Assert.assertTrue(response.contains("\"second1_s\":\"" + yearStr + "\"")); - Assert.assertTrue(response.contains("\"second2_s\":\"" + yearStr + "\"")); - Assert.assertTrue(response.contains("\"second3_s\":\"" + yearStr + "\"")); - Assert.assertTrue(response.contains("\"PORK_s\":\"GRILL\"")); - Assert.assertTrue(response.contains("\"FISH_s\":\"FRY\"")); - Assert.assertTrue(response.contains("\"BEEF_CUTS_mult_s\":[\"ROUND\",\"SIRLOIN\"]")); - } catch(Exception e) { - throw e; - } finally { - req.close(); - } - } - - @Override - protected String generateConfig() { - String thirdLocaleParam = random().nextBoolean() ? 
"" : (", '" + Locale.getDefault().toLanguageTag() + "'"); - StringBuilder sb = new StringBuilder(); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append("\n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append(" \n"); - sb.append("\n"); - sb.append(" \n"); - sb.append(" \n"); - String config = sb.toString(); - log.info(config); - return config; - } - @Override - protected void populateData(Connection conn) throws Exception { - Statement s = null; - try { - s = conn.createStatement(); - s.executeUpdate("create table dual(dual char(1) not null)"); - s.executeUpdate("insert into dual values('Y')"); - conn.commit(); - } catch (Exception e) { - throw e; - } finally { - try { - s.close(); - } catch (Exception ex) {} - try { - conn.close(); - } catch (Exception ex) {} - } - } - @Override - protected Database setAllowedDatabases() { - return Database.HSQLDB; - } -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestWriterImpl.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestWriterImpl.java deleted file mode 100644 index 24eb28bcd3a..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestWriterImpl.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.update.processor.UpdateRequestProcessor; - -import org.junit.BeforeClass; -import org.junit.Test; - -import java.util.*; - -/** - *

- * Test for writerImpl paramater (to provide own SolrWriter) - *

- * - * - * @since solr 4.0 - */ -public class TestWriterImpl extends AbstractDataImportHandlerTestCase { - - @BeforeClass - public static void beforeClass() throws Exception { - initCore("dataimport-nodatasource-solrconfig.xml", "dataimport-schema.xml"); - } - - @Test - @SuppressWarnings("unchecked") - public void testDataConfigWithDataSource() throws Exception { - @SuppressWarnings({"rawtypes"}) - List rows = new ArrayList(); - rows.add(createMap("id", "1", "desc", "one")); - rows.add(createMap("id", "2", "desc", "two")); - rows.add(createMap("id", "3", "desc", "break")); - rows.add(createMap("id", "4", "desc", "four")); - - MockDataSource.setIterator("select * from x", rows.iterator()); - - @SuppressWarnings({"rawtypes"}) - Map extraParams = createMap("writerImpl", TestSolrWriter.class.getName(), - "commit", "true"); - runFullImport(loadDataConfig("data-config-with-datasource.xml"), - extraParams); - - assertQ(req("id:1"), "//*[@numFound='1']"); - assertQ(req("id:2"), "//*[@numFound='1']"); - assertQ(req("id:3"), "//*[@numFound='0']"); - assertQ(req("id:4"), "//*[@numFound='1']"); - } - - public static class TestSolrWriter extends SolrWriter { - - public TestSolrWriter(UpdateRequestProcessor processor, SolrQueryRequest req) { - super(processor, req); - } - - @Override - public boolean upload(SolrInputDocument doc) { - if (doc.getField("desc").getFirstValue().equals("break")) { - return false; - } - return super.upload(doc); - } - - } - -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java deleted file mode 100644 index e2200eab783..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java +++ /dev/null @@ -1,506 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.File; -import java.io.Reader; -import java.io.StringReader; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.TimeUnit; - -import org.junit.Test; - -/** - *

- * Test for XPathEntityProcessor - *

- * - * - * @since solr 1.3 - */ -public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase { - boolean simulateSlowReader; - boolean simulateSlowResultProcessor; - int rowsToRead = -1; - - @Test - @SuppressWarnings({"unchecked"}) - public void withFieldsAndXpath() throws Exception { - File tmpdir = createTempDir().toFile(); - - createFile(tmpdir, "x.xsl", xsl.getBytes(StandardCharsets.UTF_8), false); - @SuppressWarnings({"rawtypes"}) - Map entityAttrs = createMap("name", "e", "url", "cd.xml", - XPathEntityProcessor.FOR_EACH, "/catalog/cd"); - @SuppressWarnings({"rawtypes"}) - List fields = new ArrayList(); - fields.add(createMap("column", "title", "xpath", "/catalog/cd/title")); - fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist")); - fields.add(createMap("column", "year", "xpath", "/catalog/cd/year")); - Context c = getContext(null, - new VariableResolver(), getDataSource(cdData), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList<>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - assertEquals(3, result.size()); - assertEquals("Empire Burlesque", result.get(0).get("title")); - assertEquals("Bonnie Tyler", result.get(1).get("artist")); - assertEquals("1982", result.get(2).get("year")); - } - - @Test - @SuppressWarnings({"unchecked"}) - public void testMultiValued() throws Exception { - @SuppressWarnings({"rawtypes"}) - Map entityAttrs = createMap("name", "e", "url", "testdata.xml", - XPathEntityProcessor.FOR_EACH, "/root"); - @SuppressWarnings({"rawtypes"}) - List fields = new ArrayList(); - fields.add(createMap("column", "a", "xpath", "/root/a", DataImporter.MULTI_VALUED, "true")); - Context c = getContext(null, - new VariableResolver(), getDataSource(testXml), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor 
xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList<>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - @SuppressWarnings({"rawtypes"}) - List l = (List)result.get(0).get("a"); - assertEquals(3, l.size()); - assertEquals("1", l.get(0)); - assertEquals("2", l.get(1)); - assertEquals("ü", l.get(2)); - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - @Test - public void testMultiValuedWithMultipleDocuments() throws Exception { - Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc"); - List fields = new ArrayList(); - fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false")); - fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true")); - fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true")); - fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true")); - fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true")); - - Context c = getContext(null, - new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList<>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - { - assertEquals("1", result.get(0).get("id")); - List a = (List)result.get(0).get("a"); - List s1dataA = (List)result.get(0).get("s1dataA"); - List s1dataB = (List)result.get(0).get("s1dataB"); - List s1dataC = (List)result.get(0).get("s1dataC"); - assertEquals(2, a.size()); - assertEquals("id1-a1", a.get(0)); - 
assertEquals("id1-a2", a.get(1)); - assertEquals(3, s1dataA.size()); - assertEquals("id1-s1dataA-1", s1dataA.get(0)); - assertNull(s1dataA.get(1)); - assertEquals("id1-s1dataA-3", s1dataA.get(2)); - assertEquals(3, s1dataB.size()); - assertEquals("id1-s1dataB-1", s1dataB.get(0)); - assertEquals("id1-s1dataB-2", s1dataB.get(1)); - assertEquals("id1-s1dataB-3", s1dataB.get(2)); - assertEquals(3, s1dataC.size()); - assertNull(s1dataC.get(0)); - assertNull(s1dataC.get(1)); - assertNull(s1dataC.get(2)); - } - { - assertEquals("2", result.get(1).get("id")); - List a = (List)result.get(1).get("a"); - List s1dataA = (List)result.get(1).get("s1dataA"); - List s1dataB = (List)result.get(1).get("s1dataB"); - List s1dataC = (List)result.get(1).get("s1dataC"); - assertTrue(a==null || a.size()==0); - assertEquals(1, s1dataA.size()); - assertNull(s1dataA.get(0)); - assertEquals(1, s1dataB.size()); - assertEquals("id2-s1dataB-1", s1dataB.get(0)); - assertEquals(1, s1dataC.size()); - assertNull(s1dataC.get(0)); - } - { - assertEquals("3", result.get(2).get("id")); - List a = (List)result.get(2).get("a"); - List s1dataA = (List)result.get(2).get("s1dataA"); - List s1dataB = (List)result.get(2).get("s1dataB"); - List s1dataC = (List)result.get(2).get("s1dataC"); - assertTrue(a==null || a.size()==0); - assertEquals(1, s1dataA.size()); - assertEquals("id3-s1dataA-1", s1dataA.get(0)); - assertEquals(1, s1dataB.size()); - assertNull(s1dataB.get(0)); - assertEquals(1, s1dataC.size()); - assertNull(s1dataC.get(0)); - } - { - assertEquals("4", result.get(3).get("id")); - List a = (List)result.get(3).get("a"); - List s1dataA = (List)result.get(3).get("s1dataA"); - List s1dataB = (List)result.get(3).get("s1dataB"); - List s1dataC = (List)result.get(3).get("s1dataC"); - assertTrue(a==null || a.size()==0); - assertEquals(1, s1dataA.size()); - assertEquals("id4-s1dataA-1", s1dataA.get(0)); - assertEquals(1, s1dataB.size()); - assertEquals("id4-s1dataB-1", s1dataB.get(0)); - assertEquals(1, 
s1dataC.size()); - assertEquals("id4-s1dataC-1", s1dataC.get(0)); - } - { - assertEquals("5", result.get(4).get("id")); - List a = (List)result.get(4).get("a"); - List s1dataA = (List)result.get(4).get("s1dataA"); - List s1dataB = (List)result.get(4).get("s1dataB"); - List s1dataC = (List)result.get(4).get("s1dataC"); - assertTrue(a==null || a.size()==0); - assertEquals(1, s1dataA.size()); - assertNull(s1dataA.get(0)); - assertEquals(1, s1dataB.size()); - assertNull(s1dataB.get(0)); - assertEquals(1, s1dataC.size()); - assertEquals("id5-s1dataC-1", s1dataC.get(0)); - } - { - assertEquals("6", result.get(5).get("id")); - List a = (List)result.get(5).get("a"); - List s1dataA = (List)result.get(5).get("s1dataA"); - List s1dataB = (List)result.get(5).get("s1dataB"); - List s1dataC = (List)result.get(5).get("s1dataC"); - assertTrue(a==null || a.size()==0); - assertEquals(3, s1dataA.size()); - assertEquals("id6-s1dataA-1", s1dataA.get(0)); - assertEquals("id6-s1dataA-2", s1dataA.get(1)); - assertNull(s1dataA.get(2)); - assertEquals(3, s1dataB.size()); - assertEquals("id6-s1dataB-1", s1dataB.get(0)); - assertEquals("id6-s1dataB-2", s1dataB.get(1)); - assertEquals("id6-s1dataB-3", s1dataB.get(2)); - assertEquals(3, s1dataC.size()); - assertEquals("id6-s1dataC-1", s1dataC.get(0)); - assertNull(s1dataC.get(1)); - assertEquals("id6-s1dataC-3", s1dataC.get(2)); - } - } - - @Test - @SuppressWarnings({"unchecked"}) - public void testMultiValuedFlatten() throws Exception { - @SuppressWarnings({"rawtypes"}) - Map entityAttrs = createMap("name", "e", "url", "testdata.xml", - XPathEntityProcessor.FOR_EACH, "/root"); - @SuppressWarnings({"rawtypes"}) - List fields = new ArrayList(); - fields.add(createMap("column", "a", "xpath", "/root/a" ,"flatten","true")); - Context c = getContext(null, - new VariableResolver(), getDataSource(testXmlFlatten), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - 
xPathEntityProcessor.init(c); - Map result = null; - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result = row; - } - assertEquals("1B2", result.get("a")); - } - - @Test - @SuppressWarnings({"unchecked"}) - public void withFieldsAndXpathStream() throws Exception { - final Object monitor = new Object(); - final boolean[] done = new boolean[1]; - - @SuppressWarnings({"rawtypes"}) - Map entityAttrs = createMap("name", "e", "url", "cd.xml", - XPathEntityProcessor.FOR_EACH, "/catalog/cd", "stream", "true", "batchSize","1"); - @SuppressWarnings({"rawtypes"}) - List fields = new ArrayList(); - fields.add(createMap("column", "title", "xpath", "/catalog/cd/title")); - fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist")); - fields.add(createMap("column", "year", "xpath", "/catalog/cd/year")); - Context c = getContext(null, - new VariableResolver(), getDataSource(cdData), Context.FULL_DUMP, fields, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor() { - private int count; - - @Override - protected Map readRow(Map record, - String xpath) { - synchronized (monitor) { - if (simulateSlowReader && !done[0]) { - try { - monitor.wait(100); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - } - - return super.readRow(record, xpath); - } - }; - - if (simulateSlowResultProcessor) { - xPathEntityProcessor.blockingQueueSize = 1; - } - xPathEntityProcessor.blockingQueueTimeOut = 1; - xPathEntityProcessor.blockingQueueTimeOutUnits = TimeUnit.MICROSECONDS; - - xPathEntityProcessor.init(c); - List> result = new ArrayList<>(); - while (true) { - if (rowsToRead >= 0 && result.size() >= rowsToRead) { - Thread.currentThread().interrupt(); - } - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - if (simulateSlowResultProcessor) { - synchronized (xPathEntityProcessor.publisherThread) { - if 
(xPathEntityProcessor.publisherThread.isAlive()) { - xPathEntityProcessor.publisherThread.wait(1000); - } - } - } - } - - synchronized (monitor) { - done[0] = true; - monitor.notify(); - } - - // confirm that publisher thread stops. - xPathEntityProcessor.publisherThread.join(1000); - assertEquals("Expected thread to stop", false, xPathEntityProcessor.publisherThread.isAlive()); - - assertEquals(rowsToRead < 0 ? 3 : rowsToRead, result.size()); - - if (rowsToRead < 0) { - assertEquals("Empire Burlesque", result.get(0).get("title")); - assertEquals("Bonnie Tyler", result.get(1).get("artist")); - assertEquals("1982", result.get(2).get("year")); - } - } - - @Test - public void withFieldsAndXpathStreamContinuesOnTimeout() throws Exception { - simulateSlowReader = true; - withFieldsAndXpathStream(); - } - - @Test - public void streamWritesMessageAfterBlockedAttempt() throws Exception { - simulateSlowResultProcessor = true; - withFieldsAndXpathStream(); - } - - @Test - public void streamStopsAfterInterrupt() throws Exception { - simulateSlowResultProcessor = true; - rowsToRead = 1; - withFieldsAndXpathStream(); - } - - @Test - @SuppressWarnings({"unchecked"}) - public void withDefaultSolrAndXsl() throws Exception { - File tmpdir = createTempDir().toFile(); - AbstractDataImportHandlerTestCase.createFile(tmpdir, "x.xsl", xsl.getBytes(StandardCharsets.UTF_8), - false); - - @SuppressWarnings({"rawtypes"}) - Map entityAttrs = createMap("name", "e", - XPathEntityProcessor.USE_SOLR_ADD_SCHEMA, "true", "xsl", "" - + new File(tmpdir, "x.xsl").toURI(), "url", "cd.xml"); - Context c = getContext(null, - new VariableResolver(), getDataSource(cdData), Context.FULL_DUMP, null, entityAttrs); - XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); - xPathEntityProcessor.init(c); - List> result = new ArrayList<>(); - while (true) { - Map row = xPathEntityProcessor.nextRow(); - if (row == null) - break; - result.add(row); - } - assertEquals(3, result.size()); - 
assertEquals("Empire Burlesque", result.get(0).get("title")); - assertEquals("Bonnie Tyler", result.get(1).get("artist")); - assertEquals("1982", result.get(2).get("year")); - } - - private DataSource getDataSource(final String xml) { - return new DataSource() { - - @Override - public void init(Context context, Properties initProps) { - } - - @Override - public void close() { - } - - @Override - public Reader getData(String query) { - return new StringReader(xml); - } - }; - } - - private static final String xsl = "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + " \n" - + "\n" + ""; - - private static final String cdData = "\n" - + "\n" - + "\n" - + "\t\n" - + "\t\tEmpire Burlesque\n" - + "\t\tBob Dylan\n" - + "\t\tUSA\n" - + "\t\tColumbia\n" - + "\t\t10.90\n" - + "\t\t1985\n" - + "\t\n" - + "\t\n" - + "\t\tHide your heart\n" - + "\t\tBonnie Tyler\n" - + "\t\tUK\n" - + "\t\tCBS Records\n" - + "\t\t9.90\n" - + "\t\t1988\n" - + "\t\n" - + "\t\n" - + "\t\tGreatest Hits\n" - + "\t\tDolly Parton\n" - + "\t\tUSA\n" - + "\t\tRCA\n" - + "\t\t9.90\n" - + "\t\t1982\n" + "\t\n" + "\t"; - - private static final String testXml = "\n\n]>\n12ü"; - - private static final String testXmlFlatten = "1B2"; - - private static final String textMultipleDocuments = - "" + - "" + - " " + - " 1" + - " id1-a1" + - " id1-a2" + - " " + - " id1-s1dataA-1" + - " id1-s1dataB-1" + - " " + - " " + - " id1-s1dataB-2" + - " " + - " " + - " id1-s1dataA-3" + - " id1-s1dataB-3" + - " " + - " " + - " " + - " 2" + - " " + - " id2-s1dataB-1" + - " " + - " " + - " " + - " 3" + - " " + - " id3-s1dataA-1" + - " " + - " " + - " " + - " 4" + - " " + - " id4-s1dataA-1" + - " id4-s1dataB-1" + - " id4-s1dataC-1" + - " " + - " " + - " " + - " 5" + - " " + - " id5-s1dataC-1" + - " " + - " " + - " " + - " 6" + - " " + - " id6-s1dataA-1" + - " id6-s1dataB-1" + - " id6-s1dataC-1" + - " " + - " " + - " id6-s1dataA-2" 
+ - " id6-s1dataB-2" + - " " + - " " + - " id6-s1dataB-3" + - " id6-s1dataC-3" + - " " + - " " + - "" - ; -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathRecordReader.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathRecordReader.java deleted file mode 100644 index fe8c6571830..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathRecordReader.java +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.junit.Test; - -/** - *

Test for XPathRecordReader

- * - * - * @since solr 1.3 - */ -public class TestXPathRecordReader extends AbstractDataImportHandlerTestCase { - @Test - public void testBasic() { - String xml="\n" - + " Hello C1\n" - + " Hello C1\n" - + " \n" - + " Hello C2\n" - + " \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/b"); - rr.addField("c", "/root/b/c", true); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(2, l.size()); - assertEquals(2, ((List) l.get(0).get("c")).size()); - assertEquals(1, ((List) l.get(1).get("c")).size()); - } - - @Test - public void testAttributes() { - String xml="\n" - + " \n" - + " \n" - + " \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/b"); - rr.addField("a", "/root/b/@a", false); - rr.addField("b", "/root/b/@b", false); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(3, l.size()); - assertEquals("x0", l.get(0).get("a")); - assertEquals("x1", l.get(1).get("a")); - assertEquals("x2", l.get(2).get("a")); - assertEquals("y0", l.get(0).get("b")); - assertEquals("y1", l.get(1).get("b")); - assertEquals("y2", l.get(2).get("b")); - } - - @Test - public void testAttrInRoot(){ - String xml="\n" + - "\n" + - " \n" + - " \n" + - " 301.46\n" + - " \n" + - "\n" + - " \n" + - " \n" + - " 302.46\n" + - " \n" + - "\n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/r/merchantProduct"); - rr.addField("id", "/r/merchantProduct/@id", false); - rr.addField("mid", "/r/merchantProduct/@mid", false); - rr.addField("price", "/r/merchantProduct/price", false); - rr.addField("conditionType", "/r/merchantProduct/condition/@type", false); - List> l = rr.getAllRecords(new StringReader(xml)); - Map m = l.get(0); - assertEquals("814636051", m.get("id")); - assertEquals("189973", m.get("mid")); - assertEquals("301.46", m.get("price")); - assertEquals("cond-0", m.get("conditionType")); - - m = l.get(1); - assertEquals("814636052", m.get("id")); - assertEquals("189974", m.get("mid")); - assertEquals("302.46", 
m.get("price")); - assertEquals("cond-1", m.get("conditionType")); - } - - @Test - public void testAttributes2Level() { - String xml="\n" - + "\n \n" - + " \n" - + " \n" - + " " - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a/b"); - rr.addField("a", "/root/a/b/@a", false); - rr.addField("b", "/root/a/b/@b", false); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(3, l.size()); - assertEquals("x0", l.get(0).get("a")); - assertEquals("y1", l.get(1).get("b")); - } - - @Test - public void testAttributes2LevelHetero() { - String xml="\n" - + "\n \n" - + " \n" - + " \n" - + " " - + "\n \n" - + " \n" - + " \n" - + " " - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a | /root/x"); - rr.addField("a", "/root/a/b/@a", false); - rr.addField("b", "/root/a/b/@b", false); - rr.addField("a", "/root/x/b/@a", false); - rr.addField("b", "/root/x/b/@b", false); - - final List> a = new ArrayList<>(); - final List> x = new ArrayList<>(); - rr.streamRecords(new StringReader(xml), (record, xpath) -> { - if (record == null) return; - if (xpath.equals("/root/a")) a.add(record); - if (xpath.equals("/root/x")) x.add(record); - }); - - assertEquals(1, a.size()); - assertEquals(1, x.size()); - } - - @Test - public void testAttributes2LevelMissingAttrVal() { - String xml="\n" - + "\n \n" - + " \n" - + " " - + "\n \n" - + " \n" - + " " - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("a", "/root/a/b/@a", true); - rr.addField("b", "/root/a/b/@b", true); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(2, l.size()); - assertNull(((List) l.get(1).get("a")).get(1)); - assertNull(((List) l.get(1).get("b")).get(0)); - } - - @Test - public void testElems2LevelMissing() { - String xml="\n" - + "\t\n" - + "\t \n\t x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n\t x1\n" - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t\n" - + "\t \n\t x3\n\t \n" - + "\t \n\t y4\n\t \n" - + "\t \n" - + ""; - XPathRecordReader rr = new 
XPathRecordReader("/root/a"); - rr.addField("a", "/root/a/b/x", true); - rr.addField("b", "/root/a/b/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(2, l.size()); - assertNull(((List) l.get(1).get("a")).get(1)); - assertNull(((List) l.get(1).get("b")).get(0)); - } - - @Test - public void testElems2LevelEmpty() { - String xml="\n" - + "\t\n" - + "\t \n\t x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n\t \n" // empty - + "\t y1\n" - + "\t \n" - + "\t\n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("a", "/root/a/b/x", true); - rr.addField("b", "/root/a/b/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(1, l.size()); - assertEquals("x0",((List) l.get(0).get("a")).get(0)); - assertEquals("y0",((List) l.get(0).get("b")).get(0)); - assertEquals("",((List) l.get(0).get("a")).get(1)); - assertEquals("y1",((List) l.get(0).get("b")).get(1)); - } - - @Test - public void testMixedContent() { - String xml = "This text is \n" + - " bold and this text is \n" + - " underlined!\n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/p"); - rr.addField("p", "/p", true); - rr.addField("b", "/p/b", true); - rr.addField("u", "/p/u", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Map row = l.get(0); - - assertEquals("bold", ((List) row.get("b")).get(0)); - assertEquals("underlined", ((List) row.get("u")).get(0)); - String p = (String) ((List) row.get("p")).get(0); - assertTrue(p.contains("This text is")); - assertTrue(p.contains("and this text is")); - assertTrue(p.contains("!")); - // Should not contain content from child elements - assertFalse(p.contains("bold")); - } - - @Test - public void testMixedContentFlattened() { - String xml = "This text is \n" + - " bold and this text is \n" + - " underlined!\n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/p"); - rr.addField("p", "/p", false, XPathRecordReader.FLATTEN); - List> l = rr.getAllRecords(new StringReader(xml)); 
- Map row = l.get(0); - assertEquals("This text is \n" + - " bold and this text is \n" + - " underlined!", ((String)row.get("p")).trim() ); - } - - @Test - public void testElems2LevelWithAttrib() { - String xml = "\n\t\n\t \n" - + "\t x0\n" - + "\t \n" // empty - + "\t \n" - + "\t \n" - + "\t \n" // empty - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t x2\n" - + "\t y2\n" - + "\t \n" - + "\t \n" - + "\t \n\t \n" - + "\t x3\n" - + "\t \n" - + "\t \n" - + "\t y4\n" - + "\t \n" - + "\t \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("x", "/root/a/b[@k]/x", true); - rr.addField("y", "/root/a/b[@k]/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(2, l.size()); - assertEquals(3, ((List) l.get(0).get("x")).size()); - assertEquals(3, ((List) l.get(0).get("y")).size()); - assertEquals("x0", ((List) l.get(0).get("x")).get(0)); - assertEquals("", ((List) l.get(0).get("y")).get(0)); - assertEquals("", ((List) l.get(0).get("x")).get(1)); - assertEquals("y1", ((List) l.get(0).get("y")).get(1)); - assertEquals("x2", ((List) l.get(0).get("x")).get(2)); - assertEquals("y2", ((List) l.get(0).get("y")).get(2)); - assertEquals(0, l.get(1).size()); - } - - @Test - public void testElems2LevelWithAttribMultiple() { - String xml="\n" - + "\t\n\t \n" - + "\t x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n" - + "\t x1\n" - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t\n\t \n" - + "\t x3\n" - + "\t \n" - + "\t \n" - + "\t y4\n" - + "\t \n" - + "\t \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("x", "/root/a/b[@k][@m='n']/x", true); - rr.addField("y", "/root/a/b[@k][@m='n']/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(2, l.size()); - assertEquals(1, ((List) l.get(0).get("x")).size()); - assertEquals(1, ((List) l.get(0).get("y")).size()); - assertEquals(0, l.get(1).size()); - } - - @Test - public void testElems2LevelWithAttribVal() { - String xml="\n\t\n \n" - + "\t 
x0\n" - + "\t y0\n" - + "\t \n" - + "\t \n" - + "\t x1\n" - + "\t y1\n" - + "\t \n" - + "\t \n" - + "\t \n x3\n" - + "\t y4\n" - + "\t\n" + ""; - XPathRecordReader rr = new XPathRecordReader("/root/a"); - rr.addField("x", "/root/a/b[@k='x']/x", true); - rr.addField("y", "/root/a/b[@k='x']/y", true); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(2, l.size()); - assertEquals(1, ((List) l.get(0).get("x")).size()); - assertEquals(1, ((List) l.get(0).get("y")).size()); - assertEquals(0, l.get(1).size()); - } - - @Test - public void testAttribValWithSlash() { - String xml = "\n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/b"); - rr.addField("x", "/root/b/a[@x='a/b']/@h", false); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(1, l.size()); - Map m = l.get(0); - assertEquals("hello-A", m.get("x")); - } - - @Test - public void testUnsupportedXPaths() { - RuntimeException ex = expectThrows(RuntimeException.class, () -> new XPathRecordReader("//b")); - assertEquals("forEach cannot start with '//': //b", ex.getMessage()); - - XPathRecordReader rr = new XPathRecordReader("/anyd/contenido"); - ex = expectThrows(RuntimeException.class, () -> rr.addField("bold", "b", false)); - assertEquals("xpath must start with '/' : b", ex.getMessage()); - } - - @Test - public void testAny_decendent_from_root() { - XPathRecordReader rr = new XPathRecordReader("/anyd/contenido"); - rr.addField("descdend", "//boo", true); - rr.addField("inr_descd","//boo/i", false); - rr.addField("cont", "/anyd/contenido", false); - rr.addField("id", "/anyd/contenido/@id", false); - rr.addField("status", "/anyd/status", false); - rr.addField("title", "/anyd/contenido/titulo", false,XPathRecordReader.FLATTEN); - rr.addField("resume", "/anyd/contenido/resumen",false); - rr.addField("text", "/anyd/contenido/texto", false); - - String xml="\n" - + " this top level is ignored because it is external to the forEach\n" - + " as is this element\n" - + 
" \n" - + " This one is not ignored as it's inside a forEach\n" - + " big antler\n" - + " My flattened title \n" - + " My summary skip this! \n" - + " Within the body ofMy text\n" - + "

Access inner sub clauses as well

\n" - + "
\n" - + "
"; - - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(1, l.size()); - Map m = l.get(0); - assertEquals("This one is inside a forEach", m.get("cont").toString().trim()); - assertEquals("10097" ,m.get("id")); - assertEquals("My flattened title" ,m.get("title").toString().trim()); - assertEquals("My summary" ,m.get("resume").toString().trim()); - assertEquals("My text" ,m.get("text").toString().trim()); - assertEquals("not ignored as it's",(String) ((List) m.get("descdend")).get(0) ); - assertEquals("antler" ,(String) ((List) m.get("descdend")).get(1) ); - assertEquals("Within the body of" ,(String) ((List) m.get("descdend")).get(2) ); - assertEquals("inner as well" ,(String) ((List) m.get("descdend")).get(3) ); - assertEquals("sub clauses" ,m.get("inr_descd").toString().trim()); - } - - @Test - public void testAny_decendent_of_a_child1() { - XPathRecordReader rr = new XPathRecordReader("/anycd"); - rr.addField("descdend", "/anycd//boo", true); - - // same test string as above but checking to see if *all* //boo's are collected - String xml="\n" - + " this top level is ignored because it is external to the forEach\n" - + " as is this element\n" - + " \n" - + " This one is not ignored as it's inside a forEach\n" - + " big antler\n" - + " My flattened title \n" - + " My summary skip this! \n" - + " Within the body ofMy text\n" - + "

Access inner sub clauses as well

\n" - + "
\n" - + "
"; - - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(1, l.size()); - Map m = l.get(0); - assertEquals("top level" ,(String) ((List) m.get("descdend")).get(0) ); - assertEquals("this element" ,(String) ((List) m.get("descdend")).get(1) ); - assertEquals("not ignored as it's",(String) ((List) m.get("descdend")).get(2) ); - assertEquals("antler" ,(String) ((List) m.get("descdend")).get(3) ); - assertEquals("title" ,(String) ((List) m.get("descdend")).get(4) ); - assertEquals("Within the body of" ,(String) ((List) m.get("descdend")).get(5) ); - assertEquals("inner as well" ,(String) ((List) m.get("descdend")).get(6) ); - } - - @Test - public void testAny_decendent_of_a_child2() { - XPathRecordReader rr = new XPathRecordReader("/anycd"); - rr.addField("descdend", "/anycd/contenido//boo", true); - - // same test string as above but checking to see if *some* //boo's are collected - String xml="\n" - + " this top level is ignored because it is external to the forEach\n" - + " as is this element\n" - + " \n" - + " This one is not ignored as it's inside a forEach\n" - + " big antler\n" - + " My flattened title \n" - + " My summary skip this! \n" - + " Within the body ofMy text\n" - + "

Access inner sub clauses as well

\n" - + "
\n" - + "
"; - - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(1, l.size()); - Map m = l.get(0); - assertEquals("not ignored as it's",((List) m.get("descdend")).get(0) ); - assertEquals("antler" ,((List) m.get("descdend")).get(1) ); - assertEquals("title" ,((List) m.get("descdend")).get(2) ); - assertEquals("Within the body of" ,((List) m.get("descdend")).get(3) ); - assertEquals("inner as well" ,((List) m.get("descdend")).get(4) ); - } - - @Test - public void testAnother() { - String xml="\n" - + " \n" - + " \n" - + " This is my title \n" - + " This is my summary \n" - + " This is the body of my text \n" - + " \n" - + ""; - XPathRecordReader rr = new XPathRecordReader("/root/contenido"); - rr.addField("id", "/root/contenido/@id", false); - rr.addField("title", "/root/contenido/titulo", false); - rr.addField("resume","/root/contenido/resumen",false); - rr.addField("text", "/root/contenido/texto", false); - - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals(1, l.size()); - Map m = l.get(0); - assertEquals("10097", m.get("id")); - assertEquals("This is my title", m.get("title").toString().trim()); - assertEquals("This is my summary", m.get("resume").toString().trim()); - assertEquals("This is the body of my text", m.get("text").toString() - .trim()); - } - - @Test - public void testSameForEachAndXpath(){ - String xml="\n" + - " \n" + - " hello\n" + - " \n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/cat/name"); - rr.addField("catName", "/root/cat/name",false); - List> l = rr.getAllRecords(new StringReader(xml)); - assertEquals("hello",l.get(0).get("catName")); - } - - @Test - @SuppressWarnings({"unchecked"}) - public void testPutNullTest(){ - String xml = "\n" + - " \n" + - " \n" + - "
A.1.1\n" + - " B.1.1\n" + - " \n" + - " \n" + - " B.1.2\n" + - " C.1.2\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " A.2.1\n" + - " C.2.1\n" + - " \n" + - " \n" + - " B.2.2\n" + - " C.2.2\n" + - " \n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/i"); - rr.addField("a", "/root/i/x/a", true); - rr.addField("b", "/root/i/x/b", true); - rr.addField("c", "/root/i/x/c", true); - List> l = rr.getAllRecords(new StringReader(xml)); - Map map = l.get(0); - List a = (List) map.get("a"); - List b = (List) map.get("b"); - List c = (List) map.get("c"); - - assertEquals("A.1.1",a.get(0)); - assertEquals("B.1.1",b.get(0)); - assertNull(c.get(0)); - - assertNull(a.get(1)); - assertEquals("B.1.2",b.get(1)); - assertEquals("C.1.2",c.get(1)); - - map = l.get(1); - a = (List) map.get("a"); - b = (List) map.get("b"); - c = (List) map.get("c"); - assertEquals("A.2.1",a.get(0)); - assertNull(b.get(0)); - assertEquals("C.2.1",c.get(0)); - - assertNull(a.get(1)); - assertEquals("B.2.2",b.get(1)); - assertEquals("C.2.2",c.get(1)); - } - - - @Test - public void testError(){ - String malformedXml = "\n" + - " \n" + - " 1\n" + - " test1\n" + - " \n" + - " \n" + - " 2\n" + - " test2\n" + - " \n" + - " \n" + - " 3\n" + // invalid XML - " test3\n" + - " \n" + - ""; - XPathRecordReader rr = new XPathRecordReader("/root/node"); - rr.addField("id", "/root/node/id", true); - rr.addField("desc", "/root/node/desc", true); - RuntimeException e = expectThrows(RuntimeException.class, () -> rr.getAllRecords(new StringReader(malformedXml))); - assertTrue(e.getMessage().contains("Unexpected close tag ")); - } -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestZKPropertiesWriter.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestZKPropertiesWriter.java deleted file mode 100644 index 54a5e1225db..00000000000 --- 
a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestZKPropertiesWriter.java +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import javax.xml.xpath.XPathExpressionException; -import java.io.ByteArrayOutputStream; -import java.io.StringWriter; -import java.lang.invoke.MethodHandles; - -import java.nio.charset.StandardCharsets; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; - -import org.apache.solr.client.solrj.embedded.JettySolrRunner; -import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.cloud.MiniSolrCloudCluster; -import org.apache.solr.cloud.SolrCloudTestCase; -import org.apache.solr.cloud.ZkTestServer; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.Replica; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.util.NamedList; -import 
org.apache.solr.common.util.SuppressForbidden; -import org.apache.solr.core.SolrCore; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.request.SolrRequestInfo; -import org.apache.solr.response.BinaryQueryResponseWriter; -import org.apache.solr.response.QueryResponseWriter; -import org.apache.solr.response.SolrQueryResponse; -import org.apache.solr.util.BaseTestHarness; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Tests that DIH properties writer works when using Zookeeper. Zookeeper is used by virtue of starting a SolrCloud cluster.

- * - * Note this test is an unelegant bridge between code that assumes a non SolrCloud environment and that would normally use - * test infra that is not meant to work in a SolrCloud environment ({@link org.apache.solr.util.TestHarness} and some methods in - * {@link org.apache.solr.SolrTestCaseJ4}) and between a test running SolrCloud (extending {@link SolrCloudTestCase} and - * using {@link MiniSolrCloudCluster}).

- * - * These changes were introduced when https://issues.apache.org/jira/browse/SOLR-12823 got fixed and the legacy - * behaviour of SolrCloud that allowed a SolrCloud (Zookeeper active) to function like a standalone Solr (in which the - * cluster would adopt cores contributed by the nodes even if they were unknown to Zookeeper) was no more. - */ -public class TestZKPropertiesWriter extends SolrCloudTestCase { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - protected static ZkTestServer zkServer; - - private static MiniSolrCloudCluster minicluster; - - private String dateFormat = "yyyy-MM-dd HH:mm:ss.SSSSSS"; - - @BeforeClass - public static void dihZk_beforeClass() throws Exception { - System.setProperty(DataImportHandler.ENABLE_DIH_DATA_CONFIG_PARAM, "true"); - - minicluster = configureCluster(1) - .addConfig("conf", configset("dihconfigset")) - .configure(); - - zkServer = minicluster.getZkServer(); - } - - @After - public void afterDihZkTest() throws Exception { - MockDataSource.clearCache(); - } - - @AfterClass - public static void dihZk_afterClass() throws Exception { - shutdownCluster(); - } - - @SuppressForbidden(reason = "Needs currentTimeMillis to construct date stamps") - @Test - @SuppressWarnings({"unchecked"}) - public void testZKPropertiesWriter() throws Exception { - CollectionAdminRequest.createCollectionWithImplicitRouter("collection1", "conf", "1", 1) - .process(cluster.getSolrClient()); - - // DIH talks core, SolrCloud talks collection. 
- DocCollection coll = getCollectionState("collection1"); - Replica replica = coll.getReplicas().iterator().next(); - JettySolrRunner jetty = minicluster.getReplicaJetty(replica); - SolrCore core = jetty.getCoreContainer().getCore(replica.getCoreName()); - - localAssertQ("test query on empty index", request(core, "qlkciyopsbgzyvkylsjhchghjrdf"), "//result[@numFound='0']"); - - SimpleDateFormat errMsgFormat = new SimpleDateFormat(dateFormat, Locale.ROOT); - - // These two calls are from SolrTestCaseJ4 and end up in TestHarness... That's ok they are static and do not reference - // the various variables that were not initialized (so not copying them to this test class as some other methods at the bottom). - delQ("*:*"); - commit(); - SimpleDateFormat df = new SimpleDateFormat(dateFormat, Locale.ROOT); - Date oneSecondAgo = new Date(System.currentTimeMillis() - 1000); - - Map init = new HashMap<>(); - init.put("dateFormat", dateFormat); - ZKPropertiesWriter spw = new ZKPropertiesWriter(); - spw.init(new DataImporter(core, "dataimport"), init); - Map props = new HashMap<>(); - props.put("SomeDates.last_index_time", oneSecondAgo); - props.put("last_index_time", oneSecondAgo); - spw.persist(props); - - @SuppressWarnings({"rawtypes"}) - List rows = new ArrayList(); - rows.add(AbstractDataImportHandlerTestCase.createMap("id", "1", "year_s", "2013")); - MockDataSource.setIterator("select " + df.format(oneSecondAgo) + " from dummy", rows.iterator()); - - localQuery("/dataimport", localMakeRequest(core, "command", "full-import", "dataConfig", - generateConfig(), "clean", "true", "commit", "true", "synchronous", - "true", "indent", "true")); - props = spw.readIndexerProperties(); - Date entityDate = df.parse((String) props.get("SomeDates.last_index_time")); - Date docDate = df.parse((String) props.get("last_index_time")); - - Assert.assertTrue("This date: " + errMsgFormat.format(oneSecondAgo) + " should be prior to the document date: " + errMsgFormat.format(docDate), 
docDate.getTime() - oneSecondAgo.getTime() > 0); - Assert.assertTrue("This date: " + errMsgFormat.format(oneSecondAgo) + " should be prior to the entity date: " + errMsgFormat.format(entityDate), entityDate.getTime() - oneSecondAgo.getTime() > 0); - localAssertQ("Should have found 1 doc, year 2013", request(core, "*:*"), "//*[@numFound='1']", "//doc/str[@name=\"year_s\"]=\"2013\""); - - core.close(); - } - - private static SolrQueryRequest request(SolrCore core, String... q) { - LocalSolrQueryRequest req = localMakeRequest(core, q); - ModifiableSolrParams params = new ModifiableSolrParams(); - params.add(req.getParams()); - params.set("distrib", true); - req.setParams(params); - return req; - } - - private String generateConfig() { - StringBuilder sb = new StringBuilder(); - sb.append(" \n"); - sb.append("\n"); - sb.append("\n"); - sb.append(" \n"); - sb.append("\n"); - sb.append(" \n"); - sb.append("\n"); - sb.append(" \n"); - sb.append(" \n"); - String config = sb.toString(); - log.debug(config); - return config; - } - - /** - * Code copied with some adaptations from {@link org.apache.solr.util.TestHarness.LocalRequestFactory#makeRequest(String...)}. - */ - @SuppressWarnings({"unchecked"}) - private static LocalSolrQueryRequest localMakeRequest(SolrCore core, String ... 
q) { - if (q.length==1) { - Map args = new HashMap<>(); - args.put(CommonParams.VERSION,"2.2"); - - return new LocalSolrQueryRequest(core, q[0], "", 0, 20, args); - } - if (q.length%2 != 0) { - throw new RuntimeException("The length of the string array (query arguments) needs to be even"); - } - @SuppressWarnings({"rawtypes"}) - Map.Entry [] entries = new NamedList.NamedListEntry[q.length / 2]; - for (int i = 0; i < q.length; i += 2) { - entries[i/2] = new NamedList.NamedListEntry<>(q[i], q[i+1]); - } - @SuppressWarnings({"rawtypes"}) - NamedList nl = new NamedList(entries); - if(nl.get("wt" ) == null) nl.add("wt","xml"); - return new LocalSolrQueryRequest(core, nl); - } - - /** - * Code copied from {@link org.apache.solr.util.TestHarness#query(String, SolrQueryRequest)} because it is not - * static there (it could have been) and we do not have an instance of {@link org.apache.solr.util.TestHarness}. - */ - private static String localQuery(String handler, SolrQueryRequest req) throws Exception { - try { - SolrCore core = req.getCore(); - SolrQueryResponse rsp = new SolrQueryResponse(); - SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); - core.execute(core.getRequestHandler(handler),req,rsp); // TODO the core doesn't have the request handler - if (rsp.getException() != null) { - throw rsp.getException(); - } - QueryResponseWriter responseWriter = core.getQueryResponseWriter(req); - if (responseWriter instanceof BinaryQueryResponseWriter) { - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(32000); - BinaryQueryResponseWriter writer = (BinaryQueryResponseWriter) responseWriter; - writer.write(byteArrayOutputStream, req, rsp); - return new String(byteArrayOutputStream.toByteArray(), StandardCharsets.UTF_8); - } else { - StringWriter sw = new StringWriter(32000); - responseWriter.write(sw,req,rsp); - return sw.toString(); - } - - } finally { - req.close(); - SolrRequestInfo.clearRequestInfo(); - } - } - - /** - * Code copied from 
{@link org.apache.solr.SolrTestCaseJ4#assertQ(String, SolrQueryRequest, String...)} in order not to - * use the instance of the {@link org.apache.solr.util.TestHarness}. - */ - private static void localAssertQ(String message, SolrQueryRequest req, String... tests) { - try { - String m = (null == message) ? "" : message + " "; // TODO log 'm' !!! - //since the default (standard) response format is now JSON - //need to explicitly request XML since this class uses XPath - ModifiableSolrParams xmlWriterTypeParams = new ModifiableSolrParams(req.getParams()); - xmlWriterTypeParams.set(CommonParams.WT,"xml"); - //for tests, let's turn indention off so we don't have to handle extraneous spaces - xmlWriterTypeParams.set("indent", xmlWriterTypeParams.get("indent", "off")); - req.setParams(xmlWriterTypeParams); - String response = localQuery(req.getParams().get(CommonParams.QT), req); - - if (req.getParams().getBool("facet", false)) { - // add a test to ensure that faceting did not throw an exception - // internally, where it would be added to facet_counts/exception - String[] allTests = new String[tests.length+1]; - System.arraycopy(tests,0,allTests,1,tests.length); - allTests[0] = "*[count(//lst[@name='facet_counts']/*[@name='exception'])=0]"; - tests = allTests; - } - - String results = BaseTestHarness.validateXPath(response, tests); - - if (null != results) { - String msg = "REQUEST FAILED: xpath=" + results - + "\n\txml response was: " + response - + "\n\trequest was:" + req.getParamString(); - - log.error(msg); - throw new RuntimeException(msg); - } - - } catch (XPathExpressionException e1) { - throw new RuntimeException("XPath is invalid", e1); - } catch (Exception e2) { - SolrException.log(log,"REQUEST FAILED: " + req.getParamString(), e2); - throw new RuntimeException("Exception during query", e2); - } - } -} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TripleThreatTransformer.java 
b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TripleThreatTransformer.java deleted file mode 100644 index 2d0aadbe78c..00000000000 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TripleThreatTransformer.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.dataimport; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -/** - * This transformer does 3 things - *

    - *
  • It turns every row into 3 rows, - * modifying any "id" column to ensure duplicate entries in the index - *
  • The 2nd Row has 2x values for every column, - * with the added one being backwards of the original - *
  • The 3rd Row has an added static value - *
- * - * Also, this does not extend Transformer. - */ -public class TripleThreatTransformer { - public Object transformRow(Map row) { - List> rows = new ArrayList<>(3); - rows.add(row); - rows.add(addDuplicateBackwardsValues(row)); - rows.add(new LinkedHashMap<>(row)); - rows.get(2).put("AddAColumn_s", "Added"); - modifyIdColumn(rows.get(1), 1); - modifyIdColumn(rows.get(2), 2); - return rows; - } - private LinkedHashMap addDuplicateBackwardsValues(Map row) { - LinkedHashMap n = new LinkedHashMap<>(); - for(Map.Entry entry : row.entrySet()) { - String key = entry.getKey(); - if(!"id".equalsIgnoreCase(key)) { - String[] vals = new String[2]; - vals[0] = entry.getValue()==null ? "null" : entry.getValue().toString(); - vals[1] = new StringBuilder(vals[0]).reverse().toString(); - n.put(key, Arrays.asList(vals)); - } else { - n.put(key, entry.getValue()); - } - } - return n; - } - - private void modifyIdColumn(Map row, int num) { - Object o = row.remove("ID"); - if(o==null) { - o = row.remove("id"); - } - if(o!=null) { - String id = o.toString(); - id = "TripleThreat-" + num + "-" + id; - row.put("id", id); - } - } -} diff --git a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java index 039b7f87ea0..f799c48f6a6 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java +++ b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java @@ -66,7 +66,7 @@ public class SolrResourceLoader implements ResourceLoader, Closeable, SolrClassL private static final String base = "org.apache.solr"; private static final String[] packages = { "", "analysis.", "schema.", "handler.", "handler.tagger.", "search.", "update.", "core.", "response.", "request.", - "update.processor.", "util.", "spelling.", "handler.component.", "handler.dataimport.", + "update.processor.", "util.", "spelling.", "handler.component.", "spelling.suggest.", "spelling.suggest.fst.", "rest.schema.analysis.", 
"security.", "handler.admin." }; private static final Charset UTF_8 = StandardCharsets.UTF_8; diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java index 19de535c35c..5e37ccb04ad 100755 --- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java @@ -2637,7 +2637,7 @@ public class SolrCLI implements CLIO { .argName("NAME") .hasArg() .required(true) - .desc("Name of the example to launch, one of: cloud, techproducts, dih, schemaless") + .desc("Name of the example to launch, one of: cloud, techproducts, schemaless") .longOpt("example") .build(), Option.builder("script") @@ -2753,34 +2753,14 @@ public class SolrCLI implements CLIO { String exampleType = cli.getOptionValue("example"); if ("cloud".equals(exampleType)) { runCloudExample(cli); - } else if ("dih".equals(exampleType)) { - runDihExample(cli); } else if ("techproducts".equals(exampleType) || "schemaless".equals(exampleType)) { runExample(cli, exampleType); } else { throw new IllegalArgumentException("Unsupported example "+exampleType+ - "! Please choose one of: cloud, dih, schemaless, or techproducts"); + "! Please choose one of: cloud, schemaless, or techproducts"); } } - protected void runDihExample(CommandLine cli) throws Exception { - File dihSolrHome = new File(exampleDir, "example-DIH/solr"); - if (!dihSolrHome.isDirectory()) { - dihSolrHome = new File(serverDir.getParentFile(), "example/example-DIH/solr"); - if (!dihSolrHome.isDirectory()) { - throw new Exception("example/example-DIH/solr directory not found"); - } - } - - boolean isCloudMode = cli.hasOption('c'); - String zkHost = cli.getOptionValue('z'); - int port = Integer.parseInt(cli.getOptionValue('p', "8983")); - - Map nodeStatus = startSolr(dihSolrHome, isCloudMode, cli, port, zkHost, 30); - String solrUrl = (String)nodeStatus.get("baseUrl"); - echo("\nSolr dih example launched successfully. 
Direct your Web browser to "+solrUrl+" to visit the Solr Admin UI"); - } - protected void runExample(CommandLine cli, String exampleName) throws Exception { File exDir = setupExampleDir(serverDir, exampleDir, exampleName); String collectionName = "schemaless".equals(exampleName) ? "gettingstarted" : exampleName; diff --git a/solr/core/src/test-files/solr/configsets/upload/dih-script-transformer/managed-schema b/solr/core/src/test-files/solr/configsets/upload/dih-script-transformer/managed-schema deleted file mode 100644 index 9e2f9471026..00000000000 --- a/solr/core/src/test-files/solr/configsets/upload/dih-script-transformer/managed-schema +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - diff --git a/solr/core/src/test-files/solr/configsets/upload/dih-script-transformer/solrconfig.xml b/solr/core/src/test-files/solr/configsets/upload/dih-script-transformer/solrconfig.xml deleted file mode 100644 index 82d0cc95f92..00000000000 --- a/solr/core/src/test-files/solr/configsets/upload/dih-script-transformer/solrconfig.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - ${solr.data.dir:} - - - - ${tests.luceneMatchVersion:LATEST} - - - - ${solr.commitwithin.softcommit:true} - - - - - - explicit - true - text - - - - - - - - - diff --git a/solr/example/README.md b/solr/example/README.md index 4ee92489357..1ba2d6200e0 100644 --- a/solr/example/README.md +++ b/solr/example/README.md @@ -25,15 +25,14 @@ separate directory. 
To run a specific example, do: bin/solr -e where is one of: cloud : SolrCloud example - dih : Data Import Handler (rdbms, mail, atom, tika) schemaless : Schema-less example (schema is inferred from data during indexing) techproducts : Kitchen sink example providing comprehensive examples of Solr features ``` -For instance, if you want to run the Solr Data Import Handler example, do: +For instance, if you want to run the SolrCloud example, do: ``` - bin/solr -e dih + bin/solr -e cloud ``` To see all the options available when starting Solr: @@ -80,8 +79,8 @@ statements in the solrconfig.xml file to reference plugin jars outside of this directory for loading "contrib" plugins via relative paths. If you make a copy of this example server and wish to use the -ExtractingRequestHandler (SolrCell), DataImportHandler (DIH), the -clustering component, or any other modules in "contrib", you will need to +ExtractingRequestHandler (SolrCell), the clustering component, +or any other modules in "contrib", you will need to copy the required jars or update the paths to those jars in your solrconfig.xml. 
diff --git a/solr/example/build.gradle b/solr/example/build.gradle index 3b6b3d16eed..b4f3ae955a4 100644 --- a/solr/example/build.gradle +++ b/solr/example/build.gradle @@ -24,13 +24,10 @@ description = 'Solr examples' configurations { packaging postJar - dih } dependencies { postJar project(path: ":solr:core", configuration: "postJar") - dih 'org.hsqldb:hsqldb' - dih 'org.apache.derby:derby' } ext { @@ -39,7 +36,6 @@ ext { task assemblePackaging(type: Sync) { from(projectDir, { - include "example-DIH/**" include "exampledocs/**" include "files/**" include "films/**" @@ -51,10 +47,6 @@ task assemblePackaging(type: Sync) { into "exampledocs/" }) - from(configurations.dih, { - into "example-DIH/solr/db/lib" - }) - into packagingDir } diff --git a/solr/example/example-DIH/.gitignore b/solr/example/example-DIH/.gitignore deleted file mode 100644 index 6d9594aa4f0..00000000000 --- a/solr/example/example-DIH/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/logs/ diff --git a/solr/example/example-DIH/README.md b/solr/example/example-DIH/README.md deleted file mode 100644 index ab989052713..00000000000 --- a/solr/example/example-DIH/README.md +++ /dev/null @@ -1,55 +0,0 @@ - - -Solr DataImportHandler example configuration --------------------------------------------- - -NOTE: The DataImportHandler is deprecated as of v8.6. See SOLR-14066 for more details. - -To run this multi-core example, use the "-e" option of the bin/solr script: - -``` -> bin/solr -e dih -``` - -When Solr is started connect to: - - http://localhost:8983/solr/ - -* To import data from the hsqldb database, connect to: - - http://localhost:8983/solr/db/dataimport?command=full-import - -* To import data from an ATOM feed, connect to: - - http://localhost:8983/solr/atom/dataimport?command=full-import - -* To import data from your IMAP server: - - 1. Edit the example-DIH/solr/mail/conf/mail-data-config.xml and add details about username, password, IMAP server - 2. 
Connect to http://localhost:8983/solr/mail/dataimport?command=full-import - -* To copy data from db Solr core, connect to: - - http://localhost:8983/solr/solr/dataimport?command=full-import - -* To index a full text document using Tika integration: - - http://localhost:8983/solr/tika/dataimport?command=full-import - -Check also the Solr Reference Guide for detailed usage guide: -https://lucene.apache.org/solr/guide/uploading-structured-data-store-data-with-the-data-import-handler.html diff --git a/solr/example/example-DIH/hsqldb/.gitignore b/solr/example/example-DIH/hsqldb/.gitignore deleted file mode 100644 index e75d109dc37..00000000000 --- a/solr/example/example-DIH/hsqldb/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -/ex.tmp/ -ex.log -ex.lck -ex.properties - diff --git a/solr/example/example-DIH/hsqldb/ex.script b/solr/example/example-DIH/hsqldb/ex.script deleted file mode 100644 index b78f6cfb271..00000000000 --- a/solr/example/example-DIH/hsqldb/ex.script +++ /dev/null @@ -1,165 +0,0 @@ -SET DATABASE UNIQUE NAME HSQLDB5E727295B6 -SET DATABASE GC 0 -SET DATABASE DEFAULT RESULT MEMORY ROWS 0 -SET DATABASE EVENT LOG LEVEL 0 -SET DATABASE TRANSACTION CONTROL LOCKS -SET DATABASE DEFAULT ISOLATION LEVEL READ COMMITTED -SET DATABASE TRANSACTION ROLLBACK ON CONFLICT TRUE -SET DATABASE TEXT TABLE DEFAULTS '' -SET DATABASE SQL NAMES FALSE -SET DATABASE SQL REFERENCES FALSE -SET DATABASE SQL SIZE TRUE -SET DATABASE SQL TYPES FALSE -SET DATABASE SQL TDC DELETE TRUE -SET DATABASE SQL TDC UPDATE TRUE -SET DATABASE SQL CONCAT NULLS TRUE -SET DATABASE SQL UNIQUE NULLS TRUE -SET DATABASE SQL CONVERT TRUNCATE TRUE -SET DATABASE SQL AVG SCALE 0 -SET DATABASE SQL DOUBLE NAN TRUE -SET FILES WRITE DELAY 500 MILLIS -SET FILES BACKUP INCREMENT TRUE -SET FILES CACHE SIZE 10000 -SET FILES CACHE ROWS 50000 -SET FILES SCALE 32 -SET FILES LOB SCALE 32 -SET FILES DEFRAG 0 -SET FILES NIO TRUE -SET FILES NIO SIZE 256 -SET FILES LOG TRUE -SET FILES LOG SIZE 50 -CREATE USER SA PASSWORD DIGEST 
'd41d8cd98f00b204e9800998ecf8427e' -ALTER USER SA SET LOCAL TRUE -CREATE SCHEMA PUBLIC AUTHORIZATION DBA -SET SCHEMA PUBLIC -CREATE MEMORY TABLE PUBLIC.ITEM(ID VARCHAR(100),NAME VARCHAR(1024),MANU VARCHAR(50),WEIGHT DOUBLE,PRICE DOUBLE,POPULARITY INTEGER,INCLUDES VARCHAR(200),LAST_MODIFIED TIMESTAMP) -CREATE MEMORY TABLE PUBLIC.FEATURE(ITEM_ID VARCHAR(100),DESCRIPTION VARCHAR(1024),LAST_MODIFIED TIMESTAMP) -CREATE MEMORY TABLE PUBLIC.CATEGORY(ID INTEGER,DESCRIPTION VARCHAR(30),LAST_MODIFIED TIMESTAMP) -CREATE MEMORY TABLE PUBLIC.ITEM_CATEGORY(ITEM_ID VARCHAR(100),CATEGORY_ID INTEGER,LAST_MODIFIED TIMESTAMP) -ALTER SEQUENCE SYSTEM_LOBS.LOB_ID RESTART WITH 1 -SET DATABASE DEFAULT INITIAL SCHEMA PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.SQL_IDENTIFIER TO PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.YES_OR_NO TO PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.TIME_STAMP TO PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.CARDINAL_NUMBER TO PUBLIC -GRANT USAGE ON DOMAIN INFORMATION_SCHEMA.CHARACTER_DATA TO PUBLIC -GRANT DBA TO SA -SET SCHEMA SYSTEM_LOBS -INSERT INTO BLOCKS VALUES(0,2147483647,0) -SET SCHEMA PUBLIC -INSERT INTO ITEM VALUES('6H500F0','Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300','Maxtor Corp.',0.0E0,350.0E0,6,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('F8V7067-APL-KIT','Belkin Mobile Power Cord for iPod w/ Dock','Belkin',4.0E0,19.95E0,1,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('IW-02','iPod & iPod Mini USB 2.0 Cable','Belkin',2.0E0,11.5E0,1,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('MA147LL/A','Apple 60 GB iPod with Video Playback Black','Apple Computer Inc.',5.5E0,399.0E0,10,'earbud headphones, USB cable','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('TWINX2048-3200PRO','CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail','Corsair Microsystems Inc.',0.0E0,185.0E0,5,'null','2017-09-01 12:34:56.000000') 
-INSERT INTO ITEM VALUES('VS1GB400C3','CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail','Corsair Microsystems Inc.',0.0E0,74.99E0,7,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('VDBDB1A16','A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM','A-DATA Technology Inc.',0.0E0,0.0E0,5,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('3007WFP','Dell Widescreen UltraSharp 3007WFP','Dell, Inc.',401.6E0,2199.0E0,6,'USB cable','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('VA902B','ViewSonic VA902B - flat panel display - TFT - 19"','ViewSonic Corp.',190.4E0,279.95E0,6,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('0579B002','Canon PIXMA MP500 All-In-One Photo Printer','Canon Inc.',352.0E0,179.99E0,6,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('9885A004','Canon PowerShot SD500','Canon Inc.',6.4E0,329.95E0,7,'32MB SD card, USB cable, AV cable, battery','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('SOLR1000','Solr, the Enterprise Search Server','Apache Software Foundation',0.0E0,0.0E0,10,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('UTF8TEST','Test with some UTF-8 encoded characters','Apache Software Foundation',0.0E0,0.0E0,0,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('EN7800GTX/2DHTV/256M','ASUS Extreme N7800GTX/2DHTV (256 MB)','ASUS Computer Inc.',16.0E0,479.95E0,7,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('100-435805','ATI Radeon X1900 XTX 512 MB PCIE Video Card','ATI Technologies',48.0E0,649.99E0,7,'null','2017-09-01 12:34:56.000000') -INSERT INTO ITEM VALUES('SP2514N','Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133','Samsung Electronics Co. 
Ltd.',0.0E0,92.0E0,6,'null','2008-03-12 13:30:00.000000') -INSERT INTO FEATURE VALUES('SP2514N','7200RPM, 8MB cache, IDE Ultra ATA-133','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SP2514N','NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('6H500F0','SATA 3.0Gb/s, NCQ','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('6H500F0','8.5ms seek','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('6H500F0','16MB cache','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('F8V7067-APL-KIT','car power adapter, white','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('IW-02','car power adapter for iPod, white','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('MA147LL/A','iTunes, Podcasts, Audiobooks','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('MA147LL/A','Stores up to 15,000 songs, 25,000 photos, or 150 hours of video','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('MA147LL/A','2.5-inch, 320x240 color TFT LCD display with LED backlight','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('MA147LL/A','Up to 20 hours of battery life','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('MA147LL/A','Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('MA147LL/A','Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('TWINX2048-3200PRO','CAS latency 2,\u00092-3-3-6 timing, 2.75v, unbuffered, heat-spreader','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('VDBDB1A16','CAS latency 3,\u0009 2.7v','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('3007WFP','30" TFT active matrix LCD, 2560 x 1600, .25mm 
dot pitch, 700:1 contrast','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('VA902B','19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('0579B002','Multifunction ink-jet color photo printer','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('0579B002','Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('0579B002','2.5" color LCD preview screen','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('0579B002','Duplex Copying','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('0579B002','Printing speed up to 29ppm black, 19ppm color','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('0579B002','Hi-Speed USB','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('0579B002','memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('9885A004','3x zoop, 7.1 megapixel Digital ELPH','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('9885A004','movie clips up to 640x480 @30 fps','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('9885A004','2.0" TFT LCD, 118,000 pixels','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('9885A004','built in flash, red-eye reduction','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SOLR1000','Advanced Full-Text Search Capabilities using Lucene','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SOLR1000','Optimizied for High Volume Web Traffic','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SOLR1000','Standards Based Open Interfaces - XML and HTTP','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SOLR1000','Comprehensive HTML Administration Interfaces','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SOLR1000','Scalability - Efficient Replication to other Solr Search 
Servers','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SOLR1000','Flexible and Adaptable with XML configuration and Schema','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('SOLR1000','Good unicode support: h\u00e9llo (hello with an accent over the e)','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('UTF8TEST','No accents here','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('UTF8TEST','This is an e acute: \u00e9','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('UTF8TEST','eaiou with circumflexes: \u00ea\u00e2\u00ee\u00f4\u00fb','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('UTF8TEST','This is in Turkish: bu T\u00fcrk\u00e7e','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('UTF8TEST','This is in Korean: \uc774\uac83\uc740 \ud55c\uad6d\uc5b4\uc774\ub2e4.','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('UTF8TEST','This is in Greek: \u0391\u03c5\u03c4\u03cc \u03b5\u03af\u03bd\u03b1\u03b9 \u03c3\u03c4\u03b1 \u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('EN7800GTX/2DHTV/256M','NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('EN7800GTX/2DHTV/256M','256MB GDDR3 Memory clocked at 1.35GHz','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('EN7800GTX/2DHTV/256M','PCI Express x16','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('EN7800GTX/2DHTV/256M','Dual DVI connectors, HDTV out, video input','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('EN7800GTX/2DHTV/256M','OpenGL 2.0, DirectX 9.0','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('100-435805','ATI RADEON X1900 GPU/VPU clocked at 650MHz','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('100-435805','512MB GDDR3 SDRAM clocked at 1.55GHz','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('100-435805','PCI Express x16','2017-09-01 12:34:56.000000') -INSERT INTO 
FEATURE VALUES('100-435805','dual DVI, HDTV, svideo, composite out','2017-09-01 12:34:56.000000') -INSERT INTO FEATURE VALUES('100-435805','OpenGL 2.0, DirectX 9.0','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(1,'electronics','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(2,'hard drive','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(3,'connector','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(4,'music','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(5,'memory','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(6,'monitor','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(7,'multifunction printer','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(8,'printer','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(9,'scanner','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(10,'copier','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(11,'camera','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(12,'software','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(13,'search','2017-09-01 12:34:56.000000') -INSERT INTO CATEGORY VALUES(14,'graphics card','2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('SP2514N',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('SP2514N',2,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('6H500F0',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('6H500F0',2,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('F8V7067-APL-KIT',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('F8V7067-APL-KIT',3,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('IW-02',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('IW-02',3,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('MA147LL/A',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('MA147LL/A',4,'2017-09-01 12:34:56.000000') 
-INSERT INTO ITEM_CATEGORY VALUES('TWINX2048-3200PRO',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('TWINX2048-3200PRO',5,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('VS1GB400C3',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('VS1GB400C3',5,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('VDBDB1A16',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('VDBDB1A16',5,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('3007WFP',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('3007WFP',6,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('VA902B',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('VA902B',6,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('0579B002',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('0579B002',7,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('0579B002',8,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('0579B002',9,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('0579B002',10,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('9885A004',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('9885A004',11,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('SOLR1000',12,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('SOLR1000',13,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('UTF8TEST',12,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('UTF8TEST',13,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('EN7800GTX/2DHTV/256M',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('EN7800GTX/2DHTV/256M',14,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('100-435805',1,'2017-09-01 12:34:56.000000') -INSERT INTO ITEM_CATEGORY VALUES('100-435805',14,'2017-09-01 12:34:56.000000') diff --git 
a/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml deleted file mode 100644 index b7de812d005..00000000000 --- a/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2a1..00000000000 --- a/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr/example/example-DIH/solr/atom/conf/managed-schema b/solr/example/example-DIH/solr/atom/conf/managed-schema deleted file mode 100644 index 5376c5ba03a..00000000000 --- a/solr/example/example-DIH/solr/atom/conf/managed-schema +++ /dev/null @@ -1,106 +0,0 @@ - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/atom/conf/protwords.txt b/solr/example/example-DIH/solr/atom/conf/protwords.txt deleted file mode 100644 index 1303e42a061..00000000000 --- a/solr/example/example-DIH/solr/atom/conf/protwords.txt +++ /dev/null @@ -1,17 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. 
- -lucene diff --git a/solr/example/example-DIH/solr/atom/conf/solrconfig.xml b/solr/example/example-DIH/solr/atom/conf/solrconfig.xml deleted file mode 100644 index 5431986e9e2..00000000000 --- a/solr/example/example-DIH/solr/atom/conf/solrconfig.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - - - - - - 9.0.0 - - - - - - explicit - text - - - - - - - atom-data-config.xml - trim_text - - - - - text_en_splitting - - - diff --git a/solr/example/example-DIH/solr/atom/conf/synonyms.txt b/solr/example/example-DIH/solr/atom/conf/synonyms.txt deleted file mode 100644 index eab4ee87537..00000000000 --- a/solr/example/example-DIH/solr/atom/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. 
- -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr/example/example-DIH/solr/atom/conf/url_types.txt b/solr/example/example-DIH/solr/atom/conf/url_types.txt deleted file mode 100644 index 808f3138466..00000000000 --- a/solr/example/example-DIH/solr/atom/conf/url_types.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/solr/example/example-DIH/solr/atom/core.properties b/solr/example/example-DIH/solr/atom/core.properties deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/solr/example/example-DIH/solr/db/conf/clustering/carrot2/kmeans-attributes.xml b/solr/example/example-DIH/solr/db/conf/clustering/carrot2/kmeans-attributes.xml deleted file mode 100644 index d802465f669..00000000000 --- a/solr/example/example-DIH/solr/db/conf/clustering/carrot2/kmeans-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/clustering/carrot2/lingo-attributes.xml b/solr/example/example-DIH/solr/db/conf/clustering/carrot2/lingo-attributes.xml deleted file mode 100644 index 4bf13608b36..00000000000 --- a/solr/example/example-DIH/solr/db/conf/clustering/carrot2/lingo-attributes.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/solr/example/example-DIH/solr/db/conf/clustering/carrot2/stc-attributes.xml b/solr/example/example-DIH/solr/db/conf/clustering/carrot2/stc-attributes.xml deleted file mode 100644 index c1bf110c8fd..00000000000 --- a/solr/example/example-DIH/solr/db/conf/clustering/carrot2/stc-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/currency.xml b/solr/example/example-DIH/solr/db/conf/currency.xml deleted file mode 100644 index 3a9c58afee8..00000000000 --- a/solr/example/example-DIH/solr/db/conf/currency.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/db-data-config.xml b/solr/example/example-DIH/solr/db/conf/db-data-config.xml deleted file mode 100644 index 4a7dba955be..00000000000 --- a/solr/example/example-DIH/solr/db/conf/db-data-config.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/elevate.xml b/solr/example/example-DIH/solr/db/conf/elevate.xml deleted file mode 100644 index 2c09ebed669..00000000000 --- a/solr/example/example-DIH/solr/db/conf/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/lang/contractions_ca.txt b/solr/example/example-DIH/solr/db/conf/lang/contractions_ca.txt deleted file mode 100644 index 307a85f913d..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t diff --git a/solr/example/example-DIH/solr/db/conf/lang/contractions_fr.txt b/solr/example/example-DIH/solr/db/conf/lang/contractions_fr.txt deleted file mode 100644 index f1bba51b23e..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu diff --git a/solr/example/example-DIH/solr/db/conf/lang/contractions_ga.txt b/solr/example/example-DIH/solr/db/conf/lang/contractions_ga.txt deleted file mode 100644 index 9ebe7fa349a..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/contractions_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b diff --git 
a/solr/example/example-DIH/solr/db/conf/lang/contractions_it.txt b/solr/example/example-DIH/solr/db/conf/lang/contractions_it.txt deleted file mode 100644 index cac04095372..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/contractions_it.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l -all -dall -dell -nell -sull -coll -pell -gl -agl -dagl -degl -negl -sugl -un -m -t -s -v -d diff --git a/solr/example/example-DIH/solr/db/conf/lang/hyphenations_ga.txt b/solr/example/example-DIH/solr/db/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 4d2642cc5a3..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t diff --git a/solr/example/example-DIH/solr/db/conf/lang/stemdict_nl.txt b/solr/example/example-DIH/solr/db/conf/lang/stemdict_nl.txt deleted file mode 100644 index 441072971d3..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets fiets -bromfiets bromfiets -ei eier -kind kinder diff --git a/solr/example/example-DIH/solr/db/conf/lang/stoptags_ja.txt b/solr/example/example-DIH/solr/db/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b750845e3..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below. 
Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. -# -##### -# noun: unclassified nouns -#名詞 -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -# noun-proper: Proper nouns where the sub-classification is undefined -#名詞-固有名詞 -# -# noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -# noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. お市の方 -#名詞-固有名詞-人名-一般 -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. 山田 -#名詞-固有名詞-人名-姓 -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 太郎 -#名詞-固有名詞-人名-名 -# -# noun-proper-organization: Names representing organizations. -# e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -# noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -# noun-proper-place-country: Country names. -# e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ -#名詞-代名詞-縮約 -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. 
金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (する, できる, なさる, くださる) -# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -# noun-adjective-base: The base form of adjectives, words that appear before な ("na") -# e.g. 健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -# e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -# noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, -# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, -# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -# わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, -# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, -# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, -# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, -# 儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars -# with the stem よう(だ) ("you(da)"). -# e.g. よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form な (aux "da"). -# e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -# noun-special: special nouns where the sub-classification is undefined. -#名詞-特殊 -# -# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is -# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. 
そう -#名詞-特殊-助動詞語幹 -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ガル or タイ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. In general, this category is more inclusive than -# 接尾語 ("suffix") and is usually the last element in a compound noun. -# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. 君, 様, 著 -#名詞-接尾-人名 -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. 町, 市, 県 -#名詞-接尾-地域 -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before スル ("suru"). -# e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, -# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. そう -#名詞-接尾-助動詞語幹 -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula だ ("da"). -# e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. 
(楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are -# semantically verb-like. -# e.g. ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") -# is いわく ("iwaku"). -#名詞-引用文字列 -# -# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -# behave like an adjective. -# e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -# prefix: unclassified prefixes -#接頭詞 -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by なる/なさる/くださる. -# e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 約, およそ, 毎時 -#接頭詞-数接続 -# -##### -# verb: unclassified verbs -#動詞 -# -# verb-main: -#動詞-自立 -# -# verb-auxiliary: -#動詞-非自立 -# -# verb-suffix: -#動詞-接尾 -# -##### -# adjective: unclassified adjectives -#形容詞 -# -# adjective-main: -#形容詞-自立 -# -# adjective-auxiliary: -#形容詞-非自立 -# -# adjective-suffix: -#形容詞-接尾 -# -##### -# adverb: unclassified adverbs -#副詞 -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. あいかわらず, 多分 -#副詞-一般 -# -# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, -# な, する, だ, etc. -# e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. 
この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, -# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, -# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -# particle: unclassified particles. -助詞 -# -# particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -# particle-case-misc: Case particles. -# e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -# particle-case-quote: the "to" that appears after nouns, a person’s speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, -# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, -# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, -# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, -# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -# particle-conjunctive: -# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, -# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, -# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -# particle-dependency: -# e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -# particle-adverbial: -# e.g. 
がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, -# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, -# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (松島) や -助詞-間投助詞 -# -# particle-coordinate: -# e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -# particle-final: -# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, -# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -# 「(祈りが届いたせい) か (, 試験に合格した.)」 -# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -# e.g. か -助詞-副助詞/並立助詞/終助詞 -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. -助詞-連体化 -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. に, と -助詞-副詞化 -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -# auxiliary-verb: -助動詞 -# -##### -# interjection: Greetings and other exclamations. -# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, -# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -# symbol: unclassified Symbols. -記号 -# -# symbol-misc: A general symbol not in one of the categories below. -# e.g. [○◎@$〒→+] -記号-一般 -# -# symbol-comma: Commas -# e.g. [,、] -記号-読点 -# -# symbol-period: Periods and full stops. -# e.g. 
[..。] -記号-句点 -# -# symbol-space: Full-width whitespace. -記号-空白 -# -# symbol-open_bracket: -# e.g. [({‘“『【] -記号-括弧開 -# -# symbol-close_bracket: -# e.g. [)}’”』」】] -記号-括弧閉 -# -# symbol-alphabetic: -#記号-アルファベット -# -##### -# other: unclassified other -#その他 -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. (だ)ァ -その他-間投 -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. あの, うんと, えと -フィラー -# -##### -# non-verbal: non-verbal sound. -非言語音 -# -##### -# fragment: -#語断片 -# -##### -# unknown: unknown part of speech. -#未知語 -# -##### End of file diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ar.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_ar.txt deleted file mode 100644 index 046829db6a2..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,125 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some -# redundant entries, for example containing forms with both أ and ا -من -ومن -منها -منه -في -وفي -فيها -فيه -و -ف -ثم -او -أو -ب -بها -به -ا -أ -اى -اي -أي -أى -لا -ولا -الا -ألا -إلا -لكن -ما -وما -كما -فما -عن -مع -اذا -إذا -ان -أن -إن -انها -أنها -إنها -انه -أنه -إنه -بان -بأن -فان -فأن -وان -وأن -وإن -التى -التي -الذى -الذي -الذين -الى -الي -إلى -إلي -على -عليها -عليه -اما -أما -إما -ايضا -أيضا -كل -وكل -لم -ولم -لن -ولن -هى -هي -هو -وهى -وهي -وهو -فهى -فهي -فهو -انت -أنت -لك -لها -له -هذه -هذا -تلك -ذلك -هناك -كانت -كان -يكون -تكون -وكانت -وكان -غير -بعض -قد -نحو -بين -بينما -منذ -ضمن -حيث -الان -الآن -خلال -بعد -قبل -حتى -عند -عندما -لدى -جميع diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_bg.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1ae4ba2ae38..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,193 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беше -би -бил -била -били -било -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главно -го -д -да -дали -до -докато -докога -дори -досега -доста -е -едва -един -ето -за -зад -заедно -заради -засега -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -ли -м -ме -между -мен -ми -мнозина -мога -могат -може -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нея -ни -ние -никой -нито -но -някои -някой -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първо -с -са -само -се -сега -си -скоро -след -сме -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -тн -то -това -тогава -този -той -толкова -точно -трябва -тук -тъй -тя -тях -у -харесва -ч -че -често -чрез -ще -щом -я diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ca.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_ca.txt deleted file mode 100644 index 3da65deafe1..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -ací -ah -així -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allí -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquí -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes -d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em -en -encara -ens -entre -érem 
-eren -éreu -es -és -esta -està -estàvem -estaven -estàveu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc -poca -pocs -poques -potser -propi -qual -quals -quan -quant -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son -són -sons -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ckb.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_ckb.txt deleted file mode 100644 index 87abf118fec..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ckb.txt +++ /dev/null @@ -1,136 +0,0 @@ -# set of kurdish stopwords -# note these have been normalized with our scheme (e represented with U+06D5, etc) -# constructed from: -# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al) -# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston) -# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc - -# and -و -# which -کە -# of -ی -# made/did -کرد -# that/which -ئەوەی -# on/head -سەر -# two -دوو -# also -هەروەها -# from/that -لەو -# makes/does -دەکات -# some -چەند -# every -هەر - -# demonstratives -# that -ئەو -# this -ئەم - -# personal pronouns -# I -من -# we -ئێمە -# you -تۆ -# you -ئێوە -# he/she/it -ئەو -# they -ئەوان - -# prepositions -# to/with/by -بە -پێ -# without 
-بەبێ -# along with/while/during -بەدەم -# in the opinion of -بەلای -# according to -بەپێی -# before -بەرلە -# in the direction of -بەرەوی -# in front of/toward -بەرەوە -# before/in the face of -بەردەم -# without -بێ -# except for -بێجگە -# for -بۆ -# on/in -دە -تێ -# with -دەگەڵ -# after -دوای -# except for/aside from -جگە -# in/from -لە -لێ -# in front of/before/because of -لەبەر -# between/among -لەبەینی -# concerning/about -لەبابەت -# concerning -لەبارەی -# instead of -لەباتی -# beside -لەبن -# instead of -لەبرێتی -# behind -لەدەم -# with/together with -لەگەڵ -# by -لەلایەن -# within -لەناو -# between/among -لەنێو -# for the sake of -لەپێناوی -# with respect to -لەرەوی -# by means of/for -لەرێ -# for the sake of -لەرێگا -# on/on top of/according to -لەسەر -# under -لەژێر -# between/among -ناو -# between/among -نێوان -# after -پاش -# before -پێش -# like -وەک diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_cz.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_cz.txt deleted file mode 100644 index 53c6097dac7..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,172 +0,0 @@ -a -s -k -o -i -u -v -z -dnes -cz -tímto -budeš -budem -byli -jseš -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proč -máte -tato -kam -tohoto -kdo -kteří -mi -nám -tom -tomuto -mít -nic -proto -kterou -byla -toho -protože -asi -ho -naši -napište -re -což -tím -takže -svých -její -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -či -pod -téma -mezi -přes -ty -pak -vám -ani -když -však -neg -jsem -tento -článku -články -aby -jsme -před -pta -jejich -byl -ještě -až -bez -také -pouze -první -vaše -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -není -vás -jen -podle -zde -už -být -více -bude -již -než -který -by -které -co -nebo -ten -tak -má -při -od -po -jsou -jak -další -ale -si -se -ve -to -jako -za -zpět -ze -do -pro -je -na -atd -atp -jakmile -přičemž -já -on -ona 
-ono -oni -ony -my -vy -jí -ji -mě -mne -jemu -tomu -těm -těmu -němu -němuž -jehož -jíž -jelikož -jež -jakož -načež diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_da.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145b98e..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. 
pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -på | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. 
-vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -når | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -også | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sådan | such, like this/like that diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_de.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7ae08..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. 
- - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daß | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. 
of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_el.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_el.txt deleted file mode 100644 index 232681f5bd6..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_el.txt +++ /dev/null @@ -1,78 +0,0 @@ -# 
Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'σ' instead of 'ς' -ο -η -το -οι -τα -του -τησ -των -τον -την -και -κι -κ -ειμαι -εισαι -ειναι -ειμαστε -ειστε -στο -στον -στη -στην -μα -αλλα -απο -για -προσ -με -σε -ωσ -παρα -αντι -κατα -μετα -θα -να -δε -δεν -μη -μην -επι -ενω -εαν -αν -τοτε -που -πωσ -ποιοσ -ποια -ποιο -ποιοι -ποιεσ -ποιων -ποιουσ -αυτοσ -αυτη -αυτο -αυτοι -αυτων -αυτουσ -αυτεσ -αυτα -εκεινοσ -εκεινη -εκεινο -εκεινοι -εκεινεσ -εκεινα -εκεινων -εκεινουσ -οπωσ -ομωσ -ισωσ -οσο -οτι diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2a1..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_es.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c8d56..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | pero -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? 
-unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mío | mine -mía | -míos | -mías | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería 
-serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_eu.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_eu.txt deleted file mode 100644 index 25f1db93460..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,99 +0,0 @@ -# example set of basque stopwords -al -anitz -arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute -edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan -hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_fa.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c6da7..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_fa.txt 
+++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'ي' instead of 'ی' -انان -نداشته -سراسر -خياه -ايشان -وي -تاكنون -بيشتري -دوم -پس -ناشي -وگو -يا -داشتند -سپس -هنگام -هرگز -پنج -نشان -امسال -ديگر -گروهي -شدند -چطور -ده -و -دو -نخستين -ولي -چرا -چه -وسط -ه -كدام -قابل -يك -رفت -هفت -همچنين -در -هزار -بله -بلي -شايد -اما -شناسي -گرفته -دهد -داشته -دانست -داشتن -خواهيم -ميليارد -وقتيكه -امد -خواهد -جز -اورده -شده -بلكه -خدمات -شدن -برخي -نبود -بسياري -جلوگيري -حق -كردند -نوعي -بعري -نكرده -نظير -نبايد -بوده -بودن -داد -اورد -هست -جايي -شود -دنبال -داده -بايد -سابق -هيچ -همان -انجا -كمتر -كجاست -گردد -كسي -تر -مردم -تان -دادن -بودند -سري -جدا -ندارند -مگر -يكديگر -دارد -دهند -بنابراين -هنگامي -سمت -جا -انچه -خود -دادند -زياد -دارند -اثر -بدون -بهترين -بيشتر -البته -به -براساس -بيرون -كرد -بعضي -گرفت -توي -اي -ميليون -او -جريان -تول -بر -مانند -برابر -باشيم -مدتي -گويند -اكنون -تا -تنها -جديد -چند -بي -نشده -كردن -كردم -گويد -كرده -كنيم -نمي -نزد -روي -قصد -فقط -بالاي -ديگران -اين -ديروز -توسط -سوم -ايم -دانند -سوي -استفاده -شما -كنار -داريم -ساخته -طور -امده -رفته -نخست -بيست -نزديك -طي -كنيد -از -انها -تمامي -داشت -يكي -طريق -اش -چيست -روب -نمايد -گفت -چندين -چيزي -تواند -ام -ايا -با -ان -ايد -ترين -اينكه -ديگري -راه -هايي -بروز -همچنان -پاعين -كس -حدود -مختلف -مقابل -چيز -گيرد -ندارد -ضد -همچون -سازي -شان -مورد -باره -مرسي -خويش -برخوردار -چون -خارج -شش -هنوز -تحت -ضمن -هستيم -گفته -فكر -بسيار -پيش -براي -روزهاي -انكه -نخواهد -بالا -كل -وقتي -كي -چنين -كه -گيري -نيست -است -كجا -كند -نيز -يابد -بندي -حتي -توانند -عقب -خواست -كنند -بين -تمام -همه -ما -باشند -مثل -شد -اري -باشد -اره -طبق -بعد -اگر -صورت -غير -جاي -بيش -ريزي -اند -زيرا -چگونه -بار -لطفا -مي -درباره -من 
-ديده -همين -گذاري -برداري -علت -گذاشته -هم -فوق -نه -ها -شوند -اباد -همواره -هر -اول -خواهند -چهار -نام -امروز -مان -هاي -قبل -كنم -سعي -تازه -را -هستند -زير -جلوي -عنوان -بود diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_fi.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a055b..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those 
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about -poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_fr.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae6846..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -cela | that -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ga.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_ga.txt deleted file mode 100644 index 
9ff88d747e5..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,110 +0,0 @@ - -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtí -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -ní -níor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sí -tar -thar -thú -triúr -trí -trína -trínár -tríocha -tú -um -ár -é -éis -í -ó -ón -óna -ónár diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_gl.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_gl.txt deleted file mode 100644 index d8760b12c14..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_gl.txt +++ /dev/null @@ -1,161 +0,0 @@ -# galican stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns -nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós diff --git 
a/solr/example/example-DIH/solr/db/conf/lang/stopwords_hi.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb083b..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer -# for spelling variation (see section below), such that it can be used whether or -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well. -अंदर -अत -अपना -अपनी -अपने -अभी -आदि -आप -इत्यादि -इन -इनका -इन्हीं -इन्हें -इन्हों -इस -इसका -इसकी -इसके -इसमें -इसी -इसे -उन -उनका -उनकी -उनके -उनको -उन्हीं -उन्हें -उन्हों -उस -उसके -उसी -उसे -एक -एवं -एस -ऐसे -और -कई -कर -करता -करते -करना -करने -करें -कहते -कहा -का -काफ़ी -कि -कितना -किन्हें -किन्हों -किया -किर -किस -किसी -किसे -की -कुछ -कुल -के -को -कोई -कौन -कौनसा -गया -घर -जब -जहाँ -जा -जितना -जिन -जिन्हें -जिन्हों -जिस -जिसे -जीधर -जैसा -जैसे -जो -तक -तब -तरह -तिन -तिन्हें -तिन्हों -तिस -तिसे -तो -था -थी -थे -दबारा -दिया -दुसरा -दूसरे -दो -द्वारा -न -नहीं -ना -निहायत -नीचे -ने -पर -पर -पहले -पूरा -पे -फिर -बनी -बही -बहुत -बाद -बाला -बिलकुल -भी -भीतर -मगर -मानो -मे -में -यदि -यह -यहाँ -यही -या -यिह -ये -रखें -रहा -रहे -ऱ्वासा -लिए -लिये -लेकिन -व -वर्ग -वह -वह -वहाँ -वहीं -वाले -वुह -वे -वग़ैरह -संग -सकता -सकते -सबसे -सभी -साथ -साबुत -साभ -सारा -से -सो -ही -हुआ -हुई -हुए -है -हैं -हो -होता -होती -होते -होना -होने -# additional normalized forms of the above -अपनि -जेसे -होति -सभि -तिंहों -इंहों -दवारा -इसि -किंहें -थि -उंहों -ओर -जिंहें -वहिं -अभि -बनि -हि -उंहिं -उंहें -हें -वगेरह -एसे -रवासा -कोन -निचे -काफि -उसि -पुरा -भितर -हे -बहि -वहां -कोइ -यहां -जिंहों -तिंहें -किसि -कइ -यहि -इंहिं -जिधर -इंहें 
-अदि -इतयादि -हुइ -कोनसा -इसकि -दुसरे -जहां -अप -किंहों -उनकि -भि -वरग -हुअ -जेसा -नहिं diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_hu.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da8aa9..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. 
-ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_hy.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_hy.txt deleted file mode 100644 index 60c1c50fbc8..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,46 +0,0 @@ -# example set of Armenian stopwords. 
-այդ -այլ -այն -այս -դու -դուք -եմ -են -ենք -ես -եք -է -էի -էին -էինք -էիր -էիք -էր -ըստ -թ -ի -ին -իսկ -իր -կամ -համար -հետ -հետո -մենք -մեջ -մի -ն -նա -նաև -նրա -նրանք -որ -որը -որոնք -որպես -ու -ում -պիտի -վրա -և diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_id.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_id.txt deleted file mode 100644 index 4617f83a5c5..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,359 +0,0 @@ -# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya -hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun -dikarenakan -karena 
-karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin -mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_it.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc773ab..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 @@ - | From 
svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad | a (to) before vowel -al | a + il -allo | a + lo -ai | a + i -agli | a + gli -all | a + l' -agl | a + gl' -alla | a + la -alle | a + le -con | with -col | con + il -coi | con + i (forms collo, cogli etc are now very rare) -da | from -dal | da + il -dallo | da + lo -dai | da + i -dagli | da + gli -dall | da + l' -dagl | da + gll' -dalla | da + la -dalle | da + le -di | of -del | di + il -dello | di + lo -dei | di + i -degli | di + gli -dell | di + l' -degl | di + gl' -della | di + la -delle | di + le -in | in -nel | in + el -nello | in + lo -nei | in + i -negli | in + gli -nell | in + l' -negl | in + gl' -nella | in + la -nelle | in + le -su | on -sul | su + il -sullo | su + lo -sui | su + i -sugli | su + gli -sull | su + l' -sugl | su + gl' -sulla | su + la -sulle | su + le -per | through, by -tra | among -contro | against -io | I -tu | thou -lui | he -lei | she -noi | we -voi | you -loro | they -mio | my -mia | -miei | -mie | -tuo | -tua | -tuoi | thy -tue | -suo | -sua | -suoi | his, her -sue | -nostro | our -nostra | -nostri | -nostre | -vostro | your -vostra | -vostri | -vostre | -mi | me -ti | thee -ci | us, there -vi | you, there -lo | him, the -la | her, the -li | them -le | them, the -gli | to him, the -ne | from there etc -il | the -un | a -uno | a -una | a -ma | but -ed | and -se | if -perché | why, because -anche | also -come | how -dov | where (as dov') -dove | where -che | who, that -chi | who -cui | whom -non | not -più | more -quale | who, that 
-quanto | how much -quanti | -quanta | -quante | -quello | that -quelli | -quella | -quelle | -questo | this -questi | -questa | -queste | -si | yes -tutto | all -tutti | all - - | single letter forms: - -a | at -c | as c' for ce or ci -e | and -i | the -l | as l' -o | or - - | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - - | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - - | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - - | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ja.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6b16..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ja.txt +++ /dev/null @@ 
-1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_lv.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_lv.txt deleted file mode 100644 index e21a23c06c3..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,172 +0,0 @@ -# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined: -# pronouns, adverbs, interjections were removed -# -# prepositions -aiz -ap -ar -apakš -ārpus -augšpus -bez -caur -dēļ -gar -iekš -iz -kopš -labad -lejpus -līdz -no -otrpus -pa -par -pār -pēc -pie -pirms -pret -priekš -starp -šaipus -uz -viņpus -virs -virspus -zem -apakšpus -# Conjunctions -un -bet -jo -ja -ka -lai -tomēr -tikko -turpretī -arī -kaut -gan -tādēļ -tā -ne -tikvien -vien -kā -ir -te -vai -kamēr -# Particles -ar -diezin -droši -diemžēl -nebūt -ik -it -taču -nu -pat -tiklab -iekšpus -nedz -tik -nevis -turpretim -jeb -iekam -iekām -iekāms -kolīdz -līdzko -tiklīdz -jebšu -tālab -tāpēc -nekā -itin -jā -jau -jel -nē -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -būt -biju -biji -bija -bijām -bijāt -esmu -esi -esam -esat -būšu -būsi -būs -būsim -būsiet -tikt -tiku -tiki -tika -tikām -tikāt -tieku -tiec -tiek -tiekam -tiekat -tikšu -tiks -tiksim -tiksiet -tapt -tapi -tapāt -topat -tapšu -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva -kļuvām -kļuvāt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# verbs -varēt -varēju -varējām 
-varēšu -varēsim -var -varēji -varējāt -varēsi -varēsiet -varat -varēja -varēs diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_nl.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aeacf6f..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. 
of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. 
of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_no.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28ba54..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard , Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) 
-en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -på | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -så | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nå | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -når | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -å | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sånn | such a -inni | inside/within -mellom | between -vår | our -hver | each -hvem | who -vors | us/ours -hvis | whose -både | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -også | also -slik | just -vært | been -være | to be -båe | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) 
* -då | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjå | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_pt.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01af6b..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | of, from -a | the; to, at; her -o | the; him -que | who, that -e | and -do | de + o -da | de + a -em | in -um | a -para | for - | é from SER -com | with -não | not, no -uma | a -os | the; them -no | em + o -se | himself etc -na | em + a -por | for -mais | more -as | the; them -dos | de + os -como | as, like -mas | but - | foi from SER -ao | a + o -ele | he -das | de + as - | tem from TER -à | a + a -seu | his -sua | her -ou | or - | ser from SER -quando | when -muito | much - | há from HAV -nos | em + os; us -já | already, now - | está from EST -eu | I -também | also -só | only, just -pelo | per + o -pela | per + a -até | up to -isso | that -ela | he -entre | between - | era from SER -depois | after -sem | without -mesmo | same -aos | a + os - | ter from TER -seus | his -quem | whom -nas | em + as -me | me -esse | that -eles | they - | estão from EST -você | you - | tinha from TER - | foram from SER -essa | that -num | em + um -nem | nor -suas | her -meu | my -às | a + as -minha | my - | têm from TER -numa | em + uma -pelos | per + os -elas | they - | havia from HAV - | seja from SER -qual | which - | será from SER -nós | we - | tenho from TER -lhe | to him, her -deles | of them -essas | those -esses | those -pelas | per + as -este | this - | fosse from SER -dele | of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. 
- -tu | thou -te | thee -vocês | you (plural) -vos | you -lhes | to them -meus | my -minhas -teu | thy -tua -teus -tuas -nosso | our -nossa -nossos -nossas - -dela | of her -delas | of them - -esta | this -estes | these -estas | these -aquele | that -aquela | that -aqueles | those -aquelas | those -isto | this -aquilo | that - - | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - - | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houveríamos -houveriam - - | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -seríamos -seriam - - | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tínhamos -tinham -tive -teve -tivemos -tiveram -tivera -tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -teríamos -teriam diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ro.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_ro.txt deleted file mode 100644 index 4fdee90a5ba..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,233 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi -totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ru.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_ru.txt deleted file mode 100644 index 55271400c64..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `ё' is translated to `е'. - -и | and -в | in/into -во | alternative form -не | not -что | what/that -он | he -на | on/onto -я | i -с | from -со | alternative form -как | how -а | milder form of `no' (but) -то | conjunction and form of `that' -все | all -она | she -так | so, thus -его | him -но | but -да | yes/and -ты | thou -к | towards, by -у | around, chez -же | intensifier particle -вы | you -за | beyond, behind -бы | conditional/subj. particle -по | up to, along -только | only -ее | her -мне | to me -было | it was -вот | here is/are, particle -от | away from -меня | me -еще | still, yet, more -нет | no, there isnt/arent -о | about -из | out of -ему | to him -теперь | now -когда | when -даже | even -ну | so, well -вдруг | suddenly -ли | interrogative particle -если | if -уже | already, but homonym of `narrower' -или | or -ни | neither -быть | to be -был | he was -него | prepositional form of его -до | up to -вас | you accusative -нибудь | indef. 
suffix preceded by hyphen -опять | again -уж | already, but homonym of `adder' -вам | to you -сказал | he said -ведь | particle `after all' -там | there -потом | then -себя | oneself -ничего | nothing -ей | to her -может | usually with `быть' as `maybe' -они | they -тут | here -где | where -есть | there is/are -надо | got to, must -ней | prepositional form of ей -для | for -мы | we -тебя | thee -их | them, their -чем | than -была | she was -сам | self -чтоб | in order to -без | without -будто | as if -человек | man, person, one -чего | genitive form of `what' -раз | once -тоже | also -себе | to oneself -под | beneath -жизнь | life -будет | will be -ж | short form of intensifer particle `же' -тогда | then -кто | who -этот | this -говорил | was saying -того | genitive form of `that' -потому | for that reason -этого | genitive form of `this' -какой | which -совсем | altogether -ним | prepositional form of `его', `они' -здесь | here -этом | prepositional form of `этот' -один | one -почти | almost -мой | my -тем | instrumental/dative plural of `тот', `то' -чтобы | full form of `in order that' -нее | her (acc.) -кажется | it seems -сейчас | now -были | they were -куда | where to -зачем | why -сказать | to say -всех | all (acc., gen. preposn. plural) -никогда | never -сегодня | today -можно | possible, one can -при | by -наконец | finally -два | two -об | alternative form of `о', about -другой | another -хоть | even -после | after -над | above -больше | more -тот | that one (masc.) -через | across, in -эти | these -нас | us -про | about -всего | in all, only, of all -них | prepositional form of `они' (they) -какая | which, feminine -много | lots -разве | interrogative particle -сказала | she said -три | three -эту | this, acc. fem. sing. -моя | my, feminine -впрочем | moreover, besides -хорошо | good -свою | ones own, acc. fem. sing. -этой | oblique form of `эта', fem. `this' -перед | in front of -иногда | sometimes -лучше | better -чуть | a little -том | preposn. 
form of `that one' -нельзя | one must not -такой | such a one -им | to them -более | more -всегда | always -конечно | of course -всю | acc. fem. sing of `all' -между | between - - - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - | - | stems of verbs `to be', `to have', `to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя - diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_sv.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f6766..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | så = so, but also seed. These are indicated clearly below. - -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -på | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -så | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -då | then, when -sin | his -nu | now -har | have -inte | inte någon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -något | some etc -från | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -någon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -åt | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -några | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sådan | such a -vår | our 
-blivit | from bli -dess | its -inom | within -mellan | between -sådant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sådana | such a -vart | each -dina | thy -vars | whose -vårt | our -våra | our -ert | your -era | your -vilkas | whose - diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_th.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_th.txt deleted file mode 100644 index 07f0fabe692..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย -แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว diff --git a/solr/example/example-DIH/solr/db/conf/lang/stopwords_tr.txt b/solr/example/example-DIH/solr/db/conf/lang/stopwords_tr.txt deleted file mode 100644 index 84d9408d4ea..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,212 +0,0 @@ -# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben -benden -beni 
-benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten diff --git a/solr/example/example-DIH/solr/db/conf/lang/userdict_ja.txt b/solr/example/example-DIH/solr/db/conf/lang/userdict_ja.txt deleted file mode 100644 index 6f0368e4d81..00000000000 --- a/solr/example/example-DIH/solr/db/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags. Notice that entries do -# not have weights since they are always used when found. 
This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -# , ... , ... , -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same is undefined. -# -# Whitespace only lines are ignored. Comments are not allowed on entry lines. -# - -# Custom segmentation for kanji compounds -日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 -関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 - -# Custom segmentation for compound katakana -トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 -ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 - -# Custom reading for former sumo wrestler -朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/example/example-DIH/solr/db/conf/managed-schema b/solr/example/example-DIH/solr/db/conf/managed-schema deleted file mode 100644 index 79e3dae3a98..00000000000 --- a/solr/example/example-DIH/solr/db/conf/managed-schema +++ /dev/null @@ -1,1143 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/mapping-FoldToASCII.txt b/solr/example/example-DIH/solr/db/conf/mapping-FoldToASCII.txt deleted file mode 100644 index 9a84b6eac34..00000000000 --- a/solr/example/example-DIH/solr/db/conf/mapping-FoldToASCII.txt +++ /dev/null @@ -1,3813 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This map converts alphabetic, numeric, and symbolic Unicode characters -# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode -# block) into their ASCII equivalents, if one exists. 
-# -# Characters from the following Unicode blocks are converted; however, only -# those characters with reasonable ASCII alternatives are converted: -# -# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf -# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf -# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf -# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf -# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf -# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf -# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf -# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf -# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf -# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf -# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf -# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf -# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf -# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf -# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf -# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf -# -# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode -# -# The set of character conversions supported by this map is a superset of -# those supported by the map represented by mapping-ISOLatin1Accent.txt. -# -# See the bottom of this file for the Perl script used to generate the contents -# of this file (without this header) from ASCIIFoldingFilter.java. - - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
- - -# À [LATIN CAPITAL LETTER A WITH GRAVE] -"\u00C0" => "A" - -# Á [LATIN CAPITAL LETTER A WITH ACUTE] -"\u00C1" => "A" - -#  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] -"\u00C2" => "A" - -# à [LATIN CAPITAL LETTER A WITH TILDE] -"\u00C3" => "A" - -# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] -"\u00C4" => "A" - -# Å [LATIN CAPITAL LETTER A WITH RING ABOVE] -"\u00C5" => "A" - -# Ā [LATIN CAPITAL LETTER A WITH MACRON] -"\u0100" => "A" - -# Ă [LATIN CAPITAL LETTER A WITH BREVE] -"\u0102" => "A" - -# Ą [LATIN CAPITAL LETTER A WITH OGONEK] -"\u0104" => "A" - -# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] -"\u018F" => "A" - -# Ǎ [LATIN CAPITAL LETTER A WITH CARON] -"\u01CD" => "A" - -# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] -"\u01DE" => "A" - -# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E0" => "A" - -# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FA" => "A" - -# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] -"\u0200" => "A" - -# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] -"\u0202" => "A" - -# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] -"\u0226" => "A" - -# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] -"\u023A" => "A" - -# ᴀ [LATIN LETTER SMALL CAPITAL A] -"\u1D00" => "A" - -# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] -"\u1E00" => "A" - -# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] -"\u1EA0" => "A" - -# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] -"\u1EA2" => "A" - -# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA4" => "A" - -# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] -"\u1EA6" => "A" - -# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA8" => "A" - -# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAA" => "A" - -# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAC" => "A" - -# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] -"\u1EAE" => "A" - -# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] -"\u1EB0" => "A" - -# Ẳ [LATIN CAPITAL LETTER A WITH 
BREVE AND HOOK ABOVE] -"\u1EB2" => "A" - -# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] -"\u1EB4" => "A" - -# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB6" => "A" - -# Ⓐ [CIRCLED LATIN CAPITAL LETTER A] -"\u24B6" => "A" - -# A [FULLWIDTH LATIN CAPITAL LETTER A] -"\uFF21" => "A" - -# à [LATIN SMALL LETTER A WITH GRAVE] -"\u00E0" => "a" - -# á [LATIN SMALL LETTER A WITH ACUTE] -"\u00E1" => "a" - -# â [LATIN SMALL LETTER A WITH CIRCUMFLEX] -"\u00E2" => "a" - -# ã [LATIN SMALL LETTER A WITH TILDE] -"\u00E3" => "a" - -# ä [LATIN SMALL LETTER A WITH DIAERESIS] -"\u00E4" => "a" - -# å [LATIN SMALL LETTER A WITH RING ABOVE] -"\u00E5" => "a" - -# ā [LATIN SMALL LETTER A WITH MACRON] -"\u0101" => "a" - -# ă [LATIN SMALL LETTER A WITH BREVE] -"\u0103" => "a" - -# ą [LATIN SMALL LETTER A WITH OGONEK] -"\u0105" => "a" - -# ǎ [LATIN SMALL LETTER A WITH CARON] -"\u01CE" => "a" - -# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] -"\u01DF" => "a" - -# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E1" => "a" - -# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FB" => "a" - -# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] -"\u0201" => "a" - -# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] -"\u0203" => "a" - -# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] -"\u0227" => "a" - -# ɐ [LATIN SMALL LETTER TURNED A] -"\u0250" => "a" - -# ə [LATIN SMALL LETTER SCHWA] -"\u0259" => "a" - -# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] -"\u025A" => "a" - -# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] -"\u1D8F" => "a" - -# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] -"\u1D95" => "a" - -# ạ [LATIN SMALL LETTER A WITH RING BELOW] -"\u1E01" => "a" - -# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] -"\u1E9A" => "a" - -# ạ [LATIN SMALL LETTER A WITH DOT BELOW] -"\u1EA1" => "a" - -# ả [LATIN SMALL LETTER A WITH HOOK ABOVE] -"\u1EA3" => "a" - -# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA5" => "a" - -# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] 
-"\u1EA7" => "a" - -# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA9" => "a" - -# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAB" => "a" - -# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAD" => "a" - -# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] -"\u1EAF" => "a" - -# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] -"\u1EB1" => "a" - -# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] -"\u1EB3" => "a" - -# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] -"\u1EB5" => "a" - -# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB7" => "a" - -# ₐ [LATIN SUBSCRIPT SMALL LETTER A] -"\u2090" => "a" - -# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] -"\u2094" => "a" - -# ⓐ [CIRCLED LATIN SMALL LETTER A] -"\u24D0" => "a" - -# ⱥ [LATIN SMALL LETTER A WITH STROKE] -"\u2C65" => "a" - -# Ɐ [LATIN CAPITAL LETTER TURNED A] -"\u2C6F" => "a" - -# a [FULLWIDTH LATIN SMALL LETTER A] -"\uFF41" => "a" - -# Ꜳ [LATIN CAPITAL LETTER AA] -"\uA732" => "AA" - -# Æ [LATIN CAPITAL LETTER AE] -"\u00C6" => "AE" - -# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] -"\u01E2" => "AE" - -# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] -"\u01FC" => "AE" - -# ᴁ [LATIN LETTER SMALL CAPITAL AE] -"\u1D01" => "AE" - -# Ꜵ [LATIN CAPITAL LETTER AO] -"\uA734" => "AO" - -# Ꜷ [LATIN CAPITAL LETTER AU] -"\uA736" => "AU" - -# Ꜹ [LATIN CAPITAL LETTER AV] -"\uA738" => "AV" - -# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] -"\uA73A" => "AV" - -# Ꜽ [LATIN CAPITAL LETTER AY] -"\uA73C" => "AY" - -# ⒜ [PARENTHESIZED LATIN SMALL LETTER A] -"\u249C" => "(a)" - -# ꜳ [LATIN SMALL LETTER AA] -"\uA733" => "aa" - -# æ [LATIN SMALL LETTER AE] -"\u00E6" => "ae" - -# ǣ [LATIN SMALL LETTER AE WITH MACRON] -"\u01E3" => "ae" - -# ǽ [LATIN SMALL LETTER AE WITH ACUTE] -"\u01FD" => "ae" - -# ᴂ [LATIN SMALL LETTER TURNED AE] -"\u1D02" => "ae" - -# ꜵ [LATIN SMALL LETTER AO] -"\uA735" => "ao" - -# ꜷ [LATIN SMALL LETTER AU] -"\uA737" => "au" - -# ꜹ [LATIN SMALL LETTER AV] -"\uA739" => "av" - -# ꜻ [LATIN 
SMALL LETTER AV WITH HORIZONTAL BAR] -"\uA73B" => "av" - -# ꜽ [LATIN SMALL LETTER AY] -"\uA73D" => "ay" - -# Ɓ [LATIN CAPITAL LETTER B WITH HOOK] -"\u0181" => "B" - -# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] -"\u0182" => "B" - -# Ƀ [LATIN CAPITAL LETTER B WITH STROKE] -"\u0243" => "B" - -# ʙ [LATIN LETTER SMALL CAPITAL B] -"\u0299" => "B" - -# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] -"\u1D03" => "B" - -# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] -"\u1E02" => "B" - -# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] -"\u1E04" => "B" - -# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] -"\u1E06" => "B" - -# Ⓑ [CIRCLED LATIN CAPITAL LETTER B] -"\u24B7" => "B" - -# B [FULLWIDTH LATIN CAPITAL LETTER B] -"\uFF22" => "B" - -# ƀ [LATIN SMALL LETTER B WITH STROKE] -"\u0180" => "b" - -# ƃ [LATIN SMALL LETTER B WITH TOPBAR] -"\u0183" => "b" - -# ɓ [LATIN SMALL LETTER B WITH HOOK] -"\u0253" => "b" - -# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] -"\u1D6C" => "b" - -# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] -"\u1D80" => "b" - -# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] -"\u1E03" => "b" - -# ḅ [LATIN SMALL LETTER B WITH DOT BELOW] -"\u1E05" => "b" - -# ḇ [LATIN SMALL LETTER B WITH LINE BELOW] -"\u1E07" => "b" - -# ⓑ [CIRCLED LATIN SMALL LETTER B] -"\u24D1" => "b" - -# b [FULLWIDTH LATIN SMALL LETTER B] -"\uFF42" => "b" - -# ⒝ [PARENTHESIZED LATIN SMALL LETTER B] -"\u249D" => "(b)" - -# Ç [LATIN CAPITAL LETTER C WITH CEDILLA] -"\u00C7" => "C" - -# Ć [LATIN CAPITAL LETTER C WITH ACUTE] -"\u0106" => "C" - -# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] -"\u0108" => "C" - -# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] -"\u010A" => "C" - -# Č [LATIN CAPITAL LETTER C WITH CARON] -"\u010C" => "C" - -# Ƈ [LATIN CAPITAL LETTER C WITH HOOK] -"\u0187" => "C" - -# Ȼ [LATIN CAPITAL LETTER C WITH STROKE] -"\u023B" => "C" - -# ʗ [LATIN LETTER STRETCHED C] -"\u0297" => "C" - -# ᴄ [LATIN LETTER SMALL CAPITAL C] -"\u1D04" => "C" - -# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] -"\u1E08" => "C" - -# Ⓒ 
[CIRCLED LATIN CAPITAL LETTER C] -"\u24B8" => "C" - -# C [FULLWIDTH LATIN CAPITAL LETTER C] -"\uFF23" => "C" - -# ç [LATIN SMALL LETTER C WITH CEDILLA] -"\u00E7" => "c" - -# ć [LATIN SMALL LETTER C WITH ACUTE] -"\u0107" => "c" - -# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] -"\u0109" => "c" - -# ċ [LATIN SMALL LETTER C WITH DOT ABOVE] -"\u010B" => "c" - -# č [LATIN SMALL LETTER C WITH CARON] -"\u010D" => "c" - -# ƈ [LATIN SMALL LETTER C WITH HOOK] -"\u0188" => "c" - -# ȼ [LATIN SMALL LETTER C WITH STROKE] -"\u023C" => "c" - -# ɕ [LATIN SMALL LETTER C WITH CURL] -"\u0255" => "c" - -# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] -"\u1E09" => "c" - -# ↄ [LATIN SMALL LETTER REVERSED C] -"\u2184" => "c" - -# ⓒ [CIRCLED LATIN SMALL LETTER C] -"\u24D2" => "c" - -# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] -"\uA73E" => "c" - -# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] -"\uA73F" => "c" - -# c [FULLWIDTH LATIN SMALL LETTER C] -"\uFF43" => "c" - -# ⒞ [PARENTHESIZED LATIN SMALL LETTER C] -"\u249E" => "(c)" - -# Ð [LATIN CAPITAL LETTER ETH] -"\u00D0" => "D" - -# Ď [LATIN CAPITAL LETTER D WITH CARON] -"\u010E" => "D" - -# Đ [LATIN CAPITAL LETTER D WITH STROKE] -"\u0110" => "D" - -# Ɖ [LATIN CAPITAL LETTER AFRICAN D] -"\u0189" => "D" - -# Ɗ [LATIN CAPITAL LETTER D WITH HOOK] -"\u018A" => "D" - -# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] -"\u018B" => "D" - -# ᴅ [LATIN LETTER SMALL CAPITAL D] -"\u1D05" => "D" - -# ᴆ [LATIN LETTER SMALL CAPITAL ETH] -"\u1D06" => "D" - -# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] -"\u1E0A" => "D" - -# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] -"\u1E0C" => "D" - -# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] -"\u1E0E" => "D" - -# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] -"\u1E10" => "D" - -# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E12" => "D" - -# Ⓓ [CIRCLED LATIN CAPITAL LETTER D] -"\u24B9" => "D" - -# Ꝺ [LATIN CAPITAL LETTER INSULAR D] -"\uA779" => "D" - -# D [FULLWIDTH LATIN CAPITAL LETTER D] -"\uFF24" => "D" - -# ð [LATIN SMALL 
LETTER ETH] -"\u00F0" => "d" - -# ď [LATIN SMALL LETTER D WITH CARON] -"\u010F" => "d" - -# đ [LATIN SMALL LETTER D WITH STROKE] -"\u0111" => "d" - -# ƌ [LATIN SMALL LETTER D WITH TOPBAR] -"\u018C" => "d" - -# ȡ [LATIN SMALL LETTER D WITH CURL] -"\u0221" => "d" - -# ɖ [LATIN SMALL LETTER D WITH TAIL] -"\u0256" => "d" - -# ɗ [LATIN SMALL LETTER D WITH HOOK] -"\u0257" => "d" - -# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] -"\u1D6D" => "d" - -# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] -"\u1D81" => "d" - -# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] -"\u1D91" => "d" - -# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] -"\u1E0B" => "d" - -# ḍ [LATIN SMALL LETTER D WITH DOT BELOW] -"\u1E0D" => "d" - -# ḏ [LATIN SMALL LETTER D WITH LINE BELOW] -"\u1E0F" => "d" - -# ḑ [LATIN SMALL LETTER D WITH CEDILLA] -"\u1E11" => "d" - -# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E13" => "d" - -# ⓓ [CIRCLED LATIN SMALL LETTER D] -"\u24D3" => "d" - -# ꝺ [LATIN SMALL LETTER INSULAR D] -"\uA77A" => "d" - -# d [FULLWIDTH LATIN SMALL LETTER D] -"\uFF44" => "d" - -# DŽ [LATIN CAPITAL LETTER DZ WITH CARON] -"\u01C4" => "DZ" - -# DZ [LATIN CAPITAL LETTER DZ] -"\u01F1" => "DZ" - -# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] -"\u01C5" => "Dz" - -# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] -"\u01F2" => "Dz" - -# ⒟ [PARENTHESIZED LATIN SMALL LETTER D] -"\u249F" => "(d)" - -# ȸ [LATIN SMALL LETTER DB DIGRAPH] -"\u0238" => "db" - -# dž [LATIN SMALL LETTER DZ WITH CARON] -"\u01C6" => "dz" - -# dz [LATIN SMALL LETTER DZ] -"\u01F3" => "dz" - -# ʣ [LATIN SMALL LETTER DZ DIGRAPH] -"\u02A3" => "dz" - -# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] -"\u02A5" => "dz" - -# È [LATIN CAPITAL LETTER E WITH GRAVE] -"\u00C8" => "E" - -# É [LATIN CAPITAL LETTER E WITH ACUTE] -"\u00C9" => "E" - -# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] -"\u00CA" => "E" - -# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] -"\u00CB" => "E" - -# Ē [LATIN CAPITAL LETTER E WITH MACRON] -"\u0112" => "E" - -# Ĕ 
[LATIN CAPITAL LETTER E WITH BREVE] -"\u0114" => "E" - -# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] -"\u0116" => "E" - -# Ę [LATIN CAPITAL LETTER E WITH OGONEK] -"\u0118" => "E" - -# Ě [LATIN CAPITAL LETTER E WITH CARON] -"\u011A" => "E" - -# Ǝ [LATIN CAPITAL LETTER REVERSED E] -"\u018E" => "E" - -# Ɛ [LATIN CAPITAL LETTER OPEN E] -"\u0190" => "E" - -# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] -"\u0204" => "E" - -# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] -"\u0206" => "E" - -# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] -"\u0228" => "E" - -# Ɇ [LATIN CAPITAL LETTER E WITH STROKE] -"\u0246" => "E" - -# ᴇ [LATIN LETTER SMALL CAPITAL E] -"\u1D07" => "E" - -# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] -"\u1E14" => "E" - -# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] -"\u1E16" => "E" - -# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E18" => "E" - -# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] -"\u1E1A" => "E" - -# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] -"\u1E1C" => "E" - -# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] -"\u1EB8" => "E" - -# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] -"\u1EBA" => "E" - -# Ẽ [LATIN CAPITAL LETTER E WITH TILDE] -"\u1EBC" => "E" - -# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBE" => "E" - -# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC0" => "E" - -# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC2" => "E" - -# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC4" => "E" - -# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC6" => "E" - -# Ⓔ [CIRCLED LATIN CAPITAL LETTER E] -"\u24BA" => "E" - -# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] -"\u2C7B" => "E" - -# E [FULLWIDTH LATIN CAPITAL LETTER E] -"\uFF25" => "E" - -# è [LATIN SMALL LETTER E WITH GRAVE] -"\u00E8" => "e" - -# é [LATIN SMALL LETTER E WITH ACUTE] -"\u00E9" => "e" - -# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] -"\u00EA" => "e" - -# ë [LATIN SMALL LETTER E WITH DIAERESIS] -"\u00EB" 
=> "e" - -# ē [LATIN SMALL LETTER E WITH MACRON] -"\u0113" => "e" - -# ĕ [LATIN SMALL LETTER E WITH BREVE] -"\u0115" => "e" - -# ė [LATIN SMALL LETTER E WITH DOT ABOVE] -"\u0117" => "e" - -# ę [LATIN SMALL LETTER E WITH OGONEK] -"\u0119" => "e" - -# ě [LATIN SMALL LETTER E WITH CARON] -"\u011B" => "e" - -# ǝ [LATIN SMALL LETTER TURNED E] -"\u01DD" => "e" - -# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] -"\u0205" => "e" - -# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] -"\u0207" => "e" - -# ȩ [LATIN SMALL LETTER E WITH CEDILLA] -"\u0229" => "e" - -# ɇ [LATIN SMALL LETTER E WITH STROKE] -"\u0247" => "e" - -# ɘ [LATIN SMALL LETTER REVERSED E] -"\u0258" => "e" - -# ɛ [LATIN SMALL LETTER OPEN E] -"\u025B" => "e" - -# ɜ [LATIN SMALL LETTER REVERSED OPEN E] -"\u025C" => "e" - -# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] -"\u025D" => "e" - -# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] -"\u025E" => "e" - -# ʚ [LATIN SMALL LETTER CLOSED OPEN E] -"\u029A" => "e" - -# ᴈ [LATIN SMALL LETTER TURNED OPEN E] -"\u1D08" => "e" - -# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] -"\u1D92" => "e" - -# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] -"\u1D93" => "e" - -# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] -"\u1D94" => "e" - -# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] -"\u1E15" => "e" - -# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] -"\u1E17" => "e" - -# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E19" => "e" - -# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] -"\u1E1B" => "e" - -# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] -"\u1E1D" => "e" - -# ẹ [LATIN SMALL LETTER E WITH DOT BELOW] -"\u1EB9" => "e" - -# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] -"\u1EBB" => "e" - -# ẽ [LATIN SMALL LETTER E WITH TILDE] -"\u1EBD" => "e" - -# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBF" => "e" - -# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC1" => "e" - -# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC3" => 
"e" - -# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC5" => "e" - -# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC7" => "e" - -# ₑ [LATIN SUBSCRIPT SMALL LETTER E] -"\u2091" => "e" - -# ⓔ [CIRCLED LATIN SMALL LETTER E] -"\u24D4" => "e" - -# ⱸ [LATIN SMALL LETTER E WITH NOTCH] -"\u2C78" => "e" - -# e [FULLWIDTH LATIN SMALL LETTER E] -"\uFF45" => "e" - -# ⒠ [PARENTHESIZED LATIN SMALL LETTER E] -"\u24A0" => "(e)" - -# Ƒ [LATIN CAPITAL LETTER F WITH HOOK] -"\u0191" => "F" - -# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] -"\u1E1E" => "F" - -# Ⓕ [CIRCLED LATIN CAPITAL LETTER F] -"\u24BB" => "F" - -# ꜰ [LATIN LETTER SMALL CAPITAL F] -"\uA730" => "F" - -# Ꝼ [LATIN CAPITAL LETTER INSULAR F] -"\uA77B" => "F" - -# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] -"\uA7FB" => "F" - -# F [FULLWIDTH LATIN CAPITAL LETTER F] -"\uFF26" => "F" - -# ƒ [LATIN SMALL LETTER F WITH HOOK] -"\u0192" => "f" - -# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] -"\u1D6E" => "f" - -# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] -"\u1D82" => "f" - -# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] -"\u1E1F" => "f" - -# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] -"\u1E9B" => "f" - -# ⓕ [CIRCLED LATIN SMALL LETTER F] -"\u24D5" => "f" - -# ꝼ [LATIN SMALL LETTER INSULAR F] -"\uA77C" => "f" - -# f [FULLWIDTH LATIN SMALL LETTER F] -"\uFF46" => "f" - -# ⒡ [PARENTHESIZED LATIN SMALL LETTER F] -"\u24A1" => "(f)" - -# ff [LATIN SMALL LIGATURE FF] -"\uFB00" => "ff" - -# ffi [LATIN SMALL LIGATURE FFI] -"\uFB03" => "ffi" - -# ffl [LATIN SMALL LIGATURE FFL] -"\uFB04" => "ffl" - -# fi [LATIN SMALL LIGATURE FI] -"\uFB01" => "fi" - -# fl [LATIN SMALL LIGATURE FL] -"\uFB02" => "fl" - -# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] -"\u011C" => "G" - -# Ğ [LATIN CAPITAL LETTER G WITH BREVE] -"\u011E" => "G" - -# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] -"\u0120" => "G" - -# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] -"\u0122" => "G" - -# Ɠ [LATIN CAPITAL LETTER G WITH HOOK] -"\u0193" => "G" - -# Ǥ [LATIN CAPITAL 
LETTER G WITH STROKE] -"\u01E4" => "G" - -# ǥ [LATIN SMALL LETTER G WITH STROKE] -"\u01E5" => "G" - -# Ǧ [LATIN CAPITAL LETTER G WITH CARON] -"\u01E6" => "G" - -# ǧ [LATIN SMALL LETTER G WITH CARON] -"\u01E7" => "G" - -# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] -"\u01F4" => "G" - -# ɢ [LATIN LETTER SMALL CAPITAL G] -"\u0262" => "G" - -# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] -"\u029B" => "G" - -# Ḡ [LATIN CAPITAL LETTER G WITH MACRON] -"\u1E20" => "G" - -# Ⓖ [CIRCLED LATIN CAPITAL LETTER G] -"\u24BC" => "G" - -# Ᵹ [LATIN CAPITAL LETTER INSULAR G] -"\uA77D" => "G" - -# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] -"\uA77E" => "G" - -# G [FULLWIDTH LATIN CAPITAL LETTER G] -"\uFF27" => "G" - -# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] -"\u011D" => "g" - -# ğ [LATIN SMALL LETTER G WITH BREVE] -"\u011F" => "g" - -# ġ [LATIN SMALL LETTER G WITH DOT ABOVE] -"\u0121" => "g" - -# ģ [LATIN SMALL LETTER G WITH CEDILLA] -"\u0123" => "g" - -# ǵ [LATIN SMALL LETTER G WITH ACUTE] -"\u01F5" => "g" - -# ɠ [LATIN SMALL LETTER G WITH HOOK] -"\u0260" => "g" - -# ɡ [LATIN SMALL LETTER SCRIPT G] -"\u0261" => "g" - -# ᵷ [LATIN SMALL LETTER TURNED G] -"\u1D77" => "g" - -# ᵹ [LATIN SMALL LETTER INSULAR G] -"\u1D79" => "g" - -# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] -"\u1D83" => "g" - -# ḡ [LATIN SMALL LETTER G WITH MACRON] -"\u1E21" => "g" - -# ⓖ [CIRCLED LATIN SMALL LETTER G] -"\u24D6" => "g" - -# ꝿ [LATIN SMALL LETTER TURNED INSULAR G] -"\uA77F" => "g" - -# g [FULLWIDTH LATIN SMALL LETTER G] -"\uFF47" => "g" - -# ⒢ [PARENTHESIZED LATIN SMALL LETTER G] -"\u24A2" => "(g)" - -# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] -"\u0124" => "H" - -# Ħ [LATIN CAPITAL LETTER H WITH STROKE] -"\u0126" => "H" - -# Ȟ [LATIN CAPITAL LETTER H WITH CARON] -"\u021E" => "H" - -# ʜ [LATIN LETTER SMALL CAPITAL H] -"\u029C" => "H" - -# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] -"\u1E22" => "H" - -# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] -"\u1E24" => "H" - -# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] 
-"\u1E26" => "H" - -# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] -"\u1E28" => "H" - -# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] -"\u1E2A" => "H" - -# Ⓗ [CIRCLED LATIN CAPITAL LETTER H] -"\u24BD" => "H" - -# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] -"\u2C67" => "H" - -# Ⱶ [LATIN CAPITAL LETTER HALF H] -"\u2C75" => "H" - -# H [FULLWIDTH LATIN CAPITAL LETTER H] -"\uFF28" => "H" - -# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] -"\u0125" => "h" - -# ħ [LATIN SMALL LETTER H WITH STROKE] -"\u0127" => "h" - -# ȟ [LATIN SMALL LETTER H WITH CARON] -"\u021F" => "h" - -# ɥ [LATIN SMALL LETTER TURNED H] -"\u0265" => "h" - -# ɦ [LATIN SMALL LETTER H WITH HOOK] -"\u0266" => "h" - -# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] -"\u02AE" => "h" - -# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] -"\u02AF" => "h" - -# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] -"\u1E23" => "h" - -# ḥ [LATIN SMALL LETTER H WITH DOT BELOW] -"\u1E25" => "h" - -# ḧ [LATIN SMALL LETTER H WITH DIAERESIS] -"\u1E27" => "h" - -# ḩ [LATIN SMALL LETTER H WITH CEDILLA] -"\u1E29" => "h" - -# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] -"\u1E2B" => "h" - -# ẖ [LATIN SMALL LETTER H WITH LINE BELOW] -"\u1E96" => "h" - -# ⓗ [CIRCLED LATIN SMALL LETTER H] -"\u24D7" => "h" - -# ⱨ [LATIN SMALL LETTER H WITH DESCENDER] -"\u2C68" => "h" - -# ⱶ [LATIN SMALL LETTER HALF H] -"\u2C76" => "h" - -# h [FULLWIDTH LATIN SMALL LETTER H] -"\uFF48" => "h" - -# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] -"\u01F6" => "HV" - -# ⒣ [PARENTHESIZED LATIN SMALL LETTER H] -"\u24A3" => "(h)" - -# ƕ [LATIN SMALL LETTER HV] -"\u0195" => "hv" - -# Ì [LATIN CAPITAL LETTER I WITH GRAVE] -"\u00CC" => "I" - -# Í [LATIN CAPITAL LETTER I WITH ACUTE] -"\u00CD" => "I" - -# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] -"\u00CE" => "I" - -# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] -"\u00CF" => "I" - -# Ĩ [LATIN CAPITAL LETTER I WITH TILDE] -"\u0128" => "I" - -# Ī [LATIN CAPITAL LETTER I WITH MACRON] -"\u012A" => "I" - -# Ĭ [LATIN 
CAPITAL LETTER I WITH BREVE] -"\u012C" => "I" - -# Į [LATIN CAPITAL LETTER I WITH OGONEK] -"\u012E" => "I" - -# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] -"\u0130" => "I" - -# Ɩ [LATIN CAPITAL LETTER IOTA] -"\u0196" => "I" - -# Ɨ [LATIN CAPITAL LETTER I WITH STROKE] -"\u0197" => "I" - -# Ǐ [LATIN CAPITAL LETTER I WITH CARON] -"\u01CF" => "I" - -# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] -"\u0208" => "I" - -# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] -"\u020A" => "I" - -# ɪ [LATIN LETTER SMALL CAPITAL I] -"\u026A" => "I" - -# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] -"\u1D7B" => "I" - -# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] -"\u1E2C" => "I" - -# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2E" => "I" - -# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] -"\u1EC8" => "I" - -# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] -"\u1ECA" => "I" - -# Ⓘ [CIRCLED LATIN CAPITAL LETTER I] -"\u24BE" => "I" - -# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] -"\uA7FE" => "I" - -# I [FULLWIDTH LATIN CAPITAL LETTER I] -"\uFF29" => "I" - -# ì [LATIN SMALL LETTER I WITH GRAVE] -"\u00EC" => "i" - -# í [LATIN SMALL LETTER I WITH ACUTE] -"\u00ED" => "i" - -# î [LATIN SMALL LETTER I WITH CIRCUMFLEX] -"\u00EE" => "i" - -# ï [LATIN SMALL LETTER I WITH DIAERESIS] -"\u00EF" => "i" - -# ĩ [LATIN SMALL LETTER I WITH TILDE] -"\u0129" => "i" - -# ī [LATIN SMALL LETTER I WITH MACRON] -"\u012B" => "i" - -# ĭ [LATIN SMALL LETTER I WITH BREVE] -"\u012D" => "i" - -# į [LATIN SMALL LETTER I WITH OGONEK] -"\u012F" => "i" - -# ı [LATIN SMALL LETTER DOTLESS I] -"\u0131" => "i" - -# ǐ [LATIN SMALL LETTER I WITH CARON] -"\u01D0" => "i" - -# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] -"\u0209" => "i" - -# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] -"\u020B" => "i" - -# ɨ [LATIN SMALL LETTER I WITH STROKE] -"\u0268" => "i" - -# ᴉ [LATIN SMALL LETTER TURNED I] -"\u1D09" => "i" - -# ᵢ [LATIN SUBSCRIPT SMALL LETTER I] -"\u1D62" => "i" - -# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] -"\u1D7C" => "i" - 
-# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] -"\u1D96" => "i" - -# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] -"\u1E2D" => "i" - -# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2F" => "i" - -# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] -"\u1EC9" => "i" - -# ị [LATIN SMALL LETTER I WITH DOT BELOW] -"\u1ECB" => "i" - -# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] -"\u2071" => "i" - -# ⓘ [CIRCLED LATIN SMALL LETTER I] -"\u24D8" => "i" - -# i [FULLWIDTH LATIN SMALL LETTER I] -"\uFF49" => "i" - -# IJ [LATIN CAPITAL LIGATURE IJ] -"\u0132" => "IJ" - -# ⒤ [PARENTHESIZED LATIN SMALL LETTER I] -"\u24A4" => "(i)" - -# ij [LATIN SMALL LIGATURE IJ] -"\u0133" => "ij" - -# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] -"\u0134" => "J" - -# Ɉ [LATIN CAPITAL LETTER J WITH STROKE] -"\u0248" => "J" - -# ᴊ [LATIN LETTER SMALL CAPITAL J] -"\u1D0A" => "J" - -# Ⓙ [CIRCLED LATIN CAPITAL LETTER J] -"\u24BF" => "J" - -# J [FULLWIDTH LATIN CAPITAL LETTER J] -"\uFF2A" => "J" - -# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] -"\u0135" => "j" - -# ǰ [LATIN SMALL LETTER J WITH CARON] -"\u01F0" => "j" - -# ȷ [LATIN SMALL LETTER DOTLESS J] -"\u0237" => "j" - -# ɉ [LATIN SMALL LETTER J WITH STROKE] -"\u0249" => "j" - -# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] -"\u025F" => "j" - -# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] -"\u0284" => "j" - -# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] -"\u029D" => "j" - -# ⓙ [CIRCLED LATIN SMALL LETTER J] -"\u24D9" => "j" - -# ⱼ [LATIN SUBSCRIPT SMALL LETTER J] -"\u2C7C" => "j" - -# j [FULLWIDTH LATIN SMALL LETTER J] -"\uFF4A" => "j" - -# ⒥ [PARENTHESIZED LATIN SMALL LETTER J] -"\u24A5" => "(j)" - -# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] -"\u0136" => "K" - -# Ƙ [LATIN CAPITAL LETTER K WITH HOOK] -"\u0198" => "K" - -# Ǩ [LATIN CAPITAL LETTER K WITH CARON] -"\u01E8" => "K" - -# ᴋ [LATIN LETTER SMALL CAPITAL K] -"\u1D0B" => "K" - -# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] -"\u1E30" => "K" - -# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] -"\u1E32" => "K" - 
-# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] -"\u1E34" => "K" - -# Ⓚ [CIRCLED LATIN CAPITAL LETTER K] -"\u24C0" => "K" - -# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] -"\u2C69" => "K" - -# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] -"\uA740" => "K" - -# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] -"\uA742" => "K" - -# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA744" => "K" - -# K [FULLWIDTH LATIN CAPITAL LETTER K] -"\uFF2B" => "K" - -# ķ [LATIN SMALL LETTER K WITH CEDILLA] -"\u0137" => "k" - -# ƙ [LATIN SMALL LETTER K WITH HOOK] -"\u0199" => "k" - -# ǩ [LATIN SMALL LETTER K WITH CARON] -"\u01E9" => "k" - -# ʞ [LATIN SMALL LETTER TURNED K] -"\u029E" => "k" - -# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] -"\u1D84" => "k" - -# ḱ [LATIN SMALL LETTER K WITH ACUTE] -"\u1E31" => "k" - -# ḳ [LATIN SMALL LETTER K WITH DOT BELOW] -"\u1E33" => "k" - -# ḵ [LATIN SMALL LETTER K WITH LINE BELOW] -"\u1E35" => "k" - -# ⓚ [CIRCLED LATIN SMALL LETTER K] -"\u24DA" => "k" - -# ⱪ [LATIN SMALL LETTER K WITH DESCENDER] -"\u2C6A" => "k" - -# ꝁ [LATIN SMALL LETTER K WITH STROKE] -"\uA741" => "k" - -# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] -"\uA743" => "k" - -# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA745" => "k" - -# k [FULLWIDTH LATIN SMALL LETTER K] -"\uFF4B" => "k" - -# ⒦ [PARENTHESIZED LATIN SMALL LETTER K] -"\u24A6" => "(k)" - -# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] -"\u0139" => "L" - -# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] -"\u013B" => "L" - -# Ľ [LATIN CAPITAL LETTER L WITH CARON] -"\u013D" => "L" - -# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] -"\u013F" => "L" - -# Ł [LATIN CAPITAL LETTER L WITH STROKE] -"\u0141" => "L" - -# Ƚ [LATIN CAPITAL LETTER L WITH BAR] -"\u023D" => "L" - -# ʟ [LATIN LETTER SMALL CAPITAL L] -"\u029F" => "L" - -# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] -"\u1D0C" => "L" - -# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] -"\u1E36" => "L" - -# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] -"\u1E38" => 
"L" - -# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] -"\u1E3A" => "L" - -# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3C" => "L" - -# Ⓛ [CIRCLED LATIN CAPITAL LETTER L] -"\u24C1" => "L" - -# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] -"\u2C60" => "L" - -# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] -"\u2C62" => "L" - -# Ꝇ [LATIN CAPITAL LETTER BROKEN L] -"\uA746" => "L" - -# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] -"\uA748" => "L" - -# Ꞁ [LATIN CAPITAL LETTER TURNED L] -"\uA780" => "L" - -# L [FULLWIDTH LATIN CAPITAL LETTER L] -"\uFF2C" => "L" - -# ĺ [LATIN SMALL LETTER L WITH ACUTE] -"\u013A" => "l" - -# ļ [LATIN SMALL LETTER L WITH CEDILLA] -"\u013C" => "l" - -# ľ [LATIN SMALL LETTER L WITH CARON] -"\u013E" => "l" - -# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] -"\u0140" => "l" - -# ł [LATIN SMALL LETTER L WITH STROKE] -"\u0142" => "l" - -# ƚ [LATIN SMALL LETTER L WITH BAR] -"\u019A" => "l" - -# ȴ [LATIN SMALL LETTER L WITH CURL] -"\u0234" => "l" - -# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] -"\u026B" => "l" - -# ɬ [LATIN SMALL LETTER L WITH BELT] -"\u026C" => "l" - -# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] -"\u026D" => "l" - -# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] -"\u1D85" => "l" - -# ḷ [LATIN SMALL LETTER L WITH DOT BELOW] -"\u1E37" => "l" - -# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] -"\u1E39" => "l" - -# ḻ [LATIN SMALL LETTER L WITH LINE BELOW] -"\u1E3B" => "l" - -# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3D" => "l" - -# ⓛ [CIRCLED LATIN SMALL LETTER L] -"\u24DB" => "l" - -# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] -"\u2C61" => "l" - -# ꝇ [LATIN SMALL LETTER BROKEN L] -"\uA747" => "l" - -# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] -"\uA749" => "l" - -# ꞁ [LATIN SMALL LETTER TURNED L] -"\uA781" => "l" - -# l [FULLWIDTH LATIN SMALL LETTER L] -"\uFF4C" => "l" - -# LJ [LATIN CAPITAL LETTER LJ] -"\u01C7" => "LJ" - -# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] -"\u1EFA" => "LL" - -# Lj [LATIN CAPITAL LETTER L WITH 
SMALL LETTER J] -"\u01C8" => "Lj" - -# ⒧ [PARENTHESIZED LATIN SMALL LETTER L] -"\u24A7" => "(l)" - -# lj [LATIN SMALL LETTER LJ] -"\u01C9" => "lj" - -# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] -"\u1EFB" => "ll" - -# ʪ [LATIN SMALL LETTER LS DIGRAPH] -"\u02AA" => "ls" - -# ʫ [LATIN SMALL LETTER LZ DIGRAPH] -"\u02AB" => "lz" - -# Ɯ [LATIN CAPITAL LETTER TURNED M] -"\u019C" => "M" - -# ᴍ [LATIN LETTER SMALL CAPITAL M] -"\u1D0D" => "M" - -# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] -"\u1E3E" => "M" - -# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] -"\u1E40" => "M" - -# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] -"\u1E42" => "M" - -# Ⓜ [CIRCLED LATIN CAPITAL LETTER M] -"\u24C2" => "M" - -# Ɱ [LATIN CAPITAL LETTER M WITH HOOK] -"\u2C6E" => "M" - -# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] -"\uA7FD" => "M" - -# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] -"\uA7FF" => "M" - -# M [FULLWIDTH LATIN CAPITAL LETTER M] -"\uFF2D" => "M" - -# ɯ [LATIN SMALL LETTER TURNED M] -"\u026F" => "m" - -# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] -"\u0270" => "m" - -# ɱ [LATIN SMALL LETTER M WITH HOOK] -"\u0271" => "m" - -# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] -"\u1D6F" => "m" - -# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] -"\u1D86" => "m" - -# ḿ [LATIN SMALL LETTER M WITH ACUTE] -"\u1E3F" => "m" - -# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] -"\u1E41" => "m" - -# ṃ [LATIN SMALL LETTER M WITH DOT BELOW] -"\u1E43" => "m" - -# ⓜ [CIRCLED LATIN SMALL LETTER M] -"\u24DC" => "m" - -# m [FULLWIDTH LATIN SMALL LETTER M] -"\uFF4D" => "m" - -# ⒨ [PARENTHESIZED LATIN SMALL LETTER M] -"\u24A8" => "(m)" - -# Ñ [LATIN CAPITAL LETTER N WITH TILDE] -"\u00D1" => "N" - -# Ń [LATIN CAPITAL LETTER N WITH ACUTE] -"\u0143" => "N" - -# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] -"\u0145" => "N" - -# Ň [LATIN CAPITAL LETTER N WITH CARON] -"\u0147" => "N" - -# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] -"\u014A" => "N" - -# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] -"\u019D" => "N" - -# Ǹ [LATIN 
CAPITAL LETTER N WITH GRAVE] -"\u01F8" => "N" - -# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] -"\u0220" => "N" - -# ɴ [LATIN LETTER SMALL CAPITAL N] -"\u0274" => "N" - -# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] -"\u1D0E" => "N" - -# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] -"\u1E44" => "N" - -# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] -"\u1E46" => "N" - -# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] -"\u1E48" => "N" - -# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4A" => "N" - -# Ⓝ [CIRCLED LATIN CAPITAL LETTER N] -"\u24C3" => "N" - -# N [FULLWIDTH LATIN CAPITAL LETTER N] -"\uFF2E" => "N" - -# ñ [LATIN SMALL LETTER N WITH TILDE] -"\u00F1" => "n" - -# ń [LATIN SMALL LETTER N WITH ACUTE] -"\u0144" => "n" - -# ņ [LATIN SMALL LETTER N WITH CEDILLA] -"\u0146" => "n" - -# ň [LATIN SMALL LETTER N WITH CARON] -"\u0148" => "n" - -# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] -"\u0149" => "n" - -# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] -"\u014B" => "n" - -# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] -"\u019E" => "n" - -# ǹ [LATIN SMALL LETTER N WITH GRAVE] -"\u01F9" => "n" - -# ȵ [LATIN SMALL LETTER N WITH CURL] -"\u0235" => "n" - -# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] -"\u0272" => "n" - -# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] -"\u0273" => "n" - -# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] -"\u1D70" => "n" - -# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] -"\u1D87" => "n" - -# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] -"\u1E45" => "n" - -# ṇ [LATIN SMALL LETTER N WITH DOT BELOW] -"\u1E47" => "n" - -# ṉ [LATIN SMALL LETTER N WITH LINE BELOW] -"\u1E49" => "n" - -# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4B" => "n" - -# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] -"\u207F" => "n" - -# ⓝ [CIRCLED LATIN SMALL LETTER N] -"\u24DD" => "n" - -# n [FULLWIDTH LATIN SMALL LETTER N] -"\uFF4E" => "n" - -# NJ [LATIN CAPITAL LETTER NJ] -"\u01CA" => "NJ" - -# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] -"\u01CB" => 
"Nj" - -# ⒩ [PARENTHESIZED LATIN SMALL LETTER N] -"\u24A9" => "(n)" - -# nj [LATIN SMALL LETTER NJ] -"\u01CC" => "nj" - -# Ò [LATIN CAPITAL LETTER O WITH GRAVE] -"\u00D2" => "O" - -# Ó [LATIN CAPITAL LETTER O WITH ACUTE] -"\u00D3" => "O" - -# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] -"\u00D4" => "O" - -# Õ [LATIN CAPITAL LETTER O WITH TILDE] -"\u00D5" => "O" - -# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] -"\u00D6" => "O" - -# Ø [LATIN CAPITAL LETTER O WITH STROKE] -"\u00D8" => "O" - -# Ō [LATIN CAPITAL LETTER O WITH MACRON] -"\u014C" => "O" - -# Ŏ [LATIN CAPITAL LETTER O WITH BREVE] -"\u014E" => "O" - -# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] -"\u0150" => "O" - -# Ɔ [LATIN CAPITAL LETTER OPEN O] -"\u0186" => "O" - -# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] -"\u019F" => "O" - -# Ơ [LATIN CAPITAL LETTER O WITH HORN] -"\u01A0" => "O" - -# Ǒ [LATIN CAPITAL LETTER O WITH CARON] -"\u01D1" => "O" - -# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] -"\u01EA" => "O" - -# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] -"\u01EC" => "O" - -# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] -"\u01FE" => "O" - -# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] -"\u020C" => "O" - -# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] -"\u020E" => "O" - -# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] -"\u022A" => "O" - -# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] -"\u022C" => "O" - -# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] -"\u022E" => "O" - -# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] -"\u0230" => "O" - -# ᴏ [LATIN LETTER SMALL CAPITAL O] -"\u1D0F" => "O" - -# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] -"\u1D10" => "O" - -# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] -"\u1E4C" => "O" - -# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4E" => "O" - -# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] -"\u1E50" => "O" - -# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] -"\u1E52" => "O" - -# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] -"\u1ECC" 
=> "O" - -# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] -"\u1ECE" => "O" - -# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED0" => "O" - -# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED2" => "O" - -# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED4" => "O" - -# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED6" => "O" - -# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED8" => "O" - -# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] -"\u1EDA" => "O" - -# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] -"\u1EDC" => "O" - -# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDE" => "O" - -# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] -"\u1EE0" => "O" - -# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] -"\u1EE2" => "O" - -# Ⓞ [CIRCLED LATIN CAPITAL LETTER O] -"\u24C4" => "O" - -# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] -"\uA74A" => "O" - -# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] -"\uA74C" => "O" - -# O [FULLWIDTH LATIN CAPITAL LETTER O] -"\uFF2F" => "O" - -# ò [LATIN SMALL LETTER O WITH GRAVE] -"\u00F2" => "o" - -# ó [LATIN SMALL LETTER O WITH ACUTE] -"\u00F3" => "o" - -# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] -"\u00F4" => "o" - -# õ [LATIN SMALL LETTER O WITH TILDE] -"\u00F5" => "o" - -# ö [LATIN SMALL LETTER O WITH DIAERESIS] -"\u00F6" => "o" - -# ø [LATIN SMALL LETTER O WITH STROKE] -"\u00F8" => "o" - -# ō [LATIN SMALL LETTER O WITH MACRON] -"\u014D" => "o" - -# ŏ [LATIN SMALL LETTER O WITH BREVE] -"\u014F" => "o" - -# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] -"\u0151" => "o" - -# ơ [LATIN SMALL LETTER O WITH HORN] -"\u01A1" => "o" - -# ǒ [LATIN SMALL LETTER O WITH CARON] -"\u01D2" => "o" - -# ǫ [LATIN SMALL LETTER O WITH OGONEK] -"\u01EB" => "o" - -# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] -"\u01ED" => "o" - -# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] -"\u01FF" => "o" - -# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] -"\u020D" => "o" - -# ȏ [LATIN SMALL 
LETTER O WITH INVERTED BREVE] -"\u020F" => "o" - -# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] -"\u022B" => "o" - -# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] -"\u022D" => "o" - -# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] -"\u022F" => "o" - -# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] -"\u0231" => "o" - -# ɔ [LATIN SMALL LETTER OPEN O] -"\u0254" => "o" - -# ɵ [LATIN SMALL LETTER BARRED O] -"\u0275" => "o" - -# ᴖ [LATIN SMALL LETTER TOP HALF O] -"\u1D16" => "o" - -# ᴗ [LATIN SMALL LETTER BOTTOM HALF O] -"\u1D17" => "o" - -# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] -"\u1D97" => "o" - -# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] -"\u1E4D" => "o" - -# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4F" => "o" - -# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] -"\u1E51" => "o" - -# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] -"\u1E53" => "o" - -# ọ [LATIN SMALL LETTER O WITH DOT BELOW] -"\u1ECD" => "o" - -# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] -"\u1ECF" => "o" - -# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED1" => "o" - -# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED3" => "o" - -# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED5" => "o" - -# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED7" => "o" - -# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED9" => "o" - -# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] -"\u1EDB" => "o" - -# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] -"\u1EDD" => "o" - -# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDF" => "o" - -# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] -"\u1EE1" => "o" - -# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] -"\u1EE3" => "o" - -# ₒ [LATIN SUBSCRIPT SMALL LETTER O] -"\u2092" => "o" - -# ⓞ [CIRCLED LATIN SMALL LETTER O] -"\u24DE" => "o" - -# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] -"\u2C7A" => "o" - -# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] -"\uA74B" => "o" - -# ꝍ 
[LATIN SMALL LETTER O WITH LOOP] -"\uA74D" => "o" - -# o [FULLWIDTH LATIN SMALL LETTER O] -"\uFF4F" => "o" - -# Œ [LATIN CAPITAL LIGATURE OE] -"\u0152" => "OE" - -# ɶ [LATIN LETTER SMALL CAPITAL OE] -"\u0276" => "OE" - -# Ꝏ [LATIN CAPITAL LETTER OO] -"\uA74E" => "OO" - -# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] -"\u0222" => "OU" - -# ᴕ [LATIN LETTER SMALL CAPITAL OU] -"\u1D15" => "OU" - -# ⒪ [PARENTHESIZED LATIN SMALL LETTER O] -"\u24AA" => "(o)" - -# œ [LATIN SMALL LIGATURE OE] -"\u0153" => "oe" - -# ᴔ [LATIN SMALL LETTER TURNED OE] -"\u1D14" => "oe" - -# ꝏ [LATIN SMALL LETTER OO] -"\uA74F" => "oo" - -# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] -"\u0223" => "ou" - -# Ƥ [LATIN CAPITAL LETTER P WITH HOOK] -"\u01A4" => "P" - -# ᴘ [LATIN LETTER SMALL CAPITAL P] -"\u1D18" => "P" - -# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] -"\u1E54" => "P" - -# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] -"\u1E56" => "P" - -# Ⓟ [CIRCLED LATIN CAPITAL LETTER P] -"\u24C5" => "P" - -# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] -"\u2C63" => "P" - -# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA750" => "P" - -# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] -"\uA752" => "P" - -# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] -"\uA754" => "P" - -# P [FULLWIDTH LATIN CAPITAL LETTER P] -"\uFF30" => "P" - -# ƥ [LATIN SMALL LETTER P WITH HOOK] -"\u01A5" => "p" - -# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] -"\u1D71" => "p" - -# ᵽ [LATIN SMALL LETTER P WITH STROKE] -"\u1D7D" => "p" - -# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] -"\u1D88" => "p" - -# ṕ [LATIN SMALL LETTER P WITH ACUTE] -"\u1E55" => "p" - -# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] -"\u1E57" => "p" - -# ⓟ [CIRCLED LATIN SMALL LETTER P] -"\u24DF" => "p" - -# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA751" => "p" - -# ꝓ [LATIN SMALL LETTER P WITH FLOURISH] -"\uA753" => "p" - -# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] -"\uA755" => "p" - -# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED 
P] -"\uA7FC" => "p" - -# p [FULLWIDTH LATIN SMALL LETTER P] -"\uFF50" => "p" - -# ⒫ [PARENTHESIZED LATIN SMALL LETTER P] -"\u24AB" => "(p)" - -# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] -"\u024A" => "Q" - -# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] -"\u24C6" => "Q" - -# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA756" => "Q" - -# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] -"\uA758" => "Q" - -# Q [FULLWIDTH LATIN CAPITAL LETTER Q] -"\uFF31" => "Q" - -# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] -"\u0138" => "q" - -# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] -"\u024B" => "q" - -# ʠ [LATIN SMALL LETTER Q WITH HOOK] -"\u02A0" => "q" - -# ⓠ [CIRCLED LATIN SMALL LETTER Q] -"\u24E0" => "q" - -# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA757" => "q" - -# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] -"\uA759" => "q" - -# q [FULLWIDTH LATIN SMALL LETTER Q] -"\uFF51" => "q" - -# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] -"\u24AC" => "(q)" - -# ȹ [LATIN SMALL LETTER QP DIGRAPH] -"\u0239" => "qp" - -# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] -"\u0154" => "R" - -# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] -"\u0156" => "R" - -# Ř [LATIN CAPITAL LETTER R WITH CARON] -"\u0158" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] -"\u0210" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] -"\u0212" => "R" - -# Ɍ [LATIN CAPITAL LETTER R WITH STROKE] -"\u024C" => "R" - -# ʀ [LATIN LETTER SMALL CAPITAL R] -"\u0280" => "R" - -# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] -"\u0281" => "R" - -# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] -"\u1D19" => "R" - -# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] -"\u1D1A" => "R" - -# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] -"\u1E58" => "R" - -# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] -"\u1E5A" => "R" - -# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5C" => "R" - -# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] -"\u1E5E" => "R" - -# Ⓡ [CIRCLED LATIN CAPITAL LETTER R] 
-"\u24C7" => "R" - -# Ɽ [LATIN CAPITAL LETTER R WITH TAIL] -"\u2C64" => "R" - -# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] -"\uA75A" => "R" - -# Ꞃ [LATIN CAPITAL LETTER INSULAR R] -"\uA782" => "R" - -# R [FULLWIDTH LATIN CAPITAL LETTER R] -"\uFF32" => "R" - -# ŕ [LATIN SMALL LETTER R WITH ACUTE] -"\u0155" => "r" - -# ŗ [LATIN SMALL LETTER R WITH CEDILLA] -"\u0157" => "r" - -# ř [LATIN SMALL LETTER R WITH CARON] -"\u0159" => "r" - -# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] -"\u0211" => "r" - -# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] -"\u0213" => "r" - -# ɍ [LATIN SMALL LETTER R WITH STROKE] -"\u024D" => "r" - -# ɼ [LATIN SMALL LETTER R WITH LONG LEG] -"\u027C" => "r" - -# ɽ [LATIN SMALL LETTER R WITH TAIL] -"\u027D" => "r" - -# ɾ [LATIN SMALL LETTER R WITH FISHHOOK] -"\u027E" => "r" - -# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] -"\u027F" => "r" - -# ᵣ [LATIN SUBSCRIPT SMALL LETTER R] -"\u1D63" => "r" - -# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] -"\u1D72" => "r" - -# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] -"\u1D73" => "r" - -# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] -"\u1D89" => "r" - -# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] -"\u1E59" => "r" - -# ṛ [LATIN SMALL LETTER R WITH DOT BELOW] -"\u1E5B" => "r" - -# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5D" => "r" - -# ṟ [LATIN SMALL LETTER R WITH LINE BELOW] -"\u1E5F" => "r" - -# ⓡ [CIRCLED LATIN SMALL LETTER R] -"\u24E1" => "r" - -# ꝛ [LATIN SMALL LETTER R ROTUNDA] -"\uA75B" => "r" - -# ꞃ [LATIN SMALL LETTER INSULAR R] -"\uA783" => "r" - -# r [FULLWIDTH LATIN SMALL LETTER R] -"\uFF52" => "r" - -# ⒭ [PARENTHESIZED LATIN SMALL LETTER R] -"\u24AD" => "(r)" - -# Ś [LATIN CAPITAL LETTER S WITH ACUTE] -"\u015A" => "S" - -# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] -"\u015C" => "S" - -# Ş [LATIN CAPITAL LETTER S WITH CEDILLA] -"\u015E" => "S" - -# Š [LATIN CAPITAL LETTER S WITH CARON] -"\u0160" => "S" - -# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] -"\u0218" => "S" - -# Ṡ 
[LATIN CAPITAL LETTER S WITH DOT ABOVE] -"\u1E60" => "S" - -# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] -"\u1E62" => "S" - -# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E64" => "S" - -# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] -"\u1E66" => "S" - -# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E68" => "S" - -# Ⓢ [CIRCLED LATIN CAPITAL LETTER S] -"\u24C8" => "S" - -# ꜱ [LATIN LETTER SMALL CAPITAL S] -"\uA731" => "S" - -# ꞅ [LATIN SMALL LETTER INSULAR S] -"\uA785" => "S" - -# S [FULLWIDTH LATIN CAPITAL LETTER S] -"\uFF33" => "S" - -# ś [LATIN SMALL LETTER S WITH ACUTE] -"\u015B" => "s" - -# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] -"\u015D" => "s" - -# ş [LATIN SMALL LETTER S WITH CEDILLA] -"\u015F" => "s" - -# š [LATIN SMALL LETTER S WITH CARON] -"\u0161" => "s" - -# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] -"\u017F" => "s" - -# ș [LATIN SMALL LETTER S WITH COMMA BELOW] -"\u0219" => "s" - -# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL] -"\u023F" => "s" - -# ʂ [LATIN SMALL LETTER S WITH HOOK] -"\u0282" => "s" - -# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] -"\u1D74" => "s" - -# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] -"\u1D8A" => "s" - -# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] -"\u1E61" => "s" - -# ṣ [LATIN SMALL LETTER S WITH DOT BELOW] -"\u1E63" => "s" - -# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E65" => "s" - -# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] -"\u1E67" => "s" - -# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E69" => "s" - -# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] -"\u1E9C" => "s" - -# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] -"\u1E9D" => "s" - -# ⓢ [CIRCLED LATIN SMALL LETTER S] -"\u24E2" => "s" - -# Ꞅ [LATIN CAPITAL LETTER INSULAR S] -"\uA784" => "s" - -# s [FULLWIDTH LATIN SMALL LETTER S] -"\uFF53" => "s" - -# ẞ [LATIN CAPITAL LETTER SHARP S] -"\u1E9E" => "SS" - -# ⒮ [PARENTHESIZED LATIN SMALL LETTER S] -"\u24AE" => "(s)" - -# ß 
[LATIN SMALL LETTER SHARP S] -"\u00DF" => "ss" - -# st [LATIN SMALL LIGATURE ST] -"\uFB06" => "st" - -# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] -"\u0162" => "T" - -# Ť [LATIN CAPITAL LETTER T WITH CARON] -"\u0164" => "T" - -# Ŧ [LATIN CAPITAL LETTER T WITH STROKE] -"\u0166" => "T" - -# Ƭ [LATIN CAPITAL LETTER T WITH HOOK] -"\u01AC" => "T" - -# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] -"\u01AE" => "T" - -# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] -"\u021A" => "T" - -# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] -"\u023E" => "T" - -# ᴛ [LATIN LETTER SMALL CAPITAL T] -"\u1D1B" => "T" - -# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] -"\u1E6A" => "T" - -# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] -"\u1E6C" => "T" - -# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] -"\u1E6E" => "T" - -# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E70" => "T" - -# Ⓣ [CIRCLED LATIN CAPITAL LETTER T] -"\u24C9" => "T" - -# Ꞇ [LATIN CAPITAL LETTER INSULAR T] -"\uA786" => "T" - -# T [FULLWIDTH LATIN CAPITAL LETTER T] -"\uFF34" => "T" - -# ţ [LATIN SMALL LETTER T WITH CEDILLA] -"\u0163" => "t" - -# ť [LATIN SMALL LETTER T WITH CARON] -"\u0165" => "t" - -# ŧ [LATIN SMALL LETTER T WITH STROKE] -"\u0167" => "t" - -# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] -"\u01AB" => "t" - -# ƭ [LATIN SMALL LETTER T WITH HOOK] -"\u01AD" => "t" - -# ț [LATIN SMALL LETTER T WITH COMMA BELOW] -"\u021B" => "t" - -# ȶ [LATIN SMALL LETTER T WITH CURL] -"\u0236" => "t" - -# ʇ [LATIN SMALL LETTER TURNED T] -"\u0287" => "t" - -# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] -"\u0288" => "t" - -# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] -"\u1D75" => "t" - -# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] -"\u1E6B" => "t" - -# ṭ [LATIN SMALL LETTER T WITH DOT BELOW] -"\u1E6D" => "t" - -# ṯ [LATIN SMALL LETTER T WITH LINE BELOW] -"\u1E6F" => "t" - -# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E71" => "t" - -# ẗ [LATIN SMALL LETTER T WITH DIAERESIS] -"\u1E97" => "t" - -# ⓣ [CIRCLED LATIN SMALL LETTER T] 
-"\u24E3" => "t" - -# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] -"\u2C66" => "t" - -# t [FULLWIDTH LATIN SMALL LETTER T] -"\uFF54" => "t" - -# Þ [LATIN CAPITAL LETTER THORN] -"\u00DE" => "TH" - -# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA766" => "TH" - -# Ꜩ [LATIN CAPITAL LETTER TZ] -"\uA728" => "TZ" - -# ⒯ [PARENTHESIZED LATIN SMALL LETTER T] -"\u24AF" => "(t)" - -# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] -"\u02A8" => "tc" - -# þ [LATIN SMALL LETTER THORN] -"\u00FE" => "th" - -# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] -"\u1D7A" => "th" - -# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA767" => "th" - -# ʦ [LATIN SMALL LETTER TS DIGRAPH] -"\u02A6" => "ts" - -# ꜩ [LATIN SMALL LETTER TZ] -"\uA729" => "tz" - -# Ù [LATIN CAPITAL LETTER U WITH GRAVE] -"\u00D9" => "U" - -# Ú [LATIN CAPITAL LETTER U WITH ACUTE] -"\u00DA" => "U" - -# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] -"\u00DB" => "U" - -# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] -"\u00DC" => "U" - -# Ũ [LATIN CAPITAL LETTER U WITH TILDE] -"\u0168" => "U" - -# Ū [LATIN CAPITAL LETTER U WITH MACRON] -"\u016A" => "U" - -# Ŭ [LATIN CAPITAL LETTER U WITH BREVE] -"\u016C" => "U" - -# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] -"\u016E" => "U" - -# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] -"\u0170" => "U" - -# Ų [LATIN CAPITAL LETTER U WITH OGONEK] -"\u0172" => "U" - -# Ư [LATIN CAPITAL LETTER U WITH HORN] -"\u01AF" => "U" - -# Ǔ [LATIN CAPITAL LETTER U WITH CARON] -"\u01D3" => "U" - -# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] -"\u01D5" => "U" - -# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D7" => "U" - -# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] -"\u01D9" => "U" - -# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DB" => "U" - -# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] -"\u0214" => "U" - -# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] -"\u0216" => "U" - -# Ʉ [LATIN CAPITAL LETTER U BAR] -"\u0244" => "U" - 
-# ᴜ [LATIN LETTER SMALL CAPITAL U] -"\u1D1C" => "U" - -# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] -"\u1D7E" => "U" - -# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] -"\u1E72" => "U" - -# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] -"\u1E74" => "U" - -# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E76" => "U" - -# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] -"\u1E78" => "U" - -# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7A" => "U" - -# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] -"\u1EE4" => "U" - -# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] -"\u1EE6" => "U" - -# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] -"\u1EE8" => "U" - -# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] -"\u1EEA" => "U" - -# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EEC" => "U" - -# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] -"\u1EEE" => "U" - -# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] -"\u1EF0" => "U" - -# Ⓤ [CIRCLED LATIN CAPITAL LETTER U] -"\u24CA" => "U" - -# U [FULLWIDTH LATIN CAPITAL LETTER U] -"\uFF35" => "U" - -# ù [LATIN SMALL LETTER U WITH GRAVE] -"\u00F9" => "u" - -# ú [LATIN SMALL LETTER U WITH ACUTE] -"\u00FA" => "u" - -# û [LATIN SMALL LETTER U WITH CIRCUMFLEX] -"\u00FB" => "u" - -# ü [LATIN SMALL LETTER U WITH DIAERESIS] -"\u00FC" => "u" - -# ũ [LATIN SMALL LETTER U WITH TILDE] -"\u0169" => "u" - -# ū [LATIN SMALL LETTER U WITH MACRON] -"\u016B" => "u" - -# ŭ [LATIN SMALL LETTER U WITH BREVE] -"\u016D" => "u" - -# ů [LATIN SMALL LETTER U WITH RING ABOVE] -"\u016F" => "u" - -# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] -"\u0171" => "u" - -# ų [LATIN SMALL LETTER U WITH OGONEK] -"\u0173" => "u" - -# ư [LATIN SMALL LETTER U WITH HORN] -"\u01B0" => "u" - -# ǔ [LATIN SMALL LETTER U WITH CARON] -"\u01D4" => "u" - -# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] -"\u01D6" => "u" - -# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D8" => "u" - -# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] -"\u01DA" 
=> "u" - -# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DC" => "u" - -# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] -"\u0215" => "u" - -# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] -"\u0217" => "u" - -# ʉ [LATIN SMALL LETTER U BAR] -"\u0289" => "u" - -# ᵤ [LATIN SUBSCRIPT SMALL LETTER U] -"\u1D64" => "u" - -# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] -"\u1D99" => "u" - -# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] -"\u1E73" => "u" - -# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] -"\u1E75" => "u" - -# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E77" => "u" - -# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] -"\u1E79" => "u" - -# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7B" => "u" - -# ụ [LATIN SMALL LETTER U WITH DOT BELOW] -"\u1EE5" => "u" - -# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] -"\u1EE7" => "u" - -# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] -"\u1EE9" => "u" - -# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] -"\u1EEB" => "u" - -# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EED" => "u" - -# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] -"\u1EEF" => "u" - -# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] -"\u1EF1" => "u" - -# ⓤ [CIRCLED LATIN SMALL LETTER U] -"\u24E4" => "u" - -# u [FULLWIDTH LATIN SMALL LETTER U] -"\uFF55" => "u" - -# ⒰ [PARENTHESIZED LATIN SMALL LETTER U] -"\u24B0" => "(u)" - -# ᵫ [LATIN SMALL LETTER UE] -"\u1D6B" => "ue" - -# Ʋ [LATIN CAPITAL LETTER V WITH HOOK] -"\u01B2" => "V" - -# Ʌ [LATIN CAPITAL LETTER TURNED V] -"\u0245" => "V" - -# ᴠ [LATIN LETTER SMALL CAPITAL V] -"\u1D20" => "V" - -# Ṽ [LATIN CAPITAL LETTER V WITH TILDE] -"\u1E7C" => "V" - -# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] -"\u1E7E" => "V" - -# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] -"\u1EFC" => "V" - -# Ⓥ [CIRCLED LATIN CAPITAL LETTER V] -"\u24CB" => "V" - -# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] -"\uA75E" => "V" - -# Ꝩ [LATIN CAPITAL LETTER VEND] -"\uA768" => "V" - -# V [FULLWIDTH LATIN CAPITAL LETTER V] 
-"\uFF36" => "V" - -# ʋ [LATIN SMALL LETTER V WITH HOOK] -"\u028B" => "v" - -# ʌ [LATIN SMALL LETTER TURNED V] -"\u028C" => "v" - -# ᵥ [LATIN SUBSCRIPT SMALL LETTER V] -"\u1D65" => "v" - -# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] -"\u1D8C" => "v" - -# ṽ [LATIN SMALL LETTER V WITH TILDE] -"\u1E7D" => "v" - -# ṿ [LATIN SMALL LETTER V WITH DOT BELOW] -"\u1E7F" => "v" - -# ⓥ [CIRCLED LATIN SMALL LETTER V] -"\u24E5" => "v" - -# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] -"\u2C71" => "v" - -# ⱴ [LATIN SMALL LETTER V WITH CURL] -"\u2C74" => "v" - -# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] -"\uA75F" => "v" - -# v [FULLWIDTH LATIN SMALL LETTER V] -"\uFF56" => "v" - -# Ꝡ [LATIN CAPITAL LETTER VY] -"\uA760" => "VY" - -# ⒱ [PARENTHESIZED LATIN SMALL LETTER V] -"\u24B1" => "(v)" - -# ꝡ [LATIN SMALL LETTER VY] -"\uA761" => "vy" - -# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] -"\u0174" => "W" - -# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] -"\u01F7" => "W" - -# ᴡ [LATIN LETTER SMALL CAPITAL W] -"\u1D21" => "W" - -# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] -"\u1E80" => "W" - -# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] -"\u1E82" => "W" - -# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] -"\u1E84" => "W" - -# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] -"\u1E86" => "W" - -# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] -"\u1E88" => "W" - -# Ⓦ [CIRCLED LATIN CAPITAL LETTER W] -"\u24CC" => "W" - -# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] -"\u2C72" => "W" - -# W [FULLWIDTH LATIN CAPITAL LETTER W] -"\uFF37" => "W" - -# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] -"\u0175" => "w" - -# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] -"\u01BF" => "w" - -# ʍ [LATIN SMALL LETTER TURNED W] -"\u028D" => "w" - -# ẁ [LATIN SMALL LETTER W WITH GRAVE] -"\u1E81" => "w" - -# ẃ [LATIN SMALL LETTER W WITH ACUTE] -"\u1E83" => "w" - -# ẅ [LATIN SMALL LETTER W WITH DIAERESIS] -"\u1E85" => "w" - -# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] -"\u1E87" => "w" - -# ẉ [LATIN SMALL LETTER W WITH DOT 
BELOW] -"\u1E89" => "w" - -# ẘ [LATIN SMALL LETTER W WITH RING ABOVE] -"\u1E98" => "w" - -# ⓦ [CIRCLED LATIN SMALL LETTER W] -"\u24E6" => "w" - -# ⱳ [LATIN SMALL LETTER W WITH HOOK] -"\u2C73" => "w" - -# w [FULLWIDTH LATIN SMALL LETTER W] -"\uFF57" => "w" - -# ⒲ [PARENTHESIZED LATIN SMALL LETTER W] -"\u24B2" => "(w)" - -# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] -"\u1E8A" => "X" - -# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] -"\u1E8C" => "X" - -# Ⓧ [CIRCLED LATIN CAPITAL LETTER X] -"\u24CD" => "X" - -# X [FULLWIDTH LATIN CAPITAL LETTER X] -"\uFF38" => "X" - -# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] -"\u1D8D" => "x" - -# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] -"\u1E8B" => "x" - -# ẍ [LATIN SMALL LETTER X WITH DIAERESIS] -"\u1E8D" => "x" - -# ₓ [LATIN SUBSCRIPT SMALL LETTER X] -"\u2093" => "x" - -# ⓧ [CIRCLED LATIN SMALL LETTER X] -"\u24E7" => "x" - -# x [FULLWIDTH LATIN SMALL LETTER X] -"\uFF58" => "x" - -# ⒳ [PARENTHESIZED LATIN SMALL LETTER X] -"\u24B3" => "(x)" - -# Ý [LATIN CAPITAL LETTER Y WITH ACUTE] -"\u00DD" => "Y" - -# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] -"\u0176" => "Y" - -# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] -"\u0178" => "Y" - -# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] -"\u01B3" => "Y" - -# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] -"\u0232" => "Y" - -# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] -"\u024E" => "Y" - -# ʏ [LATIN LETTER SMALL CAPITAL Y] -"\u028F" => "Y" - -# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] -"\u1E8E" => "Y" - -# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] -"\u1EF2" => "Y" - -# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] -"\u1EF4" => "Y" - -# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] -"\u1EF6" => "Y" - -# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] -"\u1EF8" => "Y" - -# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] -"\u1EFE" => "Y" - -# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] -"\u24CE" => "Y" - -# Y [FULLWIDTH LATIN CAPITAL LETTER Y] -"\uFF39" => "Y" - -# ý [LATIN SMALL LETTER Y WITH ACUTE] -"\u00FD" => "y" - -# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] 
-"\u00FF" => "y" - -# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] -"\u0177" => "y" - -# ƴ [LATIN SMALL LETTER Y WITH HOOK] -"\u01B4" => "y" - -# ȳ [LATIN SMALL LETTER Y WITH MACRON] -"\u0233" => "y" - -# ɏ [LATIN SMALL LETTER Y WITH STROKE] -"\u024F" => "y" - -# ʎ [LATIN SMALL LETTER TURNED Y] -"\u028E" => "y" - -# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] -"\u1E8F" => "y" - -# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] -"\u1E99" => "y" - -# ỳ [LATIN SMALL LETTER Y WITH GRAVE] -"\u1EF3" => "y" - -# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] -"\u1EF5" => "y" - -# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] -"\u1EF7" => "y" - -# ỹ [LATIN SMALL LETTER Y WITH TILDE] -"\u1EF9" => "y" - -# ỿ [LATIN SMALL LETTER Y WITH LOOP] -"\u1EFF" => "y" - -# ⓨ [CIRCLED LATIN SMALL LETTER Y] -"\u24E8" => "y" - -# y [FULLWIDTH LATIN SMALL LETTER Y] -"\uFF59" => "y" - -# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] -"\u24B4" => "(y)" - -# Ź [LATIN CAPITAL LETTER Z WITH ACUTE] -"\u0179" => "Z" - -# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] -"\u017B" => "Z" - -# Ž [LATIN CAPITAL LETTER Z WITH CARON] -"\u017D" => "Z" - -# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] -"\u01B5" => "Z" - -# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] -"\u021C" => "Z" - -# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] -"\u0224" => "Z" - -# ᴢ [LATIN LETTER SMALL CAPITAL Z] -"\u1D22" => "Z" - -# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] -"\u1E90" => "Z" - -# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] -"\u1E92" => "Z" - -# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW] -"\u1E94" => "Z" - -# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] -"\u24CF" => "Z" - -# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] -"\u2C6B" => "Z" - -# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] -"\uA762" => "Z" - -# Z [FULLWIDTH LATIN CAPITAL LETTER Z] -"\uFF3A" => "Z" - -# ź [LATIN SMALL LETTER Z WITH ACUTE] -"\u017A" => "z" - -# ż [LATIN SMALL LETTER Z WITH DOT ABOVE] -"\u017C" => "z" - -# ž [LATIN SMALL LETTER Z WITH CARON] -"\u017E" => "z" - -# ƶ [LATIN SMALL LETTER Z WITH 
STROKE] -"\u01B6" => "z" - -# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] -"\u021D" => "z" - -# ȥ [LATIN SMALL LETTER Z WITH HOOK] -"\u0225" => "z" - -# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] -"\u0240" => "z" - -# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] -"\u0290" => "z" - -# ʑ [LATIN SMALL LETTER Z WITH CURL] -"\u0291" => "z" - -# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] -"\u1D76" => "z" - -# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] -"\u1D8E" => "z" - -# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] -"\u1E91" => "z" - -# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] -"\u1E93" => "z" - -# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] -"\u1E95" => "z" - -# ⓩ [CIRCLED LATIN SMALL LETTER Z] -"\u24E9" => "z" - -# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] -"\u2C6C" => "z" - -# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] -"\uA763" => "z" - -# z [FULLWIDTH LATIN SMALL LETTER Z] -"\uFF5A" => "z" - -# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] -"\u24B5" => "(z)" - -# ⁰ [SUPERSCRIPT ZERO] -"\u2070" => "0" - -# ₀ [SUBSCRIPT ZERO] -"\u2080" => "0" - -# ⓪ [CIRCLED DIGIT ZERO] -"\u24EA" => "0" - -# ⓿ [NEGATIVE CIRCLED DIGIT ZERO] -"\u24FF" => "0" - -# 0 [FULLWIDTH DIGIT ZERO] -"\uFF10" => "0" - -# ¹ [SUPERSCRIPT ONE] -"\u00B9" => "1" - -# ₁ [SUBSCRIPT ONE] -"\u2081" => "1" - -# ① [CIRCLED DIGIT ONE] -"\u2460" => "1" - -# ⓵ [DOUBLE CIRCLED DIGIT ONE] -"\u24F5" => "1" - -# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] -"\u2776" => "1" - -# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] -"\u2780" => "1" - -# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] -"\u278A" => "1" - -# 1 [FULLWIDTH DIGIT ONE] -"\uFF11" => "1" - -# ⒈ [DIGIT ONE FULL STOP] -"\u2488" => "1." 
- -# ⑴ [PARENTHESIZED DIGIT ONE] -"\u2474" => "(1)" - -# ² [SUPERSCRIPT TWO] -"\u00B2" => "2" - -# ₂ [SUBSCRIPT TWO] -"\u2082" => "2" - -# ② [CIRCLED DIGIT TWO] -"\u2461" => "2" - -# ⓶ [DOUBLE CIRCLED DIGIT TWO] -"\u24F6" => "2" - -# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] -"\u2777" => "2" - -# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] -"\u2781" => "2" - -# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] -"\u278B" => "2" - -# 2 [FULLWIDTH DIGIT TWO] -"\uFF12" => "2" - -# ⒉ [DIGIT TWO FULL STOP] -"\u2489" => "2." - -# ⑵ [PARENTHESIZED DIGIT TWO] -"\u2475" => "(2)" - -# ³ [SUPERSCRIPT THREE] -"\u00B3" => "3" - -# ₃ [SUBSCRIPT THREE] -"\u2083" => "3" - -# ③ [CIRCLED DIGIT THREE] -"\u2462" => "3" - -# ⓷ [DOUBLE CIRCLED DIGIT THREE] -"\u24F7" => "3" - -# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] -"\u2778" => "3" - -# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] -"\u2782" => "3" - -# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] -"\u278C" => "3" - -# 3 [FULLWIDTH DIGIT THREE] -"\uFF13" => "3" - -# ⒊ [DIGIT THREE FULL STOP] -"\u248A" => "3." - -# ⑶ [PARENTHESIZED DIGIT THREE] -"\u2476" => "(3)" - -# ⁴ [SUPERSCRIPT FOUR] -"\u2074" => "4" - -# ₄ [SUBSCRIPT FOUR] -"\u2084" => "4" - -# ④ [CIRCLED DIGIT FOUR] -"\u2463" => "4" - -# ⓸ [DOUBLE CIRCLED DIGIT FOUR] -"\u24F8" => "4" - -# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] -"\u2779" => "4" - -# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] -"\u2783" => "4" - -# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] -"\u278D" => "4" - -# 4 [FULLWIDTH DIGIT FOUR] -"\uFF14" => "4" - -# ⒋ [DIGIT FOUR FULL STOP] -"\u248B" => "4." 
- -# ⑷ [PARENTHESIZED DIGIT FOUR] -"\u2477" => "(4)" - -# ⁵ [SUPERSCRIPT FIVE] -"\u2075" => "5" - -# ₅ [SUBSCRIPT FIVE] -"\u2085" => "5" - -# ⑤ [CIRCLED DIGIT FIVE] -"\u2464" => "5" - -# ⓹ [DOUBLE CIRCLED DIGIT FIVE] -"\u24F9" => "5" - -# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] -"\u277A" => "5" - -# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] -"\u2784" => "5" - -# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] -"\u278E" => "5" - -# 5 [FULLWIDTH DIGIT FIVE] -"\uFF15" => "5" - -# ⒌ [DIGIT FIVE FULL STOP] -"\u248C" => "5." - -# ⑸ [PARENTHESIZED DIGIT FIVE] -"\u2478" => "(5)" - -# ⁶ [SUPERSCRIPT SIX] -"\u2076" => "6" - -# ₆ [SUBSCRIPT SIX] -"\u2086" => "6" - -# ⑥ [CIRCLED DIGIT SIX] -"\u2465" => "6" - -# ⓺ [DOUBLE CIRCLED DIGIT SIX] -"\u24FA" => "6" - -# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] -"\u277B" => "6" - -# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] -"\u2785" => "6" - -# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] -"\u278F" => "6" - -# 6 [FULLWIDTH DIGIT SIX] -"\uFF16" => "6" - -# ⒍ [DIGIT SIX FULL STOP] -"\u248D" => "6." - -# ⑹ [PARENTHESIZED DIGIT SIX] -"\u2479" => "(6)" - -# ⁷ [SUPERSCRIPT SEVEN] -"\u2077" => "7" - -# ₇ [SUBSCRIPT SEVEN] -"\u2087" => "7" - -# ⑦ [CIRCLED DIGIT SEVEN] -"\u2466" => "7" - -# ⓻ [DOUBLE CIRCLED DIGIT SEVEN] -"\u24FB" => "7" - -# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] -"\u277C" => "7" - -# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2786" => "7" - -# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2790" => "7" - -# 7 [FULLWIDTH DIGIT SEVEN] -"\uFF17" => "7" - -# ⒎ [DIGIT SEVEN FULL STOP] -"\u248E" => "7." 
- -# ⑺ [PARENTHESIZED DIGIT SEVEN] -"\u247A" => "(7)" - -# ⁸ [SUPERSCRIPT EIGHT] -"\u2078" => "8" - -# ₈ [SUBSCRIPT EIGHT] -"\u2088" => "8" - -# ⑧ [CIRCLED DIGIT EIGHT] -"\u2467" => "8" - -# ⓼ [DOUBLE CIRCLED DIGIT EIGHT] -"\u24FC" => "8" - -# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] -"\u277D" => "8" - -# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2787" => "8" - -# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2791" => "8" - -# 8 [FULLWIDTH DIGIT EIGHT] -"\uFF18" => "8" - -# ⒏ [DIGIT EIGHT FULL STOP] -"\u248F" => "8." - -# ⑻ [PARENTHESIZED DIGIT EIGHT] -"\u247B" => "(8)" - -# ⁹ [SUPERSCRIPT NINE] -"\u2079" => "9" - -# ₉ [SUBSCRIPT NINE] -"\u2089" => "9" - -# ⑨ [CIRCLED DIGIT NINE] -"\u2468" => "9" - -# ⓽ [DOUBLE CIRCLED DIGIT NINE] -"\u24FD" => "9" - -# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] -"\u277E" => "9" - -# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] -"\u2788" => "9" - -# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] -"\u2792" => "9" - -# 9 [FULLWIDTH DIGIT NINE] -"\uFF19" => "9" - -# ⒐ [DIGIT NINE FULL STOP] -"\u2490" => "9." - -# ⑼ [PARENTHESIZED DIGIT NINE] -"\u247C" => "(9)" - -# ⑩ [CIRCLED NUMBER TEN] -"\u2469" => "10" - -# ⓾ [DOUBLE CIRCLED NUMBER TEN] -"\u24FE" => "10" - -# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] -"\u277F" => "10" - -# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] -"\u2789" => "10" - -# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] -"\u2793" => "10" - -# ⒑ [NUMBER TEN FULL STOP] -"\u2491" => "10." - -# ⑽ [PARENTHESIZED NUMBER TEN] -"\u247D" => "(10)" - -# ⑪ [CIRCLED NUMBER ELEVEN] -"\u246A" => "11" - -# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] -"\u24EB" => "11" - -# ⒒ [NUMBER ELEVEN FULL STOP] -"\u2492" => "11." - -# ⑾ [PARENTHESIZED NUMBER ELEVEN] -"\u247E" => "(11)" - -# ⑫ [CIRCLED NUMBER TWELVE] -"\u246B" => "12" - -# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] -"\u24EC" => "12" - -# ⒓ [NUMBER TWELVE FULL STOP] -"\u2493" => "12." 
- -# ⑿ [PARENTHESIZED NUMBER TWELVE] -"\u247F" => "(12)" - -# ⑬ [CIRCLED NUMBER THIRTEEN] -"\u246C" => "13" - -# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] -"\u24ED" => "13" - -# ⒔ [NUMBER THIRTEEN FULL STOP] -"\u2494" => "13." - -# ⒀ [PARENTHESIZED NUMBER THIRTEEN] -"\u2480" => "(13)" - -# ⑭ [CIRCLED NUMBER FOURTEEN] -"\u246D" => "14" - -# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] -"\u24EE" => "14" - -# ⒕ [NUMBER FOURTEEN FULL STOP] -"\u2495" => "14." - -# ⒁ [PARENTHESIZED NUMBER FOURTEEN] -"\u2481" => "(14)" - -# ⑮ [CIRCLED NUMBER FIFTEEN] -"\u246E" => "15" - -# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] -"\u24EF" => "15" - -# ⒖ [NUMBER FIFTEEN FULL STOP] -"\u2496" => "15." - -# ⒂ [PARENTHESIZED NUMBER FIFTEEN] -"\u2482" => "(15)" - -# ⑯ [CIRCLED NUMBER SIXTEEN] -"\u246F" => "16" - -# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] -"\u24F0" => "16" - -# ⒗ [NUMBER SIXTEEN FULL STOP] -"\u2497" => "16." - -# ⒃ [PARENTHESIZED NUMBER SIXTEEN] -"\u2483" => "(16)" - -# ⑰ [CIRCLED NUMBER SEVENTEEN] -"\u2470" => "17" - -# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] -"\u24F1" => "17" - -# ⒘ [NUMBER SEVENTEEN FULL STOP] -"\u2498" => "17." - -# ⒄ [PARENTHESIZED NUMBER SEVENTEEN] -"\u2484" => "(17)" - -# ⑱ [CIRCLED NUMBER EIGHTEEN] -"\u2471" => "18" - -# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] -"\u24F2" => "18" - -# ⒙ [NUMBER EIGHTEEN FULL STOP] -"\u2499" => "18." - -# ⒅ [PARENTHESIZED NUMBER EIGHTEEN] -"\u2485" => "(18)" - -# ⑲ [CIRCLED NUMBER NINETEEN] -"\u2472" => "19" - -# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] -"\u24F3" => "19" - -# ⒚ [NUMBER NINETEEN FULL STOP] -"\u249A" => "19." - -# ⒆ [PARENTHESIZED NUMBER NINETEEN] -"\u2486" => "(19)" - -# ⑳ [CIRCLED NUMBER TWENTY] -"\u2473" => "20" - -# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] -"\u24F4" => "20" - -# ⒛ [NUMBER TWENTY FULL STOP] -"\u249B" => "20." 
- -# ⒇ [PARENTHESIZED NUMBER TWENTY] -"\u2487" => "(20)" - -# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00AB" => "\"" - -# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00BB" => "\"" - -# “ [LEFT DOUBLE QUOTATION MARK] -"\u201C" => "\"" - -# ” [RIGHT DOUBLE QUOTATION MARK] -"\u201D" => "\"" - -# „ [DOUBLE LOW-9 QUOTATION MARK] -"\u201E" => "\"" - -# ″ [DOUBLE PRIME] -"\u2033" => "\"" - -# ‶ [REVERSED DOUBLE PRIME] -"\u2036" => "\"" - -# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275D" => "\"" - -# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] -"\u275E" => "\"" - -# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276E" => "\"" - -# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276F" => "\"" - -# " [FULLWIDTH QUOTATION MARK] -"\uFF02" => "\"" - -# ‘ [LEFT SINGLE QUOTATION MARK] -"\u2018" => "\'" - -# ’ [RIGHT SINGLE QUOTATION MARK] -"\u2019" => "\'" - -# ‚ [SINGLE LOW-9 QUOTATION MARK] -"\u201A" => "\'" - -# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] -"\u201B" => "\'" - -# ′ [PRIME] -"\u2032" => "\'" - -# ‵ [REVERSED PRIME] -"\u2035" => "\'" - -# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] -"\u2039" => "\'" - -# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] -"\u203A" => "\'" - -# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275B" => "\'" - -# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] -"\u275C" => "\'" - -# ' [FULLWIDTH APOSTROPHE] -"\uFF07" => "\'" - -# ‐ [HYPHEN] -"\u2010" => "-" - -# ‑ [NON-BREAKING HYPHEN] -"\u2011" => "-" - -# ‒ [FIGURE DASH] -"\u2012" => "-" - -# – [EN DASH] -"\u2013" => "-" - -# — [EM DASH] -"\u2014" => "-" - -# ⁻ [SUPERSCRIPT MINUS] -"\u207B" => "-" - -# ₋ [SUBSCRIPT MINUS] -"\u208B" => "-" - -# - [FULLWIDTH HYPHEN-MINUS] -"\uFF0D" => "-" - -# ⁅ [LEFT SQUARE BRACKET WITH QUILL] -"\u2045" => "[" - -# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] -"\u2772" => "[" - -# [ [FULLWIDTH LEFT SQUARE BRACKET] -"\uFF3B" => "[" - -# ⁆ [RIGHT SQUARE BRACKET WITH QUILL] -"\u2046" => 
"]" - -# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] -"\u2773" => "]" - -# ] [FULLWIDTH RIGHT SQUARE BRACKET] -"\uFF3D" => "]" - -# ⁽ [SUPERSCRIPT LEFT PARENTHESIS] -"\u207D" => "(" - -# ₍ [SUBSCRIPT LEFT PARENTHESIS] -"\u208D" => "(" - -# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] -"\u2768" => "(" - -# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] -"\u276A" => "(" - -# ( [FULLWIDTH LEFT PARENTHESIS] -"\uFF08" => "(" - -# ⸨ [LEFT DOUBLE PARENTHESIS] -"\u2E28" => "((" - -# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] -"\u207E" => ")" - -# ₎ [SUBSCRIPT RIGHT PARENTHESIS] -"\u208E" => ")" - -# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] -"\u2769" => ")" - -# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] -"\u276B" => ")" - -# ) [FULLWIDTH RIGHT PARENTHESIS] -"\uFF09" => ")" - -# ⸩ [RIGHT DOUBLE PARENTHESIS] -"\u2E29" => "))" - -# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u276C" => "<" - -# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u2770" => "<" - -# < [FULLWIDTH LESS-THAN SIGN] -"\uFF1C" => "<" - -# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u276D" => ">" - -# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u2771" => ">" - -# > [FULLWIDTH GREATER-THAN SIGN] -"\uFF1E" => ">" - -# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] -"\u2774" => "{" - -# { [FULLWIDTH LEFT CURLY BRACKET] -"\uFF5B" => "{" - -# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] -"\u2775" => "}" - -# } [FULLWIDTH RIGHT CURLY BRACKET] -"\uFF5D" => "}" - -# ⁺ [SUPERSCRIPT PLUS SIGN] -"\u207A" => "+" - -# ₊ [SUBSCRIPT PLUS SIGN] -"\u208A" => "+" - -# + [FULLWIDTH PLUS SIGN] -"\uFF0B" => "+" - -# ⁼ [SUPERSCRIPT EQUALS SIGN] -"\u207C" => "=" - -# ₌ [SUBSCRIPT EQUALS SIGN] -"\u208C" => "=" - -# = [FULLWIDTH EQUALS SIGN] -"\uFF1D" => "=" - -# ! [FULLWIDTH EXCLAMATION MARK] -"\uFF01" => "!" - -# ‼ [DOUBLE EXCLAMATION MARK] -"\u203C" => "!!" - -# ⁉ [EXCLAMATION QUESTION MARK] -"\u2049" => "!?" 
- -# # [FULLWIDTH NUMBER SIGN] -"\uFF03" => "#" - -# $ [FULLWIDTH DOLLAR SIGN] -"\uFF04" => "$" - -# ⁒ [COMMERCIAL MINUS SIGN] -"\u2052" => "%" - -# % [FULLWIDTH PERCENT SIGN] -"\uFF05" => "%" - -# & [FULLWIDTH AMPERSAND] -"\uFF06" => "&" - -# ⁎ [LOW ASTERISK] -"\u204E" => "*" - -# * [FULLWIDTH ASTERISK] -"\uFF0A" => "*" - -# , [FULLWIDTH COMMA] -"\uFF0C" => "," - -# . [FULLWIDTH FULL STOP] -"\uFF0E" => "." - -# ⁄ [FRACTION SLASH] -"\u2044" => "/" - -# / [FULLWIDTH SOLIDUS] -"\uFF0F" => "/" - -# : [FULLWIDTH COLON] -"\uFF1A" => ":" - -# ⁏ [REVERSED SEMICOLON] -"\u204F" => ";" - -# ; [FULLWIDTH SEMICOLON] -"\uFF1B" => ";" - -# ? [FULLWIDTH QUESTION MARK] -"\uFF1F" => "?" - -# ⁇ [DOUBLE QUESTION MARK] -"\u2047" => "??" - -# ⁈ [QUESTION EXCLAMATION MARK] -"\u2048" => "?!" - -# @ [FULLWIDTH COMMERCIAL AT] -"\uFF20" => "@" - -# \ [FULLWIDTH REVERSE SOLIDUS] -"\uFF3C" => "\\" - -# ‸ [CARET] -"\u2038" => "^" - -# ^ [FULLWIDTH CIRCUMFLEX ACCENT] -"\uFF3E" => "^" - -# _ [FULLWIDTH LOW LINE] -"\uFF3F" => "_" - -# ⁓ [SWUNG DASH] -"\u2053" => "~" - -# ~ [FULLWIDTH TILDE] -"\uFF5E" => "~" - -################################################################ -# Below is the Perl script used to generate the above mappings # -# from ASCIIFoldingFilter.java: # -################################################################ -# -# #!/usr/bin/perl -# -# use warnings; -# use strict; -# -# my @source_chars = (); -# my @source_char_descriptions = (); -# my $target = ''; -# -# while (<>) { -# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) { -# push @source_chars, $1; -# push @source_char_descriptions, $2; -# next; -# } -# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) { -# $target .= $1; -# next; -# } -# if (/break;/) { -# $target = "\\\"" if ($target eq '"'); -# for my $source_char_num (0..$#source_chars) { -# print "# $source_char_descriptions[$source_char_num]\n"; -# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n"; -# } -# @source_chars = (); -# @source_char_descriptions = 
(); -# $target = ''; -# } -# } diff --git a/solr/example/example-DIH/solr/db/conf/mapping-ISOLatin1Accent.txt b/solr/example/example-DIH/solr/db/conf/mapping-ISOLatin1Accent.txt deleted file mode 100644 index ede7742581b..00000000000 --- a/solr/example/example-DIH/solr/db/conf/mapping-ISOLatin1Accent.txt +++ /dev/null @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
- -# example: -# "À" => "A" -# "\u00C0" => "A" -# "\u00C0" => "\u0041" -# "ß" => "ss" -# "\t" => " " -# "\n" => "" - -# À => A -"\u00C0" => "A" - -# Á => A -"\u00C1" => "A" - -#  => A -"\u00C2" => "A" - -# à => A -"\u00C3" => "A" - -# Ä => A -"\u00C4" => "A" - -# Å => A -"\u00C5" => "A" - -# Æ => AE -"\u00C6" => "AE" - -# Ç => C -"\u00C7" => "C" - -# È => E -"\u00C8" => "E" - -# É => E -"\u00C9" => "E" - -# Ê => E -"\u00CA" => "E" - -# Ë => E -"\u00CB" => "E" - -# Ì => I -"\u00CC" => "I" - -# Í => I -"\u00CD" => "I" - -# Î => I -"\u00CE" => "I" - -# Ï => I -"\u00CF" => "I" - -# IJ => IJ -"\u0132" => "IJ" - -# Ð => D -"\u00D0" => "D" - -# Ñ => N -"\u00D1" => "N" - -# Ò => O -"\u00D2" => "O" - -# Ó => O -"\u00D3" => "O" - -# Ô => O -"\u00D4" => "O" - -# Õ => O -"\u00D5" => "O" - -# Ö => O -"\u00D6" => "O" - -# Ø => O -"\u00D8" => "O" - -# Œ => OE -"\u0152" => "OE" - -# Þ -"\u00DE" => "TH" - -# Ù => U -"\u00D9" => "U" - -# Ú => U -"\u00DA" => "U" - -# Û => U -"\u00DB" => "U" - -# Ü => U -"\u00DC" => "U" - -# Ý => Y -"\u00DD" => "Y" - -# Ÿ => Y -"\u0178" => "Y" - -# à => a -"\u00E0" => "a" - -# á => a -"\u00E1" => "a" - -# â => a -"\u00E2" => "a" - -# ã => a -"\u00E3" => "a" - -# ä => a -"\u00E4" => "a" - -# å => a -"\u00E5" => "a" - -# æ => ae -"\u00E6" => "ae" - -# ç => c -"\u00E7" => "c" - -# è => e -"\u00E8" => "e" - -# é => e -"\u00E9" => "e" - -# ê => e -"\u00EA" => "e" - -# ë => e -"\u00EB" => "e" - -# ì => i -"\u00EC" => "i" - -# í => i -"\u00ED" => "i" - -# î => i -"\u00EE" => "i" - -# ï => i -"\u00EF" => "i" - -# ij => ij -"\u0133" => "ij" - -# ð => d -"\u00F0" => "d" - -# ñ => n -"\u00F1" => "n" - -# ò => o -"\u00F2" => "o" - -# ó => o -"\u00F3" => "o" - -# ô => o -"\u00F4" => "o" - -# õ => o -"\u00F5" => "o" - -# ö => o -"\u00F6" => "o" - -# ø => o -"\u00F8" => "o" - -# œ => oe -"\u0153" => "oe" - -# ß => ss -"\u00DF" => "ss" - -# þ => th -"\u00FE" => "th" - -# ù => u -"\u00F9" => "u" - -# ú => u -"\u00FA" => "u" - -# û => u -"\u00FB" => "u" - -# ü => u 
-"\u00FC" => "u" - -# ý => y -"\u00FD" => "y" - -# ÿ => y -"\u00FF" => "y" - -# ff => ff -"\uFB00" => "ff" - -# fi => fi -"\uFB01" => "fi" - -# fl => fl -"\uFB02" => "fl" - -# ffi => ffi -"\uFB03" => "ffi" - -# ffl => ffl -"\uFB04" => "ffl" - -# ſt => ft -"\uFB05" => "ft" - -# st => st -"\uFB06" => "st" diff --git a/solr/example/example-DIH/solr/db/conf/protwords.txt b/solr/example/example-DIH/solr/db/conf/protwords.txt deleted file mode 100644 index 1dfc0abecbf..00000000000 --- a/solr/example/example-DIH/solr/db/conf/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. 
-dontstems -zwhacky - diff --git a/solr/example/example-DIH/solr/db/conf/solrconfig.xml b/solr/example/example-DIH/solr/db/conf/solrconfig.xml deleted file mode 100644 index 11270930681..00000000000 --- a/solr/example/example-DIH/solr/db/conf/solrconfig.xml +++ /dev/null @@ -1,1342 +0,0 @@ - - - - - - - - - 9.0.0 - - - - - - - - - - - - - - - - - - - - ${solr.data.dir:} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.lock.type:native} - - - - - - - - - - - - - true - - - - - - - - - - - - - - - - ${solr.ulog.dir:} - - - - - ${solr.autoCommit.maxTime:15000} - false - - - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - - - - - - - ${solr.max.booleanClauses:1024} - - - - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - static firstSearcher warming in solrconfig.xml - - - - - - false - - - - - - - - - - - - - - - - - - - - - db-data-config.xml - - - - - - - - explicit - 10 - text - - - - - - - - - - - - - - - explicit - json - true - text - - - - - - - - explicit - - - velocity - browse - layout - - - edismax - *:* - 10 - *,score - - - on - 1 - - - - - - text - - - - - - - true - ignored_ - - - true - links - ignored_ - - - - - - - - - text_general - - - - - - default - text - solr.DirectSolrSpellChecker - - internal - - 0.5 - - 2 - - 1 - - 5 - - 4 - - 0.01 - - - - - - wordbreak - solr.WordBreakSolrSpellChecker - name - true - true - 10 - - - - - - - - - - - - - - - - text - - default - wordbreak - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - spellcheck - - - - - - mySuggester - FuzzyLookupFactory - DocumentDictionaryFactory - cat - price - string - - - - - - true - 10 - - - suggest - - - - - - - - - text - true - - - tvComponent - - - - - - - - - - true - false - - - terms - - - - - - - - string - elevate.xml - - - - - - explicit - text - - - elevator - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - - - - - - - - - 
- - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - 10 - .,!? - - - - - - - WORD - - - en - US - - - - - - - - - - - - - - - - - - - - - - text/plain; charset=UTF-8 - - - - - ${velocity.template.base.dir:} - - - - - 5 - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/spellings.txt b/solr/example/example-DIH/solr/db/conf/spellings.txt deleted file mode 100644 index d7ede6f5611..00000000000 --- a/solr/example/example-DIH/solr/db/conf/spellings.txt +++ /dev/null @@ -1,2 +0,0 @@ -pizza -history \ No newline at end of file diff --git a/solr/example/example-DIH/solr/db/conf/stopwords.txt b/solr/example/example-DIH/solr/db/conf/stopwords.txt deleted file mode 100644 index ae1e83eeb3d..00000000000 --- a/solr/example/example-DIH/solr/db/conf/stopwords.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/solr/example/example-DIH/solr/db/conf/synonyms.txt b/solr/example/example-DIH/solr/db/conf/synonyms.txt deleted file mode 100644 index eab4ee87537..00000000000 --- a/solr/example/example-DIH/solr/db/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr/example/example-DIH/solr/db/conf/update-script.js b/solr/example/example-DIH/solr/db/conf/update-script.js deleted file mode 100644 index 49b07f9b71e..00000000000 --- a/solr/example/example-DIH/solr/db/conf/update-script.js +++ /dev/null @@ -1,53 +0,0 @@ -/* - This is a basic skeleton JavaScript update processor. - - In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in - the example solrconfig.xml and must be uncommented to be enabled. 
- - See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details. -*/ - -function processAdd(cmd) { - - doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument - id = doc.getFieldValue("id"); - logger.info("update-script#processAdd: id=" + id); - -// Set a field value: -// doc.setField("foo_s", "whatever"); - -// Get a configuration parameter: -// config_param = params.get('config_param'); // "params" only exists if processor configured with - -// Get a request parameter: -// some_param = req.getParams().get("some_param") - -// Add a field of field names that match a pattern: -// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss -// field_names = doc.getFieldNames().toArray(); -// for(i=0; i < field_names.length; i++) { -// field_name = field_names[i]; -// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); } -// } - -} - -function processDelete(cmd) { - // no-op -} - -function processMergeIndexes(cmd) { - // no-op -} - -function processCommit(cmd) { - // no-op -} - -function processRollback(cmd) { - // no-op -} - -function finish() { - // no-op -} diff --git a/solr/example/example-DIH/solr/db/conf/xslt/example.xsl b/solr/example/example-DIH/solr/db/conf/xslt/example.xsl deleted file mode 100644 index b8992700828..00000000000 --- a/solr/example/example-DIH/solr/db/conf/xslt/example.xsl +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - - - - - - - - <xsl:value-of select="$title"/> - - - -

-
- This has been formatted by the sample "example.xsl" transform - - use your own XSLT to get a nicer page -
- - - -
- - - -
- - - - -
-
-
- - - - - - - - - - - - - - javascript:toggle("");? -
- - exp - - - - - -
- - -
- - - - - - - -
    - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - -
diff --git a/solr/example/example-DIH/solr/db/conf/xslt/example_atom.xsl b/solr/example/example-DIH/solr/db/conf/xslt/example_atom.xsl deleted file mode 100644 index b6c23151dc4..00000000000 --- a/solr/example/example-DIH/solr/db/conf/xslt/example_atom.xsl +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - Example Solr Atom 1.0 Feed - - This has been formatted by the sample "example_atom.xsl" transform - - use your own XSLT to get a nicer Atom feed. - - - Apache Solr - solr-user@lucene.apache.org - - - - - - tag:localhost,2007:example - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - tag:localhost,2007: - - - - - - diff --git a/solr/example/example-DIH/solr/db/conf/xslt/example_rss.xsl b/solr/example/example-DIH/solr/db/conf/xslt/example_rss.xsl deleted file mode 100644 index c8ab5bfb1ec..00000000000 --- a/solr/example/example-DIH/solr/db/conf/xslt/example_rss.xsl +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - - - - Example Solr RSS 2.0 Feed - http://localhost:8983/solr - - This has been formatted by the sample "example_rss.xsl" transform - - use your own XSLT to get a nicer RSS feed. - - en-us - http://localhost:8983/solr - - - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - http://localhost:8983/solr/select?q=id: - - - - - - - http://localhost:8983/solr/select?q=id: - - - - diff --git a/solr/example/example-DIH/solr/db/conf/xslt/luke.xsl b/solr/example/example-DIH/solr/db/conf/xslt/luke.xsl deleted file mode 100644 index 05fb5bfeee2..00000000000 --- a/solr/example/example-DIH/solr/db/conf/xslt/luke.xsl +++ /dev/null @@ -1,337 +0,0 @@ - - - - - - - - - Solr Luke Request Handler Response - - - - - - - - - <xsl:value-of select="$title"/> - - - - - -

- -

-
- -
- -

Index Statistics

- -
- -

Field Statistics

- - - -

Document statistics

- - - - -
- - - - - -
- -
- - -
- -
- -
-
-
- - - - - - - - - - - - - - - - - - - - - -
-

- -

- -
- -
-
-
- - -
- - 50 - 800 - 160 - blue - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- background-color: ; width: px; height: px; -
-
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - -
  • - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/solr/example/example-DIH/solr/db/conf/xslt/updateXml.xsl b/solr/example/example-DIH/solr/db/conf/xslt/updateXml.xsl deleted file mode 100644 index 7c4a48e7377..00000000000 --- a/solr/example/example-DIH/solr/db/conf/xslt/updateXml.xsl +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/db/core.properties b/solr/example/example-DIH/solr/db/core.properties deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/kmeans-attributes.xml b/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/kmeans-attributes.xml deleted file mode 100644 index d802465f669..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/kmeans-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/lingo-attributes.xml b/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/lingo-attributes.xml deleted file mode 100644 index 4bf13608b36..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/lingo-attributes.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/stc-attributes.xml b/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/stc-attributes.xml deleted file mode 100644 index c1bf110c8fd..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/clustering/carrot2/stc-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/currency.xml b/solr/example/example-DIH/solr/mail/conf/currency.xml deleted file mode 100644 index 3a9c58afee8..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/currency.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/elevate.xml b/solr/example/example-DIH/solr/mail/conf/elevate.xml deleted file mode 100644 index 2c09ebed669..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/contractions_ca.txt b/solr/example/example-DIH/solr/mail/conf/lang/contractions_ca.txt deleted file mode 100644 index 307a85f913d..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t diff --git a/solr/example/example-DIH/solr/mail/conf/lang/contractions_fr.txt b/solr/example/example-DIH/solr/mail/conf/lang/contractions_fr.txt deleted file mode 100644 index f1bba51b23e..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu diff --git a/solr/example/example-DIH/solr/mail/conf/lang/contractions_ga.txt b/solr/example/example-DIH/solr/mail/conf/lang/contractions_ga.txt deleted file mode 100644 index 9ebe7fa349a..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/contractions_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b diff --git a/solr/example/example-DIH/solr/mail/conf/lang/contractions_it.txt b/solr/example/example-DIH/solr/mail/conf/lang/contractions_it.txt deleted file mode 100644 index cac04095372..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/contractions_it.txt +++ 
/dev/null @@ -1,23 +0,0 @@ -# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l -all -dall -dell -nell -sull -coll -pell -gl -agl -dagl -degl -negl -sugl -un -m -t -s -v -d diff --git a/solr/example/example-DIH/solr/mail/conf/lang/hyphenations_ga.txt b/solr/example/example-DIH/solr/mail/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 4d2642cc5a3..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stemdict_nl.txt b/solr/example/example-DIH/solr/mail/conf/lang/stemdict_nl.txt deleted file mode 100644 index 441072971d3..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets fiets -bromfiets bromfiets -ei eier -kind kinder diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stoptags_ja.txt b/solr/example/example-DIH/solr/mail/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b750845e3..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below. Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. 
-# -##### -# noun: unclassified nouns -#名詞 -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -# noun-proper: Proper nouns where the sub-classification is undefined -#名詞-固有名詞 -# -# noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -# noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. お市の方 -#名詞-固有名詞-人名-一般 -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. 山田 -#名詞-固有名詞-人名-姓 -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 太郎 -#名詞-固有名詞-人名-名 -# -# noun-proper-organization: Names representing organizations. -# e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -# noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -# noun-proper-place-country: Country names. -# e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ -#名詞-代名詞-縮約 -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. 金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (する, できる, なさる, くださる) -# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -# noun-adjective-base: The base form of adjectives, words that appear before な ("na") -# e.g. 
健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -# e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -# noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, -# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, -# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -# わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, -# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, -# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, -# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, -# 儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars -# with the stem よう(だ) ("you(da)"). -# e.g. よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form な (aux "da"). -# e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -# noun-special: special nouns where the sub-classification is undefined. -#名詞-特殊 -# -# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is -# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. そう -#名詞-特殊-助動詞語幹 -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ガル or タイ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. 
In general, this category is more inclusive than -# 接尾語 ("suffix") and is usually the last element in a compound noun. -# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. 君, 様, 著 -#名詞-接尾-人名 -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. 町, 市, 県 -#名詞-接尾-地域 -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before スル ("suru"). -# e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, -# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. そう -#名詞-接尾-助動詞語幹 -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula だ ("da"). -# e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. (楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are -# semantically verb-like. -# e.g. 
ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") -# is いわく ("iwaku"). -#名詞-引用文字列 -# -# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -# behave like an adjective. -# e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -# prefix: unclassified prefixes -#接頭詞 -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by なる/なさる/くださる. -# e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 約, およそ, 毎時 -#接頭詞-数接続 -# -##### -# verb: unclassified verbs -#動詞 -# -# verb-main: -#動詞-自立 -# -# verb-auxiliary: -#動詞-非自立 -# -# verb-suffix: -#動詞-接尾 -# -##### -# adjective: unclassified adjectives -#形容詞 -# -# adjective-main: -#形容詞-自立 -# -# adjective-auxiliary: -#形容詞-非自立 -# -# adjective-suffix: -#形容詞-接尾 -# -##### -# adverb: unclassified adverbs -#副詞 -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. あいかわらず, 多分 -#副詞-一般 -# -# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, -# な, する, だ, etc. -# e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, -# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, -# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. 
が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -# particle: unclassified particles. -助詞 -# -# particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -# particle-case-misc: Case particles. -# e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -# particle-case-quote: the "to" that appears after nouns, a person’s speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, -# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, -# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, -# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, -# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -# particle-conjunctive: -# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, -# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, -# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -# particle-dependency: -# e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -# particle-adverbial: -# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, -# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, -# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (松島) や -助詞-間投助詞 -# -# particle-coordinate: -# e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -# particle-final: -# e.g. 
かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, -# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -# 「(祈りが届いたせい) か (, 試験に合格した.)」 -# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -# e.g. か -助詞-副助詞/並立助詞/終助詞 -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. -助詞-連体化 -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. に, と -助詞-副詞化 -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -# auxiliary-verb: -助動詞 -# -##### -# interjection: Greetings and other exclamations. -# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, -# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -# symbol: unclassified Symbols. -記号 -# -# symbol-misc: A general symbol not in one of the categories below. -# e.g. [○◎@$〒→+] -記号-一般 -# -# symbol-comma: Commas -# e.g. [,、] -記号-読点 -# -# symbol-period: Periods and full stops. -# e.g. [..。] -記号-句点 -# -# symbol-space: Full-width whitespace. -記号-空白 -# -# symbol-open_bracket: -# e.g. [({‘“『【] -記号-括弧開 -# -# symbol-close_bracket: -# e.g. [)}’”』」】] -記号-括弧閉 -# -# symbol-alphabetic: -#記号-アルファベット -# -##### -# other: unclassified other -#その他 -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. (だ)ァ -その他-間投 -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. あの, うんと, えと -フィラー -# -##### -# non-verbal: non-verbal sound. 
-非言語音 -# -##### -# fragment: -#語断片 -# -##### -# unknown: unknown part of speech. -#未知語 -# -##### End of file diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ar.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ar.txt deleted file mode 100644 index 046829db6a2..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,125 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some -# redundant entries, for example containing forms with both أ and ا -من -ومن -منها -منه -في -وفي -فيها -فيه -و -ف -ثم -او -أو -ب -بها -به -ا -أ -اى -اي -أي -أى -لا -ولا -الا -ألا -إلا -لكن -ما -وما -كما -فما -عن -مع -اذا -إذا -ان -أن -إن -انها -أنها -إنها -انه -أنه -إنه -بان -بأن -فان -فأن -وان -وأن -وإن -التى -التي -الذى -الذي -الذين -الى -الي -إلى -إلي -على -عليها -عليه -اما -أما -إما -ايضا -أيضا -كل -وكل -لم -ولم -لن -ولن -هى -هي -هو -وهى -وهي -وهو -فهى -فهي -فهو -انت -أنت -لك -لها -له -هذه -هذا -تلك -ذلك -هناك -كانت -كان -يكون -تكون -وكانت -وكان -غير -بعض -قد -نحو -بين -بينما -منذ -ضمن -حيث -الان -الآن -خلال -بعد -قبل -حتى -عند -عندما -لدى -جميع diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_bg.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1ae4ba2ae38..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,193 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беше -би -бил -била -били -било -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главно -го -д -да -дали -до -докато -докога -дори -досега -доста -е -едва -един -ето -за -зад -заедно -заради -засега -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -ли -м -ме -между -мен -ми -мнозина -мога -могат -може -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нея -ни -ние -никой -нито -но -някои -някой -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първо -с -са -само -се -сега -си -скоро -след -сме -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -тн -то -това -тогава -този -той -толкова -точно -трябва -тук -тъй -тя -тях -у -харесва -ч -че -често -чрез -ще -щом -я diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ca.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ca.txt deleted file mode 100644 index 3da65deafe1..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -ací -ah -així -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allí -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquí -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes -d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em -en -encara -ens -entre 
-érem -eren -éreu -es -és -esta -està -estàvem -estaven -estàveu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc -poca -pocs -poques -potser -propi -qual -quals -quan -quant -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son -són -sons -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ckb.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ckb.txt deleted file mode 100644 index 87abf118fec..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ckb.txt +++ /dev/null @@ -1,136 +0,0 @@ -# set of kurdish stopwords -# note these have been normalized with our scheme (e represented with U+06D5, etc) -# constructed from: -# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al) -# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston) -# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc - -# and -و -# which -کە -# of -ی -# made/did -کرد -# that/which -ئەوەی -# on/head -سەر -# two -دوو -# also -هەروەها -# from/that -لەو -# makes/does -دەکات -# some -چەند -# every -هەر - -# demonstratives -# that -ئەو -# this -ئەم - -# personal pronouns -# I -من -# we -ئێمە -# you -تۆ -# you -ئێوە -# he/she/it -ئەو -# they -ئەوان - -# prepositions -# to/with/by -بە -پێ 
-# without -بەبێ -# along with/while/during -بەدەم -# in the opinion of -بەلای -# according to -بەپێی -# before -بەرلە -# in the direction of -بەرەوی -# in front of/toward -بەرەوە -# before/in the face of -بەردەم -# without -بێ -# except for -بێجگە -# for -بۆ -# on/in -دە -تێ -# with -دەگەڵ -# after -دوای -# except for/aside from -جگە -# in/from -لە -لێ -# in front of/before/because of -لەبەر -# between/among -لەبەینی -# concerning/about -لەبابەت -# concerning -لەبارەی -# instead of -لەباتی -# beside -لەبن -# instead of -لەبرێتی -# behind -لەدەم -# with/together with -لەگەڵ -# by -لەلایەن -# within -لەناو -# between/among -لەنێو -# for the sake of -لەپێناوی -# with respect to -لەرەوی -# by means of/for -لەرێ -# for the sake of -لەرێگا -# on/on top of/according to -لەسەر -# under -لەژێر -# between/among -ناو -# between/among -نێوان -# after -پاش -# before -پێش -# like -وەک diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_cz.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_cz.txt deleted file mode 100644 index 53c6097dac7..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,172 +0,0 @@ -a -s -k -o -i -u -v -z -dnes -cz -tímto -budeš -budem -byli -jseš -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proč -máte -tato -kam -tohoto -kdo -kteří -mi -nám -tom -tomuto -mít -nic -proto -kterou -byla -toho -protože -asi -ho -naši -napište -re -což -tím -takže -svých -její -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -či -pod -téma -mezi -přes -ty -pak -vám -ani -když -však -neg -jsem -tento -článku -články -aby -jsme -před -pta -jejich -byl -ještě -až -bez -také -pouze -první -vaše -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -není -vás -jen -podle -zde -už -být -více -bude -již -než -který -by -které -co -nebo -ten -tak -má -při -od -po -jsou -jak -další -ale -si -se -ve -to -jako -za -zpět -ze -do -pro -je -na -atd -atp -jakmile 
-přičemž -já -on -ona -ono -oni -ony -my -vy -jí -ji -mě -mne -jemu -tomu -těm -těmu -němu -němuž -jehož -jíž -jelikož -jež -jakož -načež diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_da.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145b98e..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. 
pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -på | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. 
-vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -når | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -også | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sådan | such, like this/like that diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_de.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7ae08..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. 
- - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daß | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. 
of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_el.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_el.txt deleted file mode 100644 index 232681f5bd6..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_el.txt +++ /dev/null @@ -1,78 +0,0 @@ 
-# Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'σ' instead of 'ς' -ο -η -το -οι -τα -του -τησ -των -τον -την -και -κι -κ -ειμαι -εισαι -ειναι -ειμαστε -ειστε -στο -στον -στη -στην -μα -αλλα -απο -για -προσ -με -σε -ωσ -παρα -αντι -κατα -μετα -θα -να -δε -δεν -μη -μην -επι -ενω -εαν -αν -τοτε -που -πωσ -ποιοσ -ποια -ποιο -ποιοι -ποιεσ -ποιων -ποιουσ -αυτοσ -αυτη -αυτο -αυτοι -αυτων -αυτουσ -αυτεσ -αυτα -εκεινοσ -εκεινη -εκεινο -εκεινοι -εκεινεσ -εκεινα -εκεινων -εκεινουσ -οπωσ -ομωσ -ισωσ -οσο -οτι diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2a1..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_es.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c8d56..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | pero -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? 
-unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mío | mine -mía | -míos | -mías | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería 
-serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_eu.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_eu.txt deleted file mode 100644 index 25f1db93460..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,99 +0,0 @@ -# example set of basque stopwords -al -anitz -arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute -edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan -hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fa.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c6da7..00000000000 --- 
a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fa.txt +++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'ي' instead of 'ی' -انان -نداشته -سراسر -خياه -ايشان -وي -تاكنون -بيشتري -دوم -پس -ناشي -وگو -يا -داشتند -سپس -هنگام -هرگز -پنج -نشان -امسال -ديگر -گروهي -شدند -چطور -ده -و -دو -نخستين -ولي -چرا -چه -وسط -ه -كدام -قابل -يك -رفت -هفت -همچنين -در -هزار -بله -بلي -شايد -اما -شناسي -گرفته -دهد -داشته -دانست -داشتن -خواهيم -ميليارد -وقتيكه -امد -خواهد -جز -اورده -شده -بلكه -خدمات -شدن -برخي -نبود -بسياري -جلوگيري -حق -كردند -نوعي -بعري -نكرده -نظير -نبايد -بوده -بودن -داد -اورد -هست -جايي -شود -دنبال -داده -بايد -سابق -هيچ -همان -انجا -كمتر -كجاست -گردد -كسي -تر -مردم -تان -دادن -بودند -سري -جدا -ندارند -مگر -يكديگر -دارد -دهند -بنابراين -هنگامي -سمت -جا -انچه -خود -دادند -زياد -دارند -اثر -بدون -بهترين -بيشتر -البته -به -براساس -بيرون -كرد -بعضي -گرفت -توي -اي -ميليون -او -جريان -تول -بر -مانند -برابر -باشيم -مدتي -گويند -اكنون -تا -تنها -جديد -چند -بي -نشده -كردن -كردم -گويد -كرده -كنيم -نمي -نزد -روي -قصد -فقط -بالاي -ديگران -اين -ديروز -توسط -سوم -ايم -دانند -سوي -استفاده -شما -كنار -داريم -ساخته -طور -امده -رفته -نخست -بيست -نزديك -طي -كنيد -از -انها -تمامي -داشت -يكي -طريق -اش -چيست -روب -نمايد -گفت -چندين -چيزي -تواند -ام -ايا -با -ان -ايد -ترين -اينكه -ديگري -راه -هايي -بروز -همچنان -پاعين -كس -حدود -مختلف -مقابل -چيز -گيرد -ندارد -ضد -همچون -سازي -شان -مورد -باره -مرسي -خويش -برخوردار -چون -خارج -شش -هنوز -تحت -ضمن -هستيم -گفته -فكر -بسيار -پيش -براي -روزهاي -انكه -نخواهد -بالا -كل -وقتي -كي -چنين -كه -گيري -نيست -است -كجا -كند -نيز -يابد -بندي -حتي -توانند -عقب -خواست -كنند -بين -تمام -همه -ما -باشند -مثل -شد -اري -باشد -اره -طبق -بعد -اگر -صورت 
-غير -جاي -بيش -ريزي -اند -زيرا -چگونه -بار -لطفا -مي -درباره -من -ديده -همين -گذاري -برداري -علت -گذاشته -هم -فوق -نه -ها -شوند -اباد -همواره -هر -اول -خواهند -چهار -نام -امروز -مان -هاي -قبل -كنم -سعي -تازه -را -هستند -زير -جلوي -عنوان -بود diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fi.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a055b..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo 
noiden noita noissa noista noihin noilla noilta noille noina noiksi | those -ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about -poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fr.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae6846..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -cela | that -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ga.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ga.txt deleted file mode 100644 index 
9ff88d747e5..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,110 +0,0 @@ - -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtí -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -ní -níor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sí -tar -thar -thú -triúr -trí -trína -trínár -tríocha -tú -um -ár -é -éis -í -ó -ón -óna -ónár diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_gl.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_gl.txt deleted file mode 100644 index d8760b12c14..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_gl.txt +++ /dev/null @@ -1,161 +0,0 @@ -# galican stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns -nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós 
diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hi.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb083b..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer -# for spelling variation (see section below), such that it can be used whether or -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well. -अंदर -अत -अपना -अपनी -अपने -अभी -आदि -आप -इत्यादि -इन -इनका -इन्हीं -इन्हें -इन्हों -इस -इसका -इसकी -इसके -इसमें -इसी -इसे -उन -उनका -उनकी -उनके -उनको -उन्हीं -उन्हें -उन्हों -उस -उसके -उसी -उसे -एक -एवं -एस -ऐसे -और -कई -कर -करता -करते -करना -करने -करें -कहते -कहा -का -काफ़ी -कि -कितना -किन्हें -किन्हों -किया -किर -किस -किसी -किसे -की -कुछ -कुल -के -को -कोई -कौन -कौनसा -गया -घर -जब -जहाँ -जा -जितना -जिन -जिन्हें -जिन्हों -जिस -जिसे -जीधर -जैसा -जैसे -जो -तक -तब -तरह -तिन -तिन्हें -तिन्हों -तिस -तिसे -तो -था -थी -थे -दबारा -दिया -दुसरा -दूसरे -दो -द्वारा -न -नहीं -ना -निहायत -नीचे -ने -पर -पर -पहले -पूरा -पे -फिर -बनी -बही -बहुत -बाद -बाला -बिलकुल -भी -भीतर -मगर -मानो -मे -में -यदि -यह -यहाँ -यही -या -यिह -ये -रखें -रहा -रहे -ऱ्वासा -लिए -लिये -लेकिन -व -वर्ग -वह -वह -वहाँ -वहीं -वाले -वुह -वे -वग़ैरह -संग -सकता -सकते -सबसे -सभी -साथ -साबुत -साभ -सारा -से -सो -ही -हुआ -हुई -हुए -है -हैं -हो -होता -होती -होते -होना -होने -# additional normalized forms of the above -अपनि -जेसे -होति -सभि -तिंहों -इंहों -दवारा -इसि -किंहें -थि -उंहों -ओर -जिंहें -वहिं -अभि -बनि -हि -उंहिं -उंहें -हें -वगेरह -एसे -रवासा -कोन -निचे -काफि -उसि -पुरा -भितर -हे -बहि -वहां -कोइ -यहां -जिंहों -तिंहें -किसि -कइ -यहि 
-इंहिं -जिधर -इंहें -अदि -इतयादि -हुइ -कोनसा -इसकि -दुसरे -जहां -अप -किंहों -उनकि -भि -वरग -हुअ -जेसा -नहिं diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hu.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da8aa9..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. 
-ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hy.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hy.txt deleted file mode 100644 index 60c1c50fbc8..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,46 +0,0 @@ -# example set of Armenian stopwords. 
-այդ -այլ -այն -այս -դու -դուք -եմ -են -ենք -ես -եք -է -էի -էին -էինք -էիր -էիք -էր -ըստ -թ -ի -ին -իսկ -իր -կամ -համար -հետ -հետո -մենք -մեջ -մի -ն -նա -նաև -նրա -նրանք -որ -որը -որոնք -որպես -ու -ում -պիտի -վրա -և diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_id.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_id.txt deleted file mode 100644 index 4617f83a5c5..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,359 +0,0 @@ -# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya -hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun -dikarenakan -karena 
-karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin -mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_it.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc773ab..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 @@ - | From 
svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad | a (to) before vowel -al | a + il -allo | a + lo -ai | a + i -agli | a + gli -all | a + l' -agl | a + gl' -alla | a + la -alle | a + le -con | with -col | con + il -coi | con + i (forms collo, cogli etc are now very rare) -da | from -dal | da + il -dallo | da + lo -dai | da + i -dagli | da + gli -dall | da + l' -dagl | da + gll' -dalla | da + la -dalle | da + le -di | of -del | di + il -dello | di + lo -dei | di + i -degli | di + gli -dell | di + l' -degl | di + gl' -della | di + la -delle | di + le -in | in -nel | in + el -nello | in + lo -nei | in + i -negli | in + gli -nell | in + l' -negl | in + gl' -nella | in + la -nelle | in + le -su | on -sul | su + il -sullo | su + lo -sui | su + i -sugli | su + gli -sull | su + l' -sugl | su + gl' -sulla | su + la -sulle | su + le -per | through, by -tra | among -contro | against -io | I -tu | thou -lui | he -lei | she -noi | we -voi | you -loro | they -mio | my -mia | -miei | -mie | -tuo | -tua | -tuoi | thy -tue | -suo | -sua | -suoi | his, her -sue | -nostro | our -nostra | -nostri | -nostre | -vostro | your -vostra | -vostri | -vostre | -mi | me -ti | thee -ci | us, there -vi | you, there -lo | him, the -la | her, the -li | them -le | them, the -gli | to him, the -ne | from there etc -il | the -un | a -uno | a -una | a -ma | but -ed | and -se | if -perché | why, because -anche | also -come | how -dov | where (as dov') -dove | where -che | who, that -chi | who -cui | whom -non | not -più | more -quale | who, that 
-quanto | how much -quanti | -quanta | -quante | -quello | that -quelli | -quella | -quelle | -questo | this -questi | -questa | -queste | -si | yes -tutto | all -tutti | all - - | single letter forms: - -a | at -c | as c' for ce or ci -e | and -i | the -l | as l' -o | or - - | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - - | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - - | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - - | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ja.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6b16..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ja.txt +++ 
/dev/null @@ -1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_lv.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_lv.txt deleted file mode 100644 index e21a23c06c3..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,172 +0,0 @@ -# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined: -# pronouns, adverbs, interjections were removed -# -# prepositions -aiz -ap -ar -apakš -ārpus -augšpus -bez -caur -dēļ -gar -iekš -iz -kopš -labad -lejpus -līdz -no -otrpus -pa -par -pār -pēc -pie -pirms -pret -priekš -starp -šaipus -uz -viņpus -virs -virspus -zem -apakšpus -# Conjunctions -un -bet -jo -ja -ka -lai -tomēr -tikko -turpretī -arī -kaut -gan -tādēļ -tā -ne -tikvien -vien -kā -ir -te -vai -kamēr -# Particles -ar -diezin -droši -diemžēl -nebūt -ik -it -taču -nu -pat -tiklab -iekšpus -nedz -tik -nevis -turpretim -jeb -iekam -iekām -iekāms -kolīdz -līdzko -tiklīdz -jebšu -tālab -tāpēc -nekā -itin -jā -jau -jel -nē -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -būt -biju -biji -bija -bijām -bijāt -esmu -esi -esam -esat -būšu -būsi -būs -būsim -būsiet -tikt -tiku -tiki -tika -tikām -tikāt -tieku -tiec -tiek -tiekam -tiekat -tikšu -tiks -tiksim -tiksiet -tapt -tapi -tapāt -topat -tapšu -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva -kļuvām -kļuvāt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# verbs -varēt -varēju 
-varējām -varēšu -varēsim -var -varēji -varējāt -varēsi -varēsiet -varat -varēja -varēs diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_nl.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aeacf6f..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. 
of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. 
of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_no.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28ba54..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard , Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) 
-en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -på | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -så | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nå | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -når | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -å | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sånn | such a -inni | inside/within -mellom | between -vår | our -hver | each -hvem | who -vors | us/ours -hvis | whose -både | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -også | also -slik | just -vært | been -være | to be -båe | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) 
* -då | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjå | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_pt.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01af6b..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | of, from -a | the; to, at; her -o | the; him -que | who, that -e | and -do | de + o -da | de + a -em | in -um | a -para | for - | é from SER -com | with -não | not, no -uma | a -os | the; them -no | em + o -se | himself etc -na | em + a -por | for -mais | more -as | the; them -dos | de + os -como | as, like -mas | but - | foi from SER -ao | a + o -ele | he -das | de + as - | tem from TER -à | a + a -seu | his -sua | her -ou | or - | ser from SER -quando | when -muito | much - | há from HAV -nos | em + os; us -já | already, now - | está from EST -eu | I -também | also -só | only, just -pelo | per + o -pela | per + a -até | up to -isso | that -ela | he -entre | between - | era from SER -depois | after -sem | without -mesmo | same -aos | a + os - | ter from TER -seus | his -quem | whom -nas | em + as -me | me -esse | that -eles | they - | estão from EST -você | you - | tinha from TER - | foram from SER -essa | that -num | em + um -nem | nor -suas | her -meu | my -às | a + as -minha | my - | têm from TER -numa | em + uma -pelos | per + os -elas | they - | havia from HAV - | seja from SER -qual | which - | será from SER -nós | we - | tenho from TER -lhe | to him, her -deles | of them -essas | those -esses | those -pelas | per + as -este | this - | fosse from SER -dele | of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. 
- -tu | thou -te | thee -vocês | you (plural) -vos | you -lhes | to them -meus | my -minhas -teu | thy -tua -teus -tuas -nosso | our -nossa -nossos -nossas - -dela | of her -delas | of them - -esta | this -estes | these -estas | these -aquele | that -aquela | that -aqueles | those -aquelas | those -isto | this -aquilo | that - - | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - - | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houveríamos -houveriam - - | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -seríamos -seriam - - | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tínhamos -tinham -tive -teve -tivemos -tiveram -tivera -tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -teríamos -teriam diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ro.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ro.txt deleted file mode 100644 index 4fdee90a5ba..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,233 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi -totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ru.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ru.txt deleted file mode 100644 index 55271400c64..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `ё' is translated to `е'. - -и | and -в | in/into -во | alternative form -не | not -что | what/that -он | he -на | on/onto -я | i -с | from -со | alternative form -как | how -а | milder form of `no' (but) -то | conjunction and form of `that' -все | all -она | she -так | so, thus -его | him -но | but -да | yes/and -ты | thou -к | towards, by -у | around, chez -же | intensifier particle -вы | you -за | beyond, behind -бы | conditional/subj. particle -по | up to, along -только | only -ее | her -мне | to me -было | it was -вот | here is/are, particle -от | away from -меня | me -еще | still, yet, more -нет | no, there isnt/arent -о | about -из | out of -ему | to him -теперь | now -когда | when -даже | even -ну | so, well -вдруг | suddenly -ли | interrogative particle -если | if -уже | already, but homonym of `narrower' -или | or -ни | neither -быть | to be -был | he was -него | prepositional form of его -до | up to -вас | you accusative -нибудь | indef. 
suffix preceded by hyphen -опять | again -уж | already, but homonym of `adder' -вам | to you -сказал | he said -ведь | particle `after all' -там | there -потом | then -себя | oneself -ничего | nothing -ей | to her -может | usually with `быть' as `maybe' -они | they -тут | here -где | where -есть | there is/are -надо | got to, must -ней | prepositional form of ей -для | for -мы | we -тебя | thee -их | them, their -чем | than -была | she was -сам | self -чтоб | in order to -без | without -будто | as if -человек | man, person, one -чего | genitive form of `what' -раз | once -тоже | also -себе | to oneself -под | beneath -жизнь | life -будет | will be -ж | short form of intensifer particle `же' -тогда | then -кто | who -этот | this -говорил | was saying -того | genitive form of `that' -потому | for that reason -этого | genitive form of `this' -какой | which -совсем | altogether -ним | prepositional form of `его', `они' -здесь | here -этом | prepositional form of `этот' -один | one -почти | almost -мой | my -тем | instrumental/dative plural of `тот', `то' -чтобы | full form of `in order that' -нее | her (acc.) -кажется | it seems -сейчас | now -были | they were -куда | where to -зачем | why -сказать | to say -всех | all (acc., gen. preposn. plural) -никогда | never -сегодня | today -можно | possible, one can -при | by -наконец | finally -два | two -об | alternative form of `о', about -другой | another -хоть | even -после | after -над | above -больше | more -тот | that one (masc.) -через | across, in -эти | these -нас | us -про | about -всего | in all, only, of all -них | prepositional form of `они' (they) -какая | which, feminine -много | lots -разве | interrogative particle -сказала | she said -три | three -эту | this, acc. fem. sing. -моя | my, feminine -впрочем | moreover, besides -хорошо | good -свою | ones own, acc. fem. sing. -этой | oblique form of `эта', fem. `this' -перед | in front of -иногда | sometimes -лучше | better -чуть | a little -том | preposn. 
form of `that one' -нельзя | one must not -такой | such a one -им | to them -более | more -всегда | always -конечно | of course -всю | acc. fem. sing of `all' -между | between - - - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - | - | stems of verbs `to be', `to have', `to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_sv.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f6766..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | så = so, but also seed. These are indicated clearly below. - -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -på | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -så | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -då | then, when -sin | his -nu | now -har | have -inte | inte någon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -något | some etc -från | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -någon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -åt | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -några | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sådan | such a -vår | our 
-blivit | from bli -dess | its -inom | within -mellan | between -sådant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sådana | such a -vart | each -dina | thy -vars | whose -vårt | our -våra | our -ert | your -era | your -vilkas | whose - diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_th.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_th.txt deleted file mode 100644 index 07f0fabe692..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย -แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว diff --git a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_tr.txt b/solr/example/example-DIH/solr/mail/conf/lang/stopwords_tr.txt deleted file mode 100644 index 84d9408d4ea..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,212 +0,0 @@ -# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben 
-benden -beni -benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten diff --git a/solr/example/example-DIH/solr/mail/conf/lang/userdict_ja.txt b/solr/example/example-DIH/solr/mail/conf/lang/userdict_ja.txt deleted file mode 100644 index 6f0368e4d81..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags. Notice that entries do -# not have weights since they are always used when found. 
This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -# , ... , ... , -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same is undefined. -# -# Whitespace only lines are ignored. Comments are not allowed on entry lines. -# - -# Custom segmentation for kanji compounds -日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 -関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 - -# Custom segmentation for compound katakana -トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 -ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 - -# Custom reading for former sumo wrestler -朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/example/example-DIH/solr/mail/conf/mail-data-config.xml b/solr/example/example-DIH/solr/mail/conf/mail-data-config.xml deleted file mode 100644 index 736aea7cc99..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/mail-data-config.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/managed-schema b/solr/example/example-DIH/solr/mail/conf/managed-schema deleted file mode 100644 index d4502125187..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/managed-schema +++ /dev/null @@ -1,1062 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - messageId - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt b/solr/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt deleted file mode 100644 index 9a84b6eac34..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt +++ /dev/null @@ -1,3813 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This map converts alphabetic, numeric, and symbolic Unicode characters -# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode -# block) into their ASCII equivalents, if one exists. 
-# -# Characters from the following Unicode blocks are converted; however, only -# those characters with reasonable ASCII alternatives are converted: -# -# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf -# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf -# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf -# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf -# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf -# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf -# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf -# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf -# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf -# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf -# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf -# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf -# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf -# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf -# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf -# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf -# -# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode -# -# The set of character conversions supported by this map is a superset of -# those supported by the map represented by mapping-ISOLatin1Accent.txt. -# -# See the bottom of this file for the Perl script used to generate the contents -# of this file (without this header) from ASCIIFoldingFilter.java. - - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
- - -# À [LATIN CAPITAL LETTER A WITH GRAVE] -"\u00C0" => "A" - -# Á [LATIN CAPITAL LETTER A WITH ACUTE] -"\u00C1" => "A" - -#  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] -"\u00C2" => "A" - -# à [LATIN CAPITAL LETTER A WITH TILDE] -"\u00C3" => "A" - -# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] -"\u00C4" => "A" - -# Å [LATIN CAPITAL LETTER A WITH RING ABOVE] -"\u00C5" => "A" - -# Ā [LATIN CAPITAL LETTER A WITH MACRON] -"\u0100" => "A" - -# Ă [LATIN CAPITAL LETTER A WITH BREVE] -"\u0102" => "A" - -# Ą [LATIN CAPITAL LETTER A WITH OGONEK] -"\u0104" => "A" - -# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] -"\u018F" => "A" - -# Ǎ [LATIN CAPITAL LETTER A WITH CARON] -"\u01CD" => "A" - -# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] -"\u01DE" => "A" - -# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E0" => "A" - -# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FA" => "A" - -# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] -"\u0200" => "A" - -# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] -"\u0202" => "A" - -# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] -"\u0226" => "A" - -# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] -"\u023A" => "A" - -# ᴀ [LATIN LETTER SMALL CAPITAL A] -"\u1D00" => "A" - -# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] -"\u1E00" => "A" - -# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] -"\u1EA0" => "A" - -# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] -"\u1EA2" => "A" - -# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA4" => "A" - -# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] -"\u1EA6" => "A" - -# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA8" => "A" - -# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAA" => "A" - -# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAC" => "A" - -# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] -"\u1EAE" => "A" - -# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] -"\u1EB0" => "A" - -# Ẳ [LATIN CAPITAL LETTER A WITH 
BREVE AND HOOK ABOVE] -"\u1EB2" => "A" - -# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] -"\u1EB4" => "A" - -# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB6" => "A" - -# Ⓐ [CIRCLED LATIN CAPITAL LETTER A] -"\u24B6" => "A" - -# A [FULLWIDTH LATIN CAPITAL LETTER A] -"\uFF21" => "A" - -# à [LATIN SMALL LETTER A WITH GRAVE] -"\u00E0" => "a" - -# á [LATIN SMALL LETTER A WITH ACUTE] -"\u00E1" => "a" - -# â [LATIN SMALL LETTER A WITH CIRCUMFLEX] -"\u00E2" => "a" - -# ã [LATIN SMALL LETTER A WITH TILDE] -"\u00E3" => "a" - -# ä [LATIN SMALL LETTER A WITH DIAERESIS] -"\u00E4" => "a" - -# å [LATIN SMALL LETTER A WITH RING ABOVE] -"\u00E5" => "a" - -# ā [LATIN SMALL LETTER A WITH MACRON] -"\u0101" => "a" - -# ă [LATIN SMALL LETTER A WITH BREVE] -"\u0103" => "a" - -# ą [LATIN SMALL LETTER A WITH OGONEK] -"\u0105" => "a" - -# ǎ [LATIN SMALL LETTER A WITH CARON] -"\u01CE" => "a" - -# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] -"\u01DF" => "a" - -# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E1" => "a" - -# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FB" => "a" - -# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] -"\u0201" => "a" - -# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] -"\u0203" => "a" - -# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] -"\u0227" => "a" - -# ɐ [LATIN SMALL LETTER TURNED A] -"\u0250" => "a" - -# ə [LATIN SMALL LETTER SCHWA] -"\u0259" => "a" - -# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] -"\u025A" => "a" - -# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] -"\u1D8F" => "a" - -# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] -"\u1D95" => "a" - -# ạ [LATIN SMALL LETTER A WITH RING BELOW] -"\u1E01" => "a" - -# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] -"\u1E9A" => "a" - -# ạ [LATIN SMALL LETTER A WITH DOT BELOW] -"\u1EA1" => "a" - -# ả [LATIN SMALL LETTER A WITH HOOK ABOVE] -"\u1EA3" => "a" - -# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA5" => "a" - -# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] 
-"\u1EA7" => "a" - -# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA9" => "a" - -# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAB" => "a" - -# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAD" => "a" - -# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] -"\u1EAF" => "a" - -# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] -"\u1EB1" => "a" - -# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] -"\u1EB3" => "a" - -# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] -"\u1EB5" => "a" - -# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB7" => "a" - -# ₐ [LATIN SUBSCRIPT SMALL LETTER A] -"\u2090" => "a" - -# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] -"\u2094" => "a" - -# ⓐ [CIRCLED LATIN SMALL LETTER A] -"\u24D0" => "a" - -# ⱥ [LATIN SMALL LETTER A WITH STROKE] -"\u2C65" => "a" - -# Ɐ [LATIN CAPITAL LETTER TURNED A] -"\u2C6F" => "a" - -# a [FULLWIDTH LATIN SMALL LETTER A] -"\uFF41" => "a" - -# Ꜳ [LATIN CAPITAL LETTER AA] -"\uA732" => "AA" - -# Æ [LATIN CAPITAL LETTER AE] -"\u00C6" => "AE" - -# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] -"\u01E2" => "AE" - -# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] -"\u01FC" => "AE" - -# ᴁ [LATIN LETTER SMALL CAPITAL AE] -"\u1D01" => "AE" - -# Ꜵ [LATIN CAPITAL LETTER AO] -"\uA734" => "AO" - -# Ꜷ [LATIN CAPITAL LETTER AU] -"\uA736" => "AU" - -# Ꜹ [LATIN CAPITAL LETTER AV] -"\uA738" => "AV" - -# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] -"\uA73A" => "AV" - -# Ꜽ [LATIN CAPITAL LETTER AY] -"\uA73C" => "AY" - -# ⒜ [PARENTHESIZED LATIN SMALL LETTER A] -"\u249C" => "(a)" - -# ꜳ [LATIN SMALL LETTER AA] -"\uA733" => "aa" - -# æ [LATIN SMALL LETTER AE] -"\u00E6" => "ae" - -# ǣ [LATIN SMALL LETTER AE WITH MACRON] -"\u01E3" => "ae" - -# ǽ [LATIN SMALL LETTER AE WITH ACUTE] -"\u01FD" => "ae" - -# ᴂ [LATIN SMALL LETTER TURNED AE] -"\u1D02" => "ae" - -# ꜵ [LATIN SMALL LETTER AO] -"\uA735" => "ao" - -# ꜷ [LATIN SMALL LETTER AU] -"\uA737" => "au" - -# ꜹ [LATIN SMALL LETTER AV] -"\uA739" => "av" - -# ꜻ [LATIN 
SMALL LETTER AV WITH HORIZONTAL BAR] -"\uA73B" => "av" - -# ꜽ [LATIN SMALL LETTER AY] -"\uA73D" => "ay" - -# Ɓ [LATIN CAPITAL LETTER B WITH HOOK] -"\u0181" => "B" - -# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] -"\u0182" => "B" - -# Ƀ [LATIN CAPITAL LETTER B WITH STROKE] -"\u0243" => "B" - -# ʙ [LATIN LETTER SMALL CAPITAL B] -"\u0299" => "B" - -# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] -"\u1D03" => "B" - -# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] -"\u1E02" => "B" - -# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] -"\u1E04" => "B" - -# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] -"\u1E06" => "B" - -# Ⓑ [CIRCLED LATIN CAPITAL LETTER B] -"\u24B7" => "B" - -# B [FULLWIDTH LATIN CAPITAL LETTER B] -"\uFF22" => "B" - -# ƀ [LATIN SMALL LETTER B WITH STROKE] -"\u0180" => "b" - -# ƃ [LATIN SMALL LETTER B WITH TOPBAR] -"\u0183" => "b" - -# ɓ [LATIN SMALL LETTER B WITH HOOK] -"\u0253" => "b" - -# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] -"\u1D6C" => "b" - -# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] -"\u1D80" => "b" - -# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] -"\u1E03" => "b" - -# ḅ [LATIN SMALL LETTER B WITH DOT BELOW] -"\u1E05" => "b" - -# ḇ [LATIN SMALL LETTER B WITH LINE BELOW] -"\u1E07" => "b" - -# ⓑ [CIRCLED LATIN SMALL LETTER B] -"\u24D1" => "b" - -# b [FULLWIDTH LATIN SMALL LETTER B] -"\uFF42" => "b" - -# ⒝ [PARENTHESIZED LATIN SMALL LETTER B] -"\u249D" => "(b)" - -# Ç [LATIN CAPITAL LETTER C WITH CEDILLA] -"\u00C7" => "C" - -# Ć [LATIN CAPITAL LETTER C WITH ACUTE] -"\u0106" => "C" - -# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] -"\u0108" => "C" - -# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] -"\u010A" => "C" - -# Č [LATIN CAPITAL LETTER C WITH CARON] -"\u010C" => "C" - -# Ƈ [LATIN CAPITAL LETTER C WITH HOOK] -"\u0187" => "C" - -# Ȼ [LATIN CAPITAL LETTER C WITH STROKE] -"\u023B" => "C" - -# ʗ [LATIN LETTER STRETCHED C] -"\u0297" => "C" - -# ᴄ [LATIN LETTER SMALL CAPITAL C] -"\u1D04" => "C" - -# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] -"\u1E08" => "C" - -# Ⓒ 
[CIRCLED LATIN CAPITAL LETTER C] -"\u24B8" => "C" - -# C [FULLWIDTH LATIN CAPITAL LETTER C] -"\uFF23" => "C" - -# ç [LATIN SMALL LETTER C WITH CEDILLA] -"\u00E7" => "c" - -# ć [LATIN SMALL LETTER C WITH ACUTE] -"\u0107" => "c" - -# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] -"\u0109" => "c" - -# ċ [LATIN SMALL LETTER C WITH DOT ABOVE] -"\u010B" => "c" - -# č [LATIN SMALL LETTER C WITH CARON] -"\u010D" => "c" - -# ƈ [LATIN SMALL LETTER C WITH HOOK] -"\u0188" => "c" - -# ȼ [LATIN SMALL LETTER C WITH STROKE] -"\u023C" => "c" - -# ɕ [LATIN SMALL LETTER C WITH CURL] -"\u0255" => "c" - -# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] -"\u1E09" => "c" - -# ↄ [LATIN SMALL LETTER REVERSED C] -"\u2184" => "c" - -# ⓒ [CIRCLED LATIN SMALL LETTER C] -"\u24D2" => "c" - -# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] -"\uA73E" => "c" - -# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] -"\uA73F" => "c" - -# c [FULLWIDTH LATIN SMALL LETTER C] -"\uFF43" => "c" - -# ⒞ [PARENTHESIZED LATIN SMALL LETTER C] -"\u249E" => "(c)" - -# Ð [LATIN CAPITAL LETTER ETH] -"\u00D0" => "D" - -# Ď [LATIN CAPITAL LETTER D WITH CARON] -"\u010E" => "D" - -# Đ [LATIN CAPITAL LETTER D WITH STROKE] -"\u0110" => "D" - -# Ɖ [LATIN CAPITAL LETTER AFRICAN D] -"\u0189" => "D" - -# Ɗ [LATIN CAPITAL LETTER D WITH HOOK] -"\u018A" => "D" - -# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] -"\u018B" => "D" - -# ᴅ [LATIN LETTER SMALL CAPITAL D] -"\u1D05" => "D" - -# ᴆ [LATIN LETTER SMALL CAPITAL ETH] -"\u1D06" => "D" - -# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] -"\u1E0A" => "D" - -# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] -"\u1E0C" => "D" - -# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] -"\u1E0E" => "D" - -# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] -"\u1E10" => "D" - -# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E12" => "D" - -# Ⓓ [CIRCLED LATIN CAPITAL LETTER D] -"\u24B9" => "D" - -# Ꝺ [LATIN CAPITAL LETTER INSULAR D] -"\uA779" => "D" - -# D [FULLWIDTH LATIN CAPITAL LETTER D] -"\uFF24" => "D" - -# ð [LATIN SMALL 
LETTER ETH] -"\u00F0" => "d" - -# ď [LATIN SMALL LETTER D WITH CARON] -"\u010F" => "d" - -# đ [LATIN SMALL LETTER D WITH STROKE] -"\u0111" => "d" - -# ƌ [LATIN SMALL LETTER D WITH TOPBAR] -"\u018C" => "d" - -# ȡ [LATIN SMALL LETTER D WITH CURL] -"\u0221" => "d" - -# ɖ [LATIN SMALL LETTER D WITH TAIL] -"\u0256" => "d" - -# ɗ [LATIN SMALL LETTER D WITH HOOK] -"\u0257" => "d" - -# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] -"\u1D6D" => "d" - -# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] -"\u1D81" => "d" - -# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] -"\u1D91" => "d" - -# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] -"\u1E0B" => "d" - -# ḍ [LATIN SMALL LETTER D WITH DOT BELOW] -"\u1E0D" => "d" - -# ḏ [LATIN SMALL LETTER D WITH LINE BELOW] -"\u1E0F" => "d" - -# ḑ [LATIN SMALL LETTER D WITH CEDILLA] -"\u1E11" => "d" - -# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E13" => "d" - -# ⓓ [CIRCLED LATIN SMALL LETTER D] -"\u24D3" => "d" - -# ꝺ [LATIN SMALL LETTER INSULAR D] -"\uA77A" => "d" - -# d [FULLWIDTH LATIN SMALL LETTER D] -"\uFF44" => "d" - -# DŽ [LATIN CAPITAL LETTER DZ WITH CARON] -"\u01C4" => "DZ" - -# DZ [LATIN CAPITAL LETTER DZ] -"\u01F1" => "DZ" - -# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] -"\u01C5" => "Dz" - -# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] -"\u01F2" => "Dz" - -# ⒟ [PARENTHESIZED LATIN SMALL LETTER D] -"\u249F" => "(d)" - -# ȸ [LATIN SMALL LETTER DB DIGRAPH] -"\u0238" => "db" - -# dž [LATIN SMALL LETTER DZ WITH CARON] -"\u01C6" => "dz" - -# dz [LATIN SMALL LETTER DZ] -"\u01F3" => "dz" - -# ʣ [LATIN SMALL LETTER DZ DIGRAPH] -"\u02A3" => "dz" - -# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] -"\u02A5" => "dz" - -# È [LATIN CAPITAL LETTER E WITH GRAVE] -"\u00C8" => "E" - -# É [LATIN CAPITAL LETTER E WITH ACUTE] -"\u00C9" => "E" - -# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] -"\u00CA" => "E" - -# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] -"\u00CB" => "E" - -# Ē [LATIN CAPITAL LETTER E WITH MACRON] -"\u0112" => "E" - -# Ĕ 
[LATIN CAPITAL LETTER E WITH BREVE] -"\u0114" => "E" - -# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] -"\u0116" => "E" - -# Ę [LATIN CAPITAL LETTER E WITH OGONEK] -"\u0118" => "E" - -# Ě [LATIN CAPITAL LETTER E WITH CARON] -"\u011A" => "E" - -# Ǝ [LATIN CAPITAL LETTER REVERSED E] -"\u018E" => "E" - -# Ɛ [LATIN CAPITAL LETTER OPEN E] -"\u0190" => "E" - -# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] -"\u0204" => "E" - -# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] -"\u0206" => "E" - -# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] -"\u0228" => "E" - -# Ɇ [LATIN CAPITAL LETTER E WITH STROKE] -"\u0246" => "E" - -# ᴇ [LATIN LETTER SMALL CAPITAL E] -"\u1D07" => "E" - -# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] -"\u1E14" => "E" - -# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] -"\u1E16" => "E" - -# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E18" => "E" - -# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] -"\u1E1A" => "E" - -# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] -"\u1E1C" => "E" - -# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] -"\u1EB8" => "E" - -# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] -"\u1EBA" => "E" - -# Ẽ [LATIN CAPITAL LETTER E WITH TILDE] -"\u1EBC" => "E" - -# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBE" => "E" - -# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC0" => "E" - -# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC2" => "E" - -# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC4" => "E" - -# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC6" => "E" - -# Ⓔ [CIRCLED LATIN CAPITAL LETTER E] -"\u24BA" => "E" - -# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] -"\u2C7B" => "E" - -# E [FULLWIDTH LATIN CAPITAL LETTER E] -"\uFF25" => "E" - -# è [LATIN SMALL LETTER E WITH GRAVE] -"\u00E8" => "e" - -# é [LATIN SMALL LETTER E WITH ACUTE] -"\u00E9" => "e" - -# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] -"\u00EA" => "e" - -# ë [LATIN SMALL LETTER E WITH DIAERESIS] -"\u00EB" 
=> "e" - -# ē [LATIN SMALL LETTER E WITH MACRON] -"\u0113" => "e" - -# ĕ [LATIN SMALL LETTER E WITH BREVE] -"\u0115" => "e" - -# ė [LATIN SMALL LETTER E WITH DOT ABOVE] -"\u0117" => "e" - -# ę [LATIN SMALL LETTER E WITH OGONEK] -"\u0119" => "e" - -# ě [LATIN SMALL LETTER E WITH CARON] -"\u011B" => "e" - -# ǝ [LATIN SMALL LETTER TURNED E] -"\u01DD" => "e" - -# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] -"\u0205" => "e" - -# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] -"\u0207" => "e" - -# ȩ [LATIN SMALL LETTER E WITH CEDILLA] -"\u0229" => "e" - -# ɇ [LATIN SMALL LETTER E WITH STROKE] -"\u0247" => "e" - -# ɘ [LATIN SMALL LETTER REVERSED E] -"\u0258" => "e" - -# ɛ [LATIN SMALL LETTER OPEN E] -"\u025B" => "e" - -# ɜ [LATIN SMALL LETTER REVERSED OPEN E] -"\u025C" => "e" - -# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] -"\u025D" => "e" - -# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] -"\u025E" => "e" - -# ʚ [LATIN SMALL LETTER CLOSED OPEN E] -"\u029A" => "e" - -# ᴈ [LATIN SMALL LETTER TURNED OPEN E] -"\u1D08" => "e" - -# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] -"\u1D92" => "e" - -# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] -"\u1D93" => "e" - -# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] -"\u1D94" => "e" - -# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] -"\u1E15" => "e" - -# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] -"\u1E17" => "e" - -# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E19" => "e" - -# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] -"\u1E1B" => "e" - -# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] -"\u1E1D" => "e" - -# ẹ [LATIN SMALL LETTER E WITH DOT BELOW] -"\u1EB9" => "e" - -# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] -"\u1EBB" => "e" - -# ẽ [LATIN SMALL LETTER E WITH TILDE] -"\u1EBD" => "e" - -# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBF" => "e" - -# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC1" => "e" - -# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC3" => 
"e" - -# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC5" => "e" - -# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC7" => "e" - -# ₑ [LATIN SUBSCRIPT SMALL LETTER E] -"\u2091" => "e" - -# ⓔ [CIRCLED LATIN SMALL LETTER E] -"\u24D4" => "e" - -# ⱸ [LATIN SMALL LETTER E WITH NOTCH] -"\u2C78" => "e" - -# e [FULLWIDTH LATIN SMALL LETTER E] -"\uFF45" => "e" - -# ⒠ [PARENTHESIZED LATIN SMALL LETTER E] -"\u24A0" => "(e)" - -# Ƒ [LATIN CAPITAL LETTER F WITH HOOK] -"\u0191" => "F" - -# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] -"\u1E1E" => "F" - -# Ⓕ [CIRCLED LATIN CAPITAL LETTER F] -"\u24BB" => "F" - -# ꜰ [LATIN LETTER SMALL CAPITAL F] -"\uA730" => "F" - -# Ꝼ [LATIN CAPITAL LETTER INSULAR F] -"\uA77B" => "F" - -# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] -"\uA7FB" => "F" - -# F [FULLWIDTH LATIN CAPITAL LETTER F] -"\uFF26" => "F" - -# ƒ [LATIN SMALL LETTER F WITH HOOK] -"\u0192" => "f" - -# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] -"\u1D6E" => "f" - -# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] -"\u1D82" => "f" - -# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] -"\u1E1F" => "f" - -# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] -"\u1E9B" => "f" - -# ⓕ [CIRCLED LATIN SMALL LETTER F] -"\u24D5" => "f" - -# ꝼ [LATIN SMALL LETTER INSULAR F] -"\uA77C" => "f" - -# f [FULLWIDTH LATIN SMALL LETTER F] -"\uFF46" => "f" - -# ⒡ [PARENTHESIZED LATIN SMALL LETTER F] -"\u24A1" => "(f)" - -# ff [LATIN SMALL LIGATURE FF] -"\uFB00" => "ff" - -# ffi [LATIN SMALL LIGATURE FFI] -"\uFB03" => "ffi" - -# ffl [LATIN SMALL LIGATURE FFL] -"\uFB04" => "ffl" - -# fi [LATIN SMALL LIGATURE FI] -"\uFB01" => "fi" - -# fl [LATIN SMALL LIGATURE FL] -"\uFB02" => "fl" - -# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] -"\u011C" => "G" - -# Ğ [LATIN CAPITAL LETTER G WITH BREVE] -"\u011E" => "G" - -# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] -"\u0120" => "G" - -# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] -"\u0122" => "G" - -# Ɠ [LATIN CAPITAL LETTER G WITH HOOK] -"\u0193" => "G" - -# Ǥ [LATIN CAPITAL 
LETTER G WITH STROKE] -"\u01E4" => "G" - -# ǥ [LATIN SMALL LETTER G WITH STROKE] -"\u01E5" => "G" - -# Ǧ [LATIN CAPITAL LETTER G WITH CARON] -"\u01E6" => "G" - -# ǧ [LATIN SMALL LETTER G WITH CARON] -"\u01E7" => "G" - -# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] -"\u01F4" => "G" - -# ɢ [LATIN LETTER SMALL CAPITAL G] -"\u0262" => "G" - -# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] -"\u029B" => "G" - -# Ḡ [LATIN CAPITAL LETTER G WITH MACRON] -"\u1E20" => "G" - -# Ⓖ [CIRCLED LATIN CAPITAL LETTER G] -"\u24BC" => "G" - -# Ᵹ [LATIN CAPITAL LETTER INSULAR G] -"\uA77D" => "G" - -# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] -"\uA77E" => "G" - -# G [FULLWIDTH LATIN CAPITAL LETTER G] -"\uFF27" => "G" - -# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] -"\u011D" => "g" - -# ğ [LATIN SMALL LETTER G WITH BREVE] -"\u011F" => "g" - -# ġ [LATIN SMALL LETTER G WITH DOT ABOVE] -"\u0121" => "g" - -# ģ [LATIN SMALL LETTER G WITH CEDILLA] -"\u0123" => "g" - -# ǵ [LATIN SMALL LETTER G WITH ACUTE] -"\u01F5" => "g" - -# ɠ [LATIN SMALL LETTER G WITH HOOK] -"\u0260" => "g" - -# ɡ [LATIN SMALL LETTER SCRIPT G] -"\u0261" => "g" - -# ᵷ [LATIN SMALL LETTER TURNED G] -"\u1D77" => "g" - -# ᵹ [LATIN SMALL LETTER INSULAR G] -"\u1D79" => "g" - -# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] -"\u1D83" => "g" - -# ḡ [LATIN SMALL LETTER G WITH MACRON] -"\u1E21" => "g" - -# ⓖ [CIRCLED LATIN SMALL LETTER G] -"\u24D6" => "g" - -# ꝿ [LATIN SMALL LETTER TURNED INSULAR G] -"\uA77F" => "g" - -# g [FULLWIDTH LATIN SMALL LETTER G] -"\uFF47" => "g" - -# ⒢ [PARENTHESIZED LATIN SMALL LETTER G] -"\u24A2" => "(g)" - -# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] -"\u0124" => "H" - -# Ħ [LATIN CAPITAL LETTER H WITH STROKE] -"\u0126" => "H" - -# Ȟ [LATIN CAPITAL LETTER H WITH CARON] -"\u021E" => "H" - -# ʜ [LATIN LETTER SMALL CAPITAL H] -"\u029C" => "H" - -# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] -"\u1E22" => "H" - -# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] -"\u1E24" => "H" - -# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] 
-"\u1E26" => "H" - -# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] -"\u1E28" => "H" - -# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] -"\u1E2A" => "H" - -# Ⓗ [CIRCLED LATIN CAPITAL LETTER H] -"\u24BD" => "H" - -# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] -"\u2C67" => "H" - -# Ⱶ [LATIN CAPITAL LETTER HALF H] -"\u2C75" => "H" - -# H [FULLWIDTH LATIN CAPITAL LETTER H] -"\uFF28" => "H" - -# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] -"\u0125" => "h" - -# ħ [LATIN SMALL LETTER H WITH STROKE] -"\u0127" => "h" - -# ȟ [LATIN SMALL LETTER H WITH CARON] -"\u021F" => "h" - -# ɥ [LATIN SMALL LETTER TURNED H] -"\u0265" => "h" - -# ɦ [LATIN SMALL LETTER H WITH HOOK] -"\u0266" => "h" - -# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] -"\u02AE" => "h" - -# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] -"\u02AF" => "h" - -# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] -"\u1E23" => "h" - -# ḥ [LATIN SMALL LETTER H WITH DOT BELOW] -"\u1E25" => "h" - -# ḧ [LATIN SMALL LETTER H WITH DIAERESIS] -"\u1E27" => "h" - -# ḩ [LATIN SMALL LETTER H WITH CEDILLA] -"\u1E29" => "h" - -# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] -"\u1E2B" => "h" - -# ẖ [LATIN SMALL LETTER H WITH LINE BELOW] -"\u1E96" => "h" - -# ⓗ [CIRCLED LATIN SMALL LETTER H] -"\u24D7" => "h" - -# ⱨ [LATIN SMALL LETTER H WITH DESCENDER] -"\u2C68" => "h" - -# ⱶ [LATIN SMALL LETTER HALF H] -"\u2C76" => "h" - -# h [FULLWIDTH LATIN SMALL LETTER H] -"\uFF48" => "h" - -# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] -"\u01F6" => "HV" - -# ⒣ [PARENTHESIZED LATIN SMALL LETTER H] -"\u24A3" => "(h)" - -# ƕ [LATIN SMALL LETTER HV] -"\u0195" => "hv" - -# Ì [LATIN CAPITAL LETTER I WITH GRAVE] -"\u00CC" => "I" - -# Í [LATIN CAPITAL LETTER I WITH ACUTE] -"\u00CD" => "I" - -# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] -"\u00CE" => "I" - -# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] -"\u00CF" => "I" - -# Ĩ [LATIN CAPITAL LETTER I WITH TILDE] -"\u0128" => "I" - -# Ī [LATIN CAPITAL LETTER I WITH MACRON] -"\u012A" => "I" - -# Ĭ [LATIN 
CAPITAL LETTER I WITH BREVE] -"\u012C" => "I" - -# Į [LATIN CAPITAL LETTER I WITH OGONEK] -"\u012E" => "I" - -# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] -"\u0130" => "I" - -# Ɩ [LATIN CAPITAL LETTER IOTA] -"\u0196" => "I" - -# Ɨ [LATIN CAPITAL LETTER I WITH STROKE] -"\u0197" => "I" - -# Ǐ [LATIN CAPITAL LETTER I WITH CARON] -"\u01CF" => "I" - -# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] -"\u0208" => "I" - -# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] -"\u020A" => "I" - -# ɪ [LATIN LETTER SMALL CAPITAL I] -"\u026A" => "I" - -# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] -"\u1D7B" => "I" - -# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] -"\u1E2C" => "I" - -# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2E" => "I" - -# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] -"\u1EC8" => "I" - -# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] -"\u1ECA" => "I" - -# Ⓘ [CIRCLED LATIN CAPITAL LETTER I] -"\u24BE" => "I" - -# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] -"\uA7FE" => "I" - -# I [FULLWIDTH LATIN CAPITAL LETTER I] -"\uFF29" => "I" - -# ì [LATIN SMALL LETTER I WITH GRAVE] -"\u00EC" => "i" - -# í [LATIN SMALL LETTER I WITH ACUTE] -"\u00ED" => "i" - -# î [LATIN SMALL LETTER I WITH CIRCUMFLEX] -"\u00EE" => "i" - -# ï [LATIN SMALL LETTER I WITH DIAERESIS] -"\u00EF" => "i" - -# ĩ [LATIN SMALL LETTER I WITH TILDE] -"\u0129" => "i" - -# ī [LATIN SMALL LETTER I WITH MACRON] -"\u012B" => "i" - -# ĭ [LATIN SMALL LETTER I WITH BREVE] -"\u012D" => "i" - -# į [LATIN SMALL LETTER I WITH OGONEK] -"\u012F" => "i" - -# ı [LATIN SMALL LETTER DOTLESS I] -"\u0131" => "i" - -# ǐ [LATIN SMALL LETTER I WITH CARON] -"\u01D0" => "i" - -# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] -"\u0209" => "i" - -# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] -"\u020B" => "i" - -# ɨ [LATIN SMALL LETTER I WITH STROKE] -"\u0268" => "i" - -# ᴉ [LATIN SMALL LETTER TURNED I] -"\u1D09" => "i" - -# ᵢ [LATIN SUBSCRIPT SMALL LETTER I] -"\u1D62" => "i" - -# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] -"\u1D7C" => "i" - 
-# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] -"\u1D96" => "i" - -# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] -"\u1E2D" => "i" - -# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2F" => "i" - -# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] -"\u1EC9" => "i" - -# ị [LATIN SMALL LETTER I WITH DOT BELOW] -"\u1ECB" => "i" - -# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] -"\u2071" => "i" - -# ⓘ [CIRCLED LATIN SMALL LETTER I] -"\u24D8" => "i" - -# i [FULLWIDTH LATIN SMALL LETTER I] -"\uFF49" => "i" - -# IJ [LATIN CAPITAL LIGATURE IJ] -"\u0132" => "IJ" - -# ⒤ [PARENTHESIZED LATIN SMALL LETTER I] -"\u24A4" => "(i)" - -# ij [LATIN SMALL LIGATURE IJ] -"\u0133" => "ij" - -# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] -"\u0134" => "J" - -# Ɉ [LATIN CAPITAL LETTER J WITH STROKE] -"\u0248" => "J" - -# ᴊ [LATIN LETTER SMALL CAPITAL J] -"\u1D0A" => "J" - -# Ⓙ [CIRCLED LATIN CAPITAL LETTER J] -"\u24BF" => "J" - -# J [FULLWIDTH LATIN CAPITAL LETTER J] -"\uFF2A" => "J" - -# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] -"\u0135" => "j" - -# ǰ [LATIN SMALL LETTER J WITH CARON] -"\u01F0" => "j" - -# ȷ [LATIN SMALL LETTER DOTLESS J] -"\u0237" => "j" - -# ɉ [LATIN SMALL LETTER J WITH STROKE] -"\u0249" => "j" - -# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] -"\u025F" => "j" - -# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] -"\u0284" => "j" - -# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] -"\u029D" => "j" - -# ⓙ [CIRCLED LATIN SMALL LETTER J] -"\u24D9" => "j" - -# ⱼ [LATIN SUBSCRIPT SMALL LETTER J] -"\u2C7C" => "j" - -# j [FULLWIDTH LATIN SMALL LETTER J] -"\uFF4A" => "j" - -# ⒥ [PARENTHESIZED LATIN SMALL LETTER J] -"\u24A5" => "(j)" - -# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] -"\u0136" => "K" - -# Ƙ [LATIN CAPITAL LETTER K WITH HOOK] -"\u0198" => "K" - -# Ǩ [LATIN CAPITAL LETTER K WITH CARON] -"\u01E8" => "K" - -# ᴋ [LATIN LETTER SMALL CAPITAL K] -"\u1D0B" => "K" - -# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] -"\u1E30" => "K" - -# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] -"\u1E32" => "K" - 
-# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] -"\u1E34" => "K" - -# Ⓚ [CIRCLED LATIN CAPITAL LETTER K] -"\u24C0" => "K" - -# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] -"\u2C69" => "K" - -# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] -"\uA740" => "K" - -# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] -"\uA742" => "K" - -# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA744" => "K" - -# K [FULLWIDTH LATIN CAPITAL LETTER K] -"\uFF2B" => "K" - -# ķ [LATIN SMALL LETTER K WITH CEDILLA] -"\u0137" => "k" - -# ƙ [LATIN SMALL LETTER K WITH HOOK] -"\u0199" => "k" - -# ǩ [LATIN SMALL LETTER K WITH CARON] -"\u01E9" => "k" - -# ʞ [LATIN SMALL LETTER TURNED K] -"\u029E" => "k" - -# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] -"\u1D84" => "k" - -# ḱ [LATIN SMALL LETTER K WITH ACUTE] -"\u1E31" => "k" - -# ḳ [LATIN SMALL LETTER K WITH DOT BELOW] -"\u1E33" => "k" - -# ḵ [LATIN SMALL LETTER K WITH LINE BELOW] -"\u1E35" => "k" - -# ⓚ [CIRCLED LATIN SMALL LETTER K] -"\u24DA" => "k" - -# ⱪ [LATIN SMALL LETTER K WITH DESCENDER] -"\u2C6A" => "k" - -# ꝁ [LATIN SMALL LETTER K WITH STROKE] -"\uA741" => "k" - -# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] -"\uA743" => "k" - -# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA745" => "k" - -# k [FULLWIDTH LATIN SMALL LETTER K] -"\uFF4B" => "k" - -# ⒦ [PARENTHESIZED LATIN SMALL LETTER K] -"\u24A6" => "(k)" - -# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] -"\u0139" => "L" - -# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] -"\u013B" => "L" - -# Ľ [LATIN CAPITAL LETTER L WITH CARON] -"\u013D" => "L" - -# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] -"\u013F" => "L" - -# Ł [LATIN CAPITAL LETTER L WITH STROKE] -"\u0141" => "L" - -# Ƚ [LATIN CAPITAL LETTER L WITH BAR] -"\u023D" => "L" - -# ʟ [LATIN LETTER SMALL CAPITAL L] -"\u029F" => "L" - -# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] -"\u1D0C" => "L" - -# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] -"\u1E36" => "L" - -# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] -"\u1E38" => 
"L" - -# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] -"\u1E3A" => "L" - -# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3C" => "L" - -# Ⓛ [CIRCLED LATIN CAPITAL LETTER L] -"\u24C1" => "L" - -# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] -"\u2C60" => "L" - -# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] -"\u2C62" => "L" - -# Ꝇ [LATIN CAPITAL LETTER BROKEN L] -"\uA746" => "L" - -# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] -"\uA748" => "L" - -# Ꞁ [LATIN CAPITAL LETTER TURNED L] -"\uA780" => "L" - -# L [FULLWIDTH LATIN CAPITAL LETTER L] -"\uFF2C" => "L" - -# ĺ [LATIN SMALL LETTER L WITH ACUTE] -"\u013A" => "l" - -# ļ [LATIN SMALL LETTER L WITH CEDILLA] -"\u013C" => "l" - -# ľ [LATIN SMALL LETTER L WITH CARON] -"\u013E" => "l" - -# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] -"\u0140" => "l" - -# ł [LATIN SMALL LETTER L WITH STROKE] -"\u0142" => "l" - -# ƚ [LATIN SMALL LETTER L WITH BAR] -"\u019A" => "l" - -# ȴ [LATIN SMALL LETTER L WITH CURL] -"\u0234" => "l" - -# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] -"\u026B" => "l" - -# ɬ [LATIN SMALL LETTER L WITH BELT] -"\u026C" => "l" - -# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] -"\u026D" => "l" - -# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] -"\u1D85" => "l" - -# ḷ [LATIN SMALL LETTER L WITH DOT BELOW] -"\u1E37" => "l" - -# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] -"\u1E39" => "l" - -# ḻ [LATIN SMALL LETTER L WITH LINE BELOW] -"\u1E3B" => "l" - -# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3D" => "l" - -# ⓛ [CIRCLED LATIN SMALL LETTER L] -"\u24DB" => "l" - -# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] -"\u2C61" => "l" - -# ꝇ [LATIN SMALL LETTER BROKEN L] -"\uA747" => "l" - -# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] -"\uA749" => "l" - -# ꞁ [LATIN SMALL LETTER TURNED L] -"\uA781" => "l" - -# l [FULLWIDTH LATIN SMALL LETTER L] -"\uFF4C" => "l" - -# LJ [LATIN CAPITAL LETTER LJ] -"\u01C7" => "LJ" - -# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] -"\u1EFA" => "LL" - -# Lj [LATIN CAPITAL LETTER L WITH 
SMALL LETTER J] -"\u01C8" => "Lj" - -# ⒧ [PARENTHESIZED LATIN SMALL LETTER L] -"\u24A7" => "(l)" - -# lj [LATIN SMALL LETTER LJ] -"\u01C9" => "lj" - -# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] -"\u1EFB" => "ll" - -# ʪ [LATIN SMALL LETTER LS DIGRAPH] -"\u02AA" => "ls" - -# ʫ [LATIN SMALL LETTER LZ DIGRAPH] -"\u02AB" => "lz" - -# Ɯ [LATIN CAPITAL LETTER TURNED M] -"\u019C" => "M" - -# ᴍ [LATIN LETTER SMALL CAPITAL M] -"\u1D0D" => "M" - -# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] -"\u1E3E" => "M" - -# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] -"\u1E40" => "M" - -# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] -"\u1E42" => "M" - -# Ⓜ [CIRCLED LATIN CAPITAL LETTER M] -"\u24C2" => "M" - -# Ɱ [LATIN CAPITAL LETTER M WITH HOOK] -"\u2C6E" => "M" - -# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] -"\uA7FD" => "M" - -# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] -"\uA7FF" => "M" - -# M [FULLWIDTH LATIN CAPITAL LETTER M] -"\uFF2D" => "M" - -# ɯ [LATIN SMALL LETTER TURNED M] -"\u026F" => "m" - -# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] -"\u0270" => "m" - -# ɱ [LATIN SMALL LETTER M WITH HOOK] -"\u0271" => "m" - -# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] -"\u1D6F" => "m" - -# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] -"\u1D86" => "m" - -# ḿ [LATIN SMALL LETTER M WITH ACUTE] -"\u1E3F" => "m" - -# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] -"\u1E41" => "m" - -# ṃ [LATIN SMALL LETTER M WITH DOT BELOW] -"\u1E43" => "m" - -# ⓜ [CIRCLED LATIN SMALL LETTER M] -"\u24DC" => "m" - -# m [FULLWIDTH LATIN SMALL LETTER M] -"\uFF4D" => "m" - -# ⒨ [PARENTHESIZED LATIN SMALL LETTER M] -"\u24A8" => "(m)" - -# Ñ [LATIN CAPITAL LETTER N WITH TILDE] -"\u00D1" => "N" - -# Ń [LATIN CAPITAL LETTER N WITH ACUTE] -"\u0143" => "N" - -# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] -"\u0145" => "N" - -# Ň [LATIN CAPITAL LETTER N WITH CARON] -"\u0147" => "N" - -# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] -"\u014A" => "N" - -# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] -"\u019D" => "N" - -# Ǹ [LATIN 
CAPITAL LETTER N WITH GRAVE] -"\u01F8" => "N" - -# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] -"\u0220" => "N" - -# ɴ [LATIN LETTER SMALL CAPITAL N] -"\u0274" => "N" - -# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] -"\u1D0E" => "N" - -# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] -"\u1E44" => "N" - -# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] -"\u1E46" => "N" - -# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] -"\u1E48" => "N" - -# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4A" => "N" - -# Ⓝ [CIRCLED LATIN CAPITAL LETTER N] -"\u24C3" => "N" - -# N [FULLWIDTH LATIN CAPITAL LETTER N] -"\uFF2E" => "N" - -# ñ [LATIN SMALL LETTER N WITH TILDE] -"\u00F1" => "n" - -# ń [LATIN SMALL LETTER N WITH ACUTE] -"\u0144" => "n" - -# ņ [LATIN SMALL LETTER N WITH CEDILLA] -"\u0146" => "n" - -# ň [LATIN SMALL LETTER N WITH CARON] -"\u0148" => "n" - -# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] -"\u0149" => "n" - -# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] -"\u014B" => "n" - -# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] -"\u019E" => "n" - -# ǹ [LATIN SMALL LETTER N WITH GRAVE] -"\u01F9" => "n" - -# ȵ [LATIN SMALL LETTER N WITH CURL] -"\u0235" => "n" - -# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] -"\u0272" => "n" - -# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] -"\u0273" => "n" - -# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] -"\u1D70" => "n" - -# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] -"\u1D87" => "n" - -# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] -"\u1E45" => "n" - -# ṇ [LATIN SMALL LETTER N WITH DOT BELOW] -"\u1E47" => "n" - -# ṉ [LATIN SMALL LETTER N WITH LINE BELOW] -"\u1E49" => "n" - -# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4B" => "n" - -# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] -"\u207F" => "n" - -# ⓝ [CIRCLED LATIN SMALL LETTER N] -"\u24DD" => "n" - -# n [FULLWIDTH LATIN SMALL LETTER N] -"\uFF4E" => "n" - -# NJ [LATIN CAPITAL LETTER NJ] -"\u01CA" => "NJ" - -# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] -"\u01CB" => 
"Nj" - -# ⒩ [PARENTHESIZED LATIN SMALL LETTER N] -"\u24A9" => "(n)" - -# nj [LATIN SMALL LETTER NJ] -"\u01CC" => "nj" - -# Ò [LATIN CAPITAL LETTER O WITH GRAVE] -"\u00D2" => "O" - -# Ó [LATIN CAPITAL LETTER O WITH ACUTE] -"\u00D3" => "O" - -# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] -"\u00D4" => "O" - -# Õ [LATIN CAPITAL LETTER O WITH TILDE] -"\u00D5" => "O" - -# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] -"\u00D6" => "O" - -# Ø [LATIN CAPITAL LETTER O WITH STROKE] -"\u00D8" => "O" - -# Ō [LATIN CAPITAL LETTER O WITH MACRON] -"\u014C" => "O" - -# Ŏ [LATIN CAPITAL LETTER O WITH BREVE] -"\u014E" => "O" - -# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] -"\u0150" => "O" - -# Ɔ [LATIN CAPITAL LETTER OPEN O] -"\u0186" => "O" - -# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] -"\u019F" => "O" - -# Ơ [LATIN CAPITAL LETTER O WITH HORN] -"\u01A0" => "O" - -# Ǒ [LATIN CAPITAL LETTER O WITH CARON] -"\u01D1" => "O" - -# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] -"\u01EA" => "O" - -# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] -"\u01EC" => "O" - -# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] -"\u01FE" => "O" - -# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] -"\u020C" => "O" - -# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] -"\u020E" => "O" - -# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] -"\u022A" => "O" - -# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] -"\u022C" => "O" - -# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] -"\u022E" => "O" - -# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] -"\u0230" => "O" - -# ᴏ [LATIN LETTER SMALL CAPITAL O] -"\u1D0F" => "O" - -# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] -"\u1D10" => "O" - -# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] -"\u1E4C" => "O" - -# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4E" => "O" - -# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] -"\u1E50" => "O" - -# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] -"\u1E52" => "O" - -# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] -"\u1ECC" 
=> "O" - -# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] -"\u1ECE" => "O" - -# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED0" => "O" - -# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED2" => "O" - -# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED4" => "O" - -# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED6" => "O" - -# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED8" => "O" - -# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] -"\u1EDA" => "O" - -# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] -"\u1EDC" => "O" - -# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDE" => "O" - -# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] -"\u1EE0" => "O" - -# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] -"\u1EE2" => "O" - -# Ⓞ [CIRCLED LATIN CAPITAL LETTER O] -"\u24C4" => "O" - -# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] -"\uA74A" => "O" - -# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] -"\uA74C" => "O" - -# O [FULLWIDTH LATIN CAPITAL LETTER O] -"\uFF2F" => "O" - -# ò [LATIN SMALL LETTER O WITH GRAVE] -"\u00F2" => "o" - -# ó [LATIN SMALL LETTER O WITH ACUTE] -"\u00F3" => "o" - -# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] -"\u00F4" => "o" - -# õ [LATIN SMALL LETTER O WITH TILDE] -"\u00F5" => "o" - -# ö [LATIN SMALL LETTER O WITH DIAERESIS] -"\u00F6" => "o" - -# ø [LATIN SMALL LETTER O WITH STROKE] -"\u00F8" => "o" - -# ō [LATIN SMALL LETTER O WITH MACRON] -"\u014D" => "o" - -# ŏ [LATIN SMALL LETTER O WITH BREVE] -"\u014F" => "o" - -# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] -"\u0151" => "o" - -# ơ [LATIN SMALL LETTER O WITH HORN] -"\u01A1" => "o" - -# ǒ [LATIN SMALL LETTER O WITH CARON] -"\u01D2" => "o" - -# ǫ [LATIN SMALL LETTER O WITH OGONEK] -"\u01EB" => "o" - -# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] -"\u01ED" => "o" - -# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] -"\u01FF" => "o" - -# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] -"\u020D" => "o" - -# ȏ [LATIN SMALL 
LETTER O WITH INVERTED BREVE] -"\u020F" => "o" - -# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] -"\u022B" => "o" - -# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] -"\u022D" => "o" - -# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] -"\u022F" => "o" - -# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] -"\u0231" => "o" - -# ɔ [LATIN SMALL LETTER OPEN O] -"\u0254" => "o" - -# ɵ [LATIN SMALL LETTER BARRED O] -"\u0275" => "o" - -# ᴖ [LATIN SMALL LETTER TOP HALF O] -"\u1D16" => "o" - -# ᴗ [LATIN SMALL LETTER BOTTOM HALF O] -"\u1D17" => "o" - -# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] -"\u1D97" => "o" - -# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] -"\u1E4D" => "o" - -# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4F" => "o" - -# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] -"\u1E51" => "o" - -# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] -"\u1E53" => "o" - -# ọ [LATIN SMALL LETTER O WITH DOT BELOW] -"\u1ECD" => "o" - -# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] -"\u1ECF" => "o" - -# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED1" => "o" - -# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED3" => "o" - -# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED5" => "o" - -# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED7" => "o" - -# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED9" => "o" - -# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] -"\u1EDB" => "o" - -# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] -"\u1EDD" => "o" - -# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDF" => "o" - -# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] -"\u1EE1" => "o" - -# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] -"\u1EE3" => "o" - -# ₒ [LATIN SUBSCRIPT SMALL LETTER O] -"\u2092" => "o" - -# ⓞ [CIRCLED LATIN SMALL LETTER O] -"\u24DE" => "o" - -# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] -"\u2C7A" => "o" - -# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] -"\uA74B" => "o" - -# ꝍ 
[LATIN SMALL LETTER O WITH LOOP] -"\uA74D" => "o" - -# o [FULLWIDTH LATIN SMALL LETTER O] -"\uFF4F" => "o" - -# Œ [LATIN CAPITAL LIGATURE OE] -"\u0152" => "OE" - -# ɶ [LATIN LETTER SMALL CAPITAL OE] -"\u0276" => "OE" - -# Ꝏ [LATIN CAPITAL LETTER OO] -"\uA74E" => "OO" - -# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] -"\u0222" => "OU" - -# ᴕ [LATIN LETTER SMALL CAPITAL OU] -"\u1D15" => "OU" - -# ⒪ [PARENTHESIZED LATIN SMALL LETTER O] -"\u24AA" => "(o)" - -# œ [LATIN SMALL LIGATURE OE] -"\u0153" => "oe" - -# ᴔ [LATIN SMALL LETTER TURNED OE] -"\u1D14" => "oe" - -# ꝏ [LATIN SMALL LETTER OO] -"\uA74F" => "oo" - -# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] -"\u0223" => "ou" - -# Ƥ [LATIN CAPITAL LETTER P WITH HOOK] -"\u01A4" => "P" - -# ᴘ [LATIN LETTER SMALL CAPITAL P] -"\u1D18" => "P" - -# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] -"\u1E54" => "P" - -# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] -"\u1E56" => "P" - -# Ⓟ [CIRCLED LATIN CAPITAL LETTER P] -"\u24C5" => "P" - -# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] -"\u2C63" => "P" - -# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA750" => "P" - -# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] -"\uA752" => "P" - -# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] -"\uA754" => "P" - -# P [FULLWIDTH LATIN CAPITAL LETTER P] -"\uFF30" => "P" - -# ƥ [LATIN SMALL LETTER P WITH HOOK] -"\u01A5" => "p" - -# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] -"\u1D71" => "p" - -# ᵽ [LATIN SMALL LETTER P WITH STROKE] -"\u1D7D" => "p" - -# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] -"\u1D88" => "p" - -# ṕ [LATIN SMALL LETTER P WITH ACUTE] -"\u1E55" => "p" - -# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] -"\u1E57" => "p" - -# ⓟ [CIRCLED LATIN SMALL LETTER P] -"\u24DF" => "p" - -# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA751" => "p" - -# ꝓ [LATIN SMALL LETTER P WITH FLOURISH] -"\uA753" => "p" - -# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] -"\uA755" => "p" - -# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED 
P] -"\uA7FC" => "p" - -# p [FULLWIDTH LATIN SMALL LETTER P] -"\uFF50" => "p" - -# ⒫ [PARENTHESIZED LATIN SMALL LETTER P] -"\u24AB" => "(p)" - -# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] -"\u024A" => "Q" - -# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] -"\u24C6" => "Q" - -# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA756" => "Q" - -# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] -"\uA758" => "Q" - -# Q [FULLWIDTH LATIN CAPITAL LETTER Q] -"\uFF31" => "Q" - -# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] -"\u0138" => "q" - -# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] -"\u024B" => "q" - -# ʠ [LATIN SMALL LETTER Q WITH HOOK] -"\u02A0" => "q" - -# ⓠ [CIRCLED LATIN SMALL LETTER Q] -"\u24E0" => "q" - -# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA757" => "q" - -# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] -"\uA759" => "q" - -# q [FULLWIDTH LATIN SMALL LETTER Q] -"\uFF51" => "q" - -# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] -"\u24AC" => "(q)" - -# ȹ [LATIN SMALL LETTER QP DIGRAPH] -"\u0239" => "qp" - -# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] -"\u0154" => "R" - -# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] -"\u0156" => "R" - -# Ř [LATIN CAPITAL LETTER R WITH CARON] -"\u0158" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] -"\u0210" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] -"\u0212" => "R" - -# Ɍ [LATIN CAPITAL LETTER R WITH STROKE] -"\u024C" => "R" - -# ʀ [LATIN LETTER SMALL CAPITAL R] -"\u0280" => "R" - -# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] -"\u0281" => "R" - -# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] -"\u1D19" => "R" - -# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] -"\u1D1A" => "R" - -# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] -"\u1E58" => "R" - -# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] -"\u1E5A" => "R" - -# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5C" => "R" - -# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] -"\u1E5E" => "R" - -# Ⓡ [CIRCLED LATIN CAPITAL LETTER R] 
-"\u24C7" => "R" - -# Ɽ [LATIN CAPITAL LETTER R WITH TAIL] -"\u2C64" => "R" - -# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] -"\uA75A" => "R" - -# Ꞃ [LATIN CAPITAL LETTER INSULAR R] -"\uA782" => "R" - -# R [FULLWIDTH LATIN CAPITAL LETTER R] -"\uFF32" => "R" - -# ŕ [LATIN SMALL LETTER R WITH ACUTE] -"\u0155" => "r" - -# ŗ [LATIN SMALL LETTER R WITH CEDILLA] -"\u0157" => "r" - -# ř [LATIN SMALL LETTER R WITH CARON] -"\u0159" => "r" - -# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] -"\u0211" => "r" - -# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] -"\u0213" => "r" - -# ɍ [LATIN SMALL LETTER R WITH STROKE] -"\u024D" => "r" - -# ɼ [LATIN SMALL LETTER R WITH LONG LEG] -"\u027C" => "r" - -# ɽ [LATIN SMALL LETTER R WITH TAIL] -"\u027D" => "r" - -# ɾ [LATIN SMALL LETTER R WITH FISHHOOK] -"\u027E" => "r" - -# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] -"\u027F" => "r" - -# ᵣ [LATIN SUBSCRIPT SMALL LETTER R] -"\u1D63" => "r" - -# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] -"\u1D72" => "r" - -# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] -"\u1D73" => "r" - -# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] -"\u1D89" => "r" - -# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] -"\u1E59" => "r" - -# ṛ [LATIN SMALL LETTER R WITH DOT BELOW] -"\u1E5B" => "r" - -# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5D" => "r" - -# ṟ [LATIN SMALL LETTER R WITH LINE BELOW] -"\u1E5F" => "r" - -# ⓡ [CIRCLED LATIN SMALL LETTER R] -"\u24E1" => "r" - -# ꝛ [LATIN SMALL LETTER R ROTUNDA] -"\uA75B" => "r" - -# ꞃ [LATIN SMALL LETTER INSULAR R] -"\uA783" => "r" - -# r [FULLWIDTH LATIN SMALL LETTER R] -"\uFF52" => "r" - -# ⒭ [PARENTHESIZED LATIN SMALL LETTER R] -"\u24AD" => "(r)" - -# Ś [LATIN CAPITAL LETTER S WITH ACUTE] -"\u015A" => "S" - -# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] -"\u015C" => "S" - -# Ş [LATIN CAPITAL LETTER S WITH CEDILLA] -"\u015E" => "S" - -# Š [LATIN CAPITAL LETTER S WITH CARON] -"\u0160" => "S" - -# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] -"\u0218" => "S" - -# Ṡ 
[LATIN CAPITAL LETTER S WITH DOT ABOVE] -"\u1E60" => "S" - -# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] -"\u1E62" => "S" - -# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E64" => "S" - -# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] -"\u1E66" => "S" - -# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E68" => "S" - -# Ⓢ [CIRCLED LATIN CAPITAL LETTER S] -"\u24C8" => "S" - -# ꜱ [LATIN LETTER SMALL CAPITAL S] -"\uA731" => "S" - -# ꞅ [LATIN SMALL LETTER INSULAR S] -"\uA785" => "S" - -# S [FULLWIDTH LATIN CAPITAL LETTER S] -"\uFF33" => "S" - -# ś [LATIN SMALL LETTER S WITH ACUTE] -"\u015B" => "s" - -# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] -"\u015D" => "s" - -# ş [LATIN SMALL LETTER S WITH CEDILLA] -"\u015F" => "s" - -# š [LATIN SMALL LETTER S WITH CARON] -"\u0161" => "s" - -# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] -"\u017F" => "s" - -# ș [LATIN SMALL LETTER S WITH COMMA BELOW] -"\u0219" => "s" - -# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL] -"\u023F" => "s" - -# ʂ [LATIN SMALL LETTER S WITH HOOK] -"\u0282" => "s" - -# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] -"\u1D74" => "s" - -# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] -"\u1D8A" => "s" - -# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] -"\u1E61" => "s" - -# ṣ [LATIN SMALL LETTER S WITH DOT BELOW] -"\u1E63" => "s" - -# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E65" => "s" - -# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] -"\u1E67" => "s" - -# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E69" => "s" - -# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] -"\u1E9C" => "s" - -# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] -"\u1E9D" => "s" - -# ⓢ [CIRCLED LATIN SMALL LETTER S] -"\u24E2" => "s" - -# Ꞅ [LATIN CAPITAL LETTER INSULAR S] -"\uA784" => "s" - -# s [FULLWIDTH LATIN SMALL LETTER S] -"\uFF53" => "s" - -# ẞ [LATIN CAPITAL LETTER SHARP S] -"\u1E9E" => "SS" - -# ⒮ [PARENTHESIZED LATIN SMALL LETTER S] -"\u24AE" => "(s)" - -# ß 
[LATIN SMALL LETTER SHARP S] -"\u00DF" => "ss" - -# st [LATIN SMALL LIGATURE ST] -"\uFB06" => "st" - -# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] -"\u0162" => "T" - -# Ť [LATIN CAPITAL LETTER T WITH CARON] -"\u0164" => "T" - -# Ŧ [LATIN CAPITAL LETTER T WITH STROKE] -"\u0166" => "T" - -# Ƭ [LATIN CAPITAL LETTER T WITH HOOK] -"\u01AC" => "T" - -# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] -"\u01AE" => "T" - -# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] -"\u021A" => "T" - -# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] -"\u023E" => "T" - -# ᴛ [LATIN LETTER SMALL CAPITAL T] -"\u1D1B" => "T" - -# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] -"\u1E6A" => "T" - -# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] -"\u1E6C" => "T" - -# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] -"\u1E6E" => "T" - -# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E70" => "T" - -# Ⓣ [CIRCLED LATIN CAPITAL LETTER T] -"\u24C9" => "T" - -# Ꞇ [LATIN CAPITAL LETTER INSULAR T] -"\uA786" => "T" - -# T [FULLWIDTH LATIN CAPITAL LETTER T] -"\uFF34" => "T" - -# ţ [LATIN SMALL LETTER T WITH CEDILLA] -"\u0163" => "t" - -# ť [LATIN SMALL LETTER T WITH CARON] -"\u0165" => "t" - -# ŧ [LATIN SMALL LETTER T WITH STROKE] -"\u0167" => "t" - -# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] -"\u01AB" => "t" - -# ƭ [LATIN SMALL LETTER T WITH HOOK] -"\u01AD" => "t" - -# ț [LATIN SMALL LETTER T WITH COMMA BELOW] -"\u021B" => "t" - -# ȶ [LATIN SMALL LETTER T WITH CURL] -"\u0236" => "t" - -# ʇ [LATIN SMALL LETTER TURNED T] -"\u0287" => "t" - -# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] -"\u0288" => "t" - -# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] -"\u1D75" => "t" - -# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] -"\u1E6B" => "t" - -# ṭ [LATIN SMALL LETTER T WITH DOT BELOW] -"\u1E6D" => "t" - -# ṯ [LATIN SMALL LETTER T WITH LINE BELOW] -"\u1E6F" => "t" - -# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E71" => "t" - -# ẗ [LATIN SMALL LETTER T WITH DIAERESIS] -"\u1E97" => "t" - -# ⓣ [CIRCLED LATIN SMALL LETTER T] 
-"\u24E3" => "t" - -# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] -"\u2C66" => "t" - -# t [FULLWIDTH LATIN SMALL LETTER T] -"\uFF54" => "t" - -# Þ [LATIN CAPITAL LETTER THORN] -"\u00DE" => "TH" - -# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA766" => "TH" - -# Ꜩ [LATIN CAPITAL LETTER TZ] -"\uA728" => "TZ" - -# ⒯ [PARENTHESIZED LATIN SMALL LETTER T] -"\u24AF" => "(t)" - -# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] -"\u02A8" => "tc" - -# þ [LATIN SMALL LETTER THORN] -"\u00FE" => "th" - -# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] -"\u1D7A" => "th" - -# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA767" => "th" - -# ʦ [LATIN SMALL LETTER TS DIGRAPH] -"\u02A6" => "ts" - -# ꜩ [LATIN SMALL LETTER TZ] -"\uA729" => "tz" - -# Ù [LATIN CAPITAL LETTER U WITH GRAVE] -"\u00D9" => "U" - -# Ú [LATIN CAPITAL LETTER U WITH ACUTE] -"\u00DA" => "U" - -# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] -"\u00DB" => "U" - -# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] -"\u00DC" => "U" - -# Ũ [LATIN CAPITAL LETTER U WITH TILDE] -"\u0168" => "U" - -# Ū [LATIN CAPITAL LETTER U WITH MACRON] -"\u016A" => "U" - -# Ŭ [LATIN CAPITAL LETTER U WITH BREVE] -"\u016C" => "U" - -# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] -"\u016E" => "U" - -# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] -"\u0170" => "U" - -# Ų [LATIN CAPITAL LETTER U WITH OGONEK] -"\u0172" => "U" - -# Ư [LATIN CAPITAL LETTER U WITH HORN] -"\u01AF" => "U" - -# Ǔ [LATIN CAPITAL LETTER U WITH CARON] -"\u01D3" => "U" - -# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] -"\u01D5" => "U" - -# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D7" => "U" - -# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] -"\u01D9" => "U" - -# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DB" => "U" - -# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] -"\u0214" => "U" - -# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] -"\u0216" => "U" - -# Ʉ [LATIN CAPITAL LETTER U BAR] -"\u0244" => "U" - 
-# ᴜ [LATIN LETTER SMALL CAPITAL U] -"\u1D1C" => "U" - -# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] -"\u1D7E" => "U" - -# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] -"\u1E72" => "U" - -# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] -"\u1E74" => "U" - -# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E76" => "U" - -# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] -"\u1E78" => "U" - -# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7A" => "U" - -# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] -"\u1EE4" => "U" - -# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] -"\u1EE6" => "U" - -# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] -"\u1EE8" => "U" - -# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] -"\u1EEA" => "U" - -# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EEC" => "U" - -# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] -"\u1EEE" => "U" - -# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] -"\u1EF0" => "U" - -# Ⓤ [CIRCLED LATIN CAPITAL LETTER U] -"\u24CA" => "U" - -# U [FULLWIDTH LATIN CAPITAL LETTER U] -"\uFF35" => "U" - -# ù [LATIN SMALL LETTER U WITH GRAVE] -"\u00F9" => "u" - -# ú [LATIN SMALL LETTER U WITH ACUTE] -"\u00FA" => "u" - -# û [LATIN SMALL LETTER U WITH CIRCUMFLEX] -"\u00FB" => "u" - -# ü [LATIN SMALL LETTER U WITH DIAERESIS] -"\u00FC" => "u" - -# ũ [LATIN SMALL LETTER U WITH TILDE] -"\u0169" => "u" - -# ū [LATIN SMALL LETTER U WITH MACRON] -"\u016B" => "u" - -# ŭ [LATIN SMALL LETTER U WITH BREVE] -"\u016D" => "u" - -# ů [LATIN SMALL LETTER U WITH RING ABOVE] -"\u016F" => "u" - -# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] -"\u0171" => "u" - -# ų [LATIN SMALL LETTER U WITH OGONEK] -"\u0173" => "u" - -# ư [LATIN SMALL LETTER U WITH HORN] -"\u01B0" => "u" - -# ǔ [LATIN SMALL LETTER U WITH CARON] -"\u01D4" => "u" - -# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] -"\u01D6" => "u" - -# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D8" => "u" - -# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] -"\u01DA" 
=> "u" - -# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DC" => "u" - -# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] -"\u0215" => "u" - -# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] -"\u0217" => "u" - -# ʉ [LATIN SMALL LETTER U BAR] -"\u0289" => "u" - -# ᵤ [LATIN SUBSCRIPT SMALL LETTER U] -"\u1D64" => "u" - -# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] -"\u1D99" => "u" - -# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] -"\u1E73" => "u" - -# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] -"\u1E75" => "u" - -# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E77" => "u" - -# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] -"\u1E79" => "u" - -# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7B" => "u" - -# ụ [LATIN SMALL LETTER U WITH DOT BELOW] -"\u1EE5" => "u" - -# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] -"\u1EE7" => "u" - -# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] -"\u1EE9" => "u" - -# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] -"\u1EEB" => "u" - -# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EED" => "u" - -# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] -"\u1EEF" => "u" - -# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] -"\u1EF1" => "u" - -# ⓤ [CIRCLED LATIN SMALL LETTER U] -"\u24E4" => "u" - -# u [FULLWIDTH LATIN SMALL LETTER U] -"\uFF55" => "u" - -# ⒰ [PARENTHESIZED LATIN SMALL LETTER U] -"\u24B0" => "(u)" - -# ᵫ [LATIN SMALL LETTER UE] -"\u1D6B" => "ue" - -# Ʋ [LATIN CAPITAL LETTER V WITH HOOK] -"\u01B2" => "V" - -# Ʌ [LATIN CAPITAL LETTER TURNED V] -"\u0245" => "V" - -# ᴠ [LATIN LETTER SMALL CAPITAL V] -"\u1D20" => "V" - -# Ṽ [LATIN CAPITAL LETTER V WITH TILDE] -"\u1E7C" => "V" - -# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] -"\u1E7E" => "V" - -# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] -"\u1EFC" => "V" - -# Ⓥ [CIRCLED LATIN CAPITAL LETTER V] -"\u24CB" => "V" - -# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] -"\uA75E" => "V" - -# Ꝩ [LATIN CAPITAL LETTER VEND] -"\uA768" => "V" - -# V [FULLWIDTH LATIN CAPITAL LETTER V] 
-"\uFF36" => "V" - -# ʋ [LATIN SMALL LETTER V WITH HOOK] -"\u028B" => "v" - -# ʌ [LATIN SMALL LETTER TURNED V] -"\u028C" => "v" - -# ᵥ [LATIN SUBSCRIPT SMALL LETTER V] -"\u1D65" => "v" - -# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] -"\u1D8C" => "v" - -# ṽ [LATIN SMALL LETTER V WITH TILDE] -"\u1E7D" => "v" - -# ṿ [LATIN SMALL LETTER V WITH DOT BELOW] -"\u1E7F" => "v" - -# ⓥ [CIRCLED LATIN SMALL LETTER V] -"\u24E5" => "v" - -# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] -"\u2C71" => "v" - -# ⱴ [LATIN SMALL LETTER V WITH CURL] -"\u2C74" => "v" - -# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] -"\uA75F" => "v" - -# v [FULLWIDTH LATIN SMALL LETTER V] -"\uFF56" => "v" - -# Ꝡ [LATIN CAPITAL LETTER VY] -"\uA760" => "VY" - -# ⒱ [PARENTHESIZED LATIN SMALL LETTER V] -"\u24B1" => "(v)" - -# ꝡ [LATIN SMALL LETTER VY] -"\uA761" => "vy" - -# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] -"\u0174" => "W" - -# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] -"\u01F7" => "W" - -# ᴡ [LATIN LETTER SMALL CAPITAL W] -"\u1D21" => "W" - -# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] -"\u1E80" => "W" - -# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] -"\u1E82" => "W" - -# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] -"\u1E84" => "W" - -# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] -"\u1E86" => "W" - -# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] -"\u1E88" => "W" - -# Ⓦ [CIRCLED LATIN CAPITAL LETTER W] -"\u24CC" => "W" - -# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] -"\u2C72" => "W" - -# W [FULLWIDTH LATIN CAPITAL LETTER W] -"\uFF37" => "W" - -# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] -"\u0175" => "w" - -# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] -"\u01BF" => "w" - -# ʍ [LATIN SMALL LETTER TURNED W] -"\u028D" => "w" - -# ẁ [LATIN SMALL LETTER W WITH GRAVE] -"\u1E81" => "w" - -# ẃ [LATIN SMALL LETTER W WITH ACUTE] -"\u1E83" => "w" - -# ẅ [LATIN SMALL LETTER W WITH DIAERESIS] -"\u1E85" => "w" - -# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] -"\u1E87" => "w" - -# ẉ [LATIN SMALL LETTER W WITH DOT 
BELOW] -"\u1E89" => "w" - -# ẘ [LATIN SMALL LETTER W WITH RING ABOVE] -"\u1E98" => "w" - -# ⓦ [CIRCLED LATIN SMALL LETTER W] -"\u24E6" => "w" - -# ⱳ [LATIN SMALL LETTER W WITH HOOK] -"\u2C73" => "w" - -# w [FULLWIDTH LATIN SMALL LETTER W] -"\uFF57" => "w" - -# ⒲ [PARENTHESIZED LATIN SMALL LETTER W] -"\u24B2" => "(w)" - -# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] -"\u1E8A" => "X" - -# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] -"\u1E8C" => "X" - -# Ⓧ [CIRCLED LATIN CAPITAL LETTER X] -"\u24CD" => "X" - -# X [FULLWIDTH LATIN CAPITAL LETTER X] -"\uFF38" => "X" - -# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] -"\u1D8D" => "x" - -# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] -"\u1E8B" => "x" - -# ẍ [LATIN SMALL LETTER X WITH DIAERESIS] -"\u1E8D" => "x" - -# ₓ [LATIN SUBSCRIPT SMALL LETTER X] -"\u2093" => "x" - -# ⓧ [CIRCLED LATIN SMALL LETTER X] -"\u24E7" => "x" - -# x [FULLWIDTH LATIN SMALL LETTER X] -"\uFF58" => "x" - -# ⒳ [PARENTHESIZED LATIN SMALL LETTER X] -"\u24B3" => "(x)" - -# Ý [LATIN CAPITAL LETTER Y WITH ACUTE] -"\u00DD" => "Y" - -# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] -"\u0176" => "Y" - -# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] -"\u0178" => "Y" - -# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] -"\u01B3" => "Y" - -# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] -"\u0232" => "Y" - -# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] -"\u024E" => "Y" - -# ʏ [LATIN LETTER SMALL CAPITAL Y] -"\u028F" => "Y" - -# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] -"\u1E8E" => "Y" - -# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] -"\u1EF2" => "Y" - -# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] -"\u1EF4" => "Y" - -# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] -"\u1EF6" => "Y" - -# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] -"\u1EF8" => "Y" - -# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] -"\u1EFE" => "Y" - -# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] -"\u24CE" => "Y" - -# Y [FULLWIDTH LATIN CAPITAL LETTER Y] -"\uFF39" => "Y" - -# ý [LATIN SMALL LETTER Y WITH ACUTE] -"\u00FD" => "y" - -# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] 
-"\u00FF" => "y" - -# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] -"\u0177" => "y" - -# ƴ [LATIN SMALL LETTER Y WITH HOOK] -"\u01B4" => "y" - -# ȳ [LATIN SMALL LETTER Y WITH MACRON] -"\u0233" => "y" - -# ɏ [LATIN SMALL LETTER Y WITH STROKE] -"\u024F" => "y" - -# ʎ [LATIN SMALL LETTER TURNED Y] -"\u028E" => "y" - -# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] -"\u1E8F" => "y" - -# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] -"\u1E99" => "y" - -# ỳ [LATIN SMALL LETTER Y WITH GRAVE] -"\u1EF3" => "y" - -# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] -"\u1EF5" => "y" - -# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] -"\u1EF7" => "y" - -# ỹ [LATIN SMALL LETTER Y WITH TILDE] -"\u1EF9" => "y" - -# ỿ [LATIN SMALL LETTER Y WITH LOOP] -"\u1EFF" => "y" - -# ⓨ [CIRCLED LATIN SMALL LETTER Y] -"\u24E8" => "y" - -# y [FULLWIDTH LATIN SMALL LETTER Y] -"\uFF59" => "y" - -# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] -"\u24B4" => "(y)" - -# Ź [LATIN CAPITAL LETTER Z WITH ACUTE] -"\u0179" => "Z" - -# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] -"\u017B" => "Z" - -# Ž [LATIN CAPITAL LETTER Z WITH CARON] -"\u017D" => "Z" - -# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] -"\u01B5" => "Z" - -# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] -"\u021C" => "Z" - -# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] -"\u0224" => "Z" - -# ᴢ [LATIN LETTER SMALL CAPITAL Z] -"\u1D22" => "Z" - -# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] -"\u1E90" => "Z" - -# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] -"\u1E92" => "Z" - -# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW] -"\u1E94" => "Z" - -# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] -"\u24CF" => "Z" - -# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] -"\u2C6B" => "Z" - -# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] -"\uA762" => "Z" - -# Z [FULLWIDTH LATIN CAPITAL LETTER Z] -"\uFF3A" => "Z" - -# ź [LATIN SMALL LETTER Z WITH ACUTE] -"\u017A" => "z" - -# ż [LATIN SMALL LETTER Z WITH DOT ABOVE] -"\u017C" => "z" - -# ž [LATIN SMALL LETTER Z WITH CARON] -"\u017E" => "z" - -# ƶ [LATIN SMALL LETTER Z WITH 
STROKE] -"\u01B6" => "z" - -# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] -"\u021D" => "z" - -# ȥ [LATIN SMALL LETTER Z WITH HOOK] -"\u0225" => "z" - -# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] -"\u0240" => "z" - -# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] -"\u0290" => "z" - -# ʑ [LATIN SMALL LETTER Z WITH CURL] -"\u0291" => "z" - -# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] -"\u1D76" => "z" - -# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] -"\u1D8E" => "z" - -# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] -"\u1E91" => "z" - -# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] -"\u1E93" => "z" - -# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] -"\u1E95" => "z" - -# ⓩ [CIRCLED LATIN SMALL LETTER Z] -"\u24E9" => "z" - -# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] -"\u2C6C" => "z" - -# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] -"\uA763" => "z" - -# z [FULLWIDTH LATIN SMALL LETTER Z] -"\uFF5A" => "z" - -# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] -"\u24B5" => "(z)" - -# ⁰ [SUPERSCRIPT ZERO] -"\u2070" => "0" - -# ₀ [SUBSCRIPT ZERO] -"\u2080" => "0" - -# ⓪ [CIRCLED DIGIT ZERO] -"\u24EA" => "0" - -# ⓿ [NEGATIVE CIRCLED DIGIT ZERO] -"\u24FF" => "0" - -# 0 [FULLWIDTH DIGIT ZERO] -"\uFF10" => "0" - -# ¹ [SUPERSCRIPT ONE] -"\u00B9" => "1" - -# ₁ [SUBSCRIPT ONE] -"\u2081" => "1" - -# ① [CIRCLED DIGIT ONE] -"\u2460" => "1" - -# ⓵ [DOUBLE CIRCLED DIGIT ONE] -"\u24F5" => "1" - -# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] -"\u2776" => "1" - -# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] -"\u2780" => "1" - -# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] -"\u278A" => "1" - -# 1 [FULLWIDTH DIGIT ONE] -"\uFF11" => "1" - -# ⒈ [DIGIT ONE FULL STOP] -"\u2488" => "1." 
- -# ⑴ [PARENTHESIZED DIGIT ONE] -"\u2474" => "(1)" - -# ² [SUPERSCRIPT TWO] -"\u00B2" => "2" - -# ₂ [SUBSCRIPT TWO] -"\u2082" => "2" - -# ② [CIRCLED DIGIT TWO] -"\u2461" => "2" - -# ⓶ [DOUBLE CIRCLED DIGIT TWO] -"\u24F6" => "2" - -# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] -"\u2777" => "2" - -# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] -"\u2781" => "2" - -# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] -"\u278B" => "2" - -# 2 [FULLWIDTH DIGIT TWO] -"\uFF12" => "2" - -# ⒉ [DIGIT TWO FULL STOP] -"\u2489" => "2." - -# ⑵ [PARENTHESIZED DIGIT TWO] -"\u2475" => "(2)" - -# ³ [SUPERSCRIPT THREE] -"\u00B3" => "3" - -# ₃ [SUBSCRIPT THREE] -"\u2083" => "3" - -# ③ [CIRCLED DIGIT THREE] -"\u2462" => "3" - -# ⓷ [DOUBLE CIRCLED DIGIT THREE] -"\u24F7" => "3" - -# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] -"\u2778" => "3" - -# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] -"\u2782" => "3" - -# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] -"\u278C" => "3" - -# 3 [FULLWIDTH DIGIT THREE] -"\uFF13" => "3" - -# ⒊ [DIGIT THREE FULL STOP] -"\u248A" => "3." - -# ⑶ [PARENTHESIZED DIGIT THREE] -"\u2476" => "(3)" - -# ⁴ [SUPERSCRIPT FOUR] -"\u2074" => "4" - -# ₄ [SUBSCRIPT FOUR] -"\u2084" => "4" - -# ④ [CIRCLED DIGIT FOUR] -"\u2463" => "4" - -# ⓸ [DOUBLE CIRCLED DIGIT FOUR] -"\u24F8" => "4" - -# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] -"\u2779" => "4" - -# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] -"\u2783" => "4" - -# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] -"\u278D" => "4" - -# 4 [FULLWIDTH DIGIT FOUR] -"\uFF14" => "4" - -# ⒋ [DIGIT FOUR FULL STOP] -"\u248B" => "4." 
- -# ⑷ [PARENTHESIZED DIGIT FOUR] -"\u2477" => "(4)" - -# ⁵ [SUPERSCRIPT FIVE] -"\u2075" => "5" - -# ₅ [SUBSCRIPT FIVE] -"\u2085" => "5" - -# ⑤ [CIRCLED DIGIT FIVE] -"\u2464" => "5" - -# ⓹ [DOUBLE CIRCLED DIGIT FIVE] -"\u24F9" => "5" - -# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] -"\u277A" => "5" - -# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] -"\u2784" => "5" - -# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] -"\u278E" => "5" - -# 5 [FULLWIDTH DIGIT FIVE] -"\uFF15" => "5" - -# ⒌ [DIGIT FIVE FULL STOP] -"\u248C" => "5." - -# ⑸ [PARENTHESIZED DIGIT FIVE] -"\u2478" => "(5)" - -# ⁶ [SUPERSCRIPT SIX] -"\u2076" => "6" - -# ₆ [SUBSCRIPT SIX] -"\u2086" => "6" - -# ⑥ [CIRCLED DIGIT SIX] -"\u2465" => "6" - -# ⓺ [DOUBLE CIRCLED DIGIT SIX] -"\u24FA" => "6" - -# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] -"\u277B" => "6" - -# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] -"\u2785" => "6" - -# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] -"\u278F" => "6" - -# 6 [FULLWIDTH DIGIT SIX] -"\uFF16" => "6" - -# ⒍ [DIGIT SIX FULL STOP] -"\u248D" => "6." - -# ⑹ [PARENTHESIZED DIGIT SIX] -"\u2479" => "(6)" - -# ⁷ [SUPERSCRIPT SEVEN] -"\u2077" => "7" - -# ₇ [SUBSCRIPT SEVEN] -"\u2087" => "7" - -# ⑦ [CIRCLED DIGIT SEVEN] -"\u2466" => "7" - -# ⓻ [DOUBLE CIRCLED DIGIT SEVEN] -"\u24FB" => "7" - -# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] -"\u277C" => "7" - -# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2786" => "7" - -# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2790" => "7" - -# 7 [FULLWIDTH DIGIT SEVEN] -"\uFF17" => "7" - -# ⒎ [DIGIT SEVEN FULL STOP] -"\u248E" => "7." 
- -# ⑺ [PARENTHESIZED DIGIT SEVEN] -"\u247A" => "(7)" - -# ⁸ [SUPERSCRIPT EIGHT] -"\u2078" => "8" - -# ₈ [SUBSCRIPT EIGHT] -"\u2088" => "8" - -# ⑧ [CIRCLED DIGIT EIGHT] -"\u2467" => "8" - -# ⓼ [DOUBLE CIRCLED DIGIT EIGHT] -"\u24FC" => "8" - -# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] -"\u277D" => "8" - -# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2787" => "8" - -# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2791" => "8" - -# 8 [FULLWIDTH DIGIT EIGHT] -"\uFF18" => "8" - -# ⒏ [DIGIT EIGHT FULL STOP] -"\u248F" => "8." - -# ⑻ [PARENTHESIZED DIGIT EIGHT] -"\u247B" => "(8)" - -# ⁹ [SUPERSCRIPT NINE] -"\u2079" => "9" - -# ₉ [SUBSCRIPT NINE] -"\u2089" => "9" - -# ⑨ [CIRCLED DIGIT NINE] -"\u2468" => "9" - -# ⓽ [DOUBLE CIRCLED DIGIT NINE] -"\u24FD" => "9" - -# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] -"\u277E" => "9" - -# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] -"\u2788" => "9" - -# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] -"\u2792" => "9" - -# 9 [FULLWIDTH DIGIT NINE] -"\uFF19" => "9" - -# ⒐ [DIGIT NINE FULL STOP] -"\u2490" => "9." - -# ⑼ [PARENTHESIZED DIGIT NINE] -"\u247C" => "(9)" - -# ⑩ [CIRCLED NUMBER TEN] -"\u2469" => "10" - -# ⓾ [DOUBLE CIRCLED NUMBER TEN] -"\u24FE" => "10" - -# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] -"\u277F" => "10" - -# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] -"\u2789" => "10" - -# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] -"\u2793" => "10" - -# ⒑ [NUMBER TEN FULL STOP] -"\u2491" => "10." - -# ⑽ [PARENTHESIZED NUMBER TEN] -"\u247D" => "(10)" - -# ⑪ [CIRCLED NUMBER ELEVEN] -"\u246A" => "11" - -# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] -"\u24EB" => "11" - -# ⒒ [NUMBER ELEVEN FULL STOP] -"\u2492" => "11." - -# ⑾ [PARENTHESIZED NUMBER ELEVEN] -"\u247E" => "(11)" - -# ⑫ [CIRCLED NUMBER TWELVE] -"\u246B" => "12" - -# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] -"\u24EC" => "12" - -# ⒓ [NUMBER TWELVE FULL STOP] -"\u2493" => "12." 
- -# ⑿ [PARENTHESIZED NUMBER TWELVE] -"\u247F" => "(12)" - -# ⑬ [CIRCLED NUMBER THIRTEEN] -"\u246C" => "13" - -# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] -"\u24ED" => "13" - -# ⒔ [NUMBER THIRTEEN FULL STOP] -"\u2494" => "13." - -# ⒀ [PARENTHESIZED NUMBER THIRTEEN] -"\u2480" => "(13)" - -# ⑭ [CIRCLED NUMBER FOURTEEN] -"\u246D" => "14" - -# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] -"\u24EE" => "14" - -# ⒕ [NUMBER FOURTEEN FULL STOP] -"\u2495" => "14." - -# ⒁ [PARENTHESIZED NUMBER FOURTEEN] -"\u2481" => "(14)" - -# ⑮ [CIRCLED NUMBER FIFTEEN] -"\u246E" => "15" - -# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] -"\u24EF" => "15" - -# ⒖ [NUMBER FIFTEEN FULL STOP] -"\u2496" => "15." - -# ⒂ [PARENTHESIZED NUMBER FIFTEEN] -"\u2482" => "(15)" - -# ⑯ [CIRCLED NUMBER SIXTEEN] -"\u246F" => "16" - -# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] -"\u24F0" => "16" - -# ⒗ [NUMBER SIXTEEN FULL STOP] -"\u2497" => "16." - -# ⒃ [PARENTHESIZED NUMBER SIXTEEN] -"\u2483" => "(16)" - -# ⑰ [CIRCLED NUMBER SEVENTEEN] -"\u2470" => "17" - -# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] -"\u24F1" => "17" - -# ⒘ [NUMBER SEVENTEEN FULL STOP] -"\u2498" => "17." - -# ⒄ [PARENTHESIZED NUMBER SEVENTEEN] -"\u2484" => "(17)" - -# ⑱ [CIRCLED NUMBER EIGHTEEN] -"\u2471" => "18" - -# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] -"\u24F2" => "18" - -# ⒙ [NUMBER EIGHTEEN FULL STOP] -"\u2499" => "18." - -# ⒅ [PARENTHESIZED NUMBER EIGHTEEN] -"\u2485" => "(18)" - -# ⑲ [CIRCLED NUMBER NINETEEN] -"\u2472" => "19" - -# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] -"\u24F3" => "19" - -# ⒚ [NUMBER NINETEEN FULL STOP] -"\u249A" => "19." - -# ⒆ [PARENTHESIZED NUMBER NINETEEN] -"\u2486" => "(19)" - -# ⑳ [CIRCLED NUMBER TWENTY] -"\u2473" => "20" - -# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] -"\u24F4" => "20" - -# ⒛ [NUMBER TWENTY FULL STOP] -"\u249B" => "20." 
- -# ⒇ [PARENTHESIZED NUMBER TWENTY] -"\u2487" => "(20)" - -# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00AB" => "\"" - -# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00BB" => "\"" - -# “ [LEFT DOUBLE QUOTATION MARK] -"\u201C" => "\"" - -# ” [RIGHT DOUBLE QUOTATION MARK] -"\u201D" => "\"" - -# „ [DOUBLE LOW-9 QUOTATION MARK] -"\u201E" => "\"" - -# ″ [DOUBLE PRIME] -"\u2033" => "\"" - -# ‶ [REVERSED DOUBLE PRIME] -"\u2036" => "\"" - -# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275D" => "\"" - -# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] -"\u275E" => "\"" - -# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276E" => "\"" - -# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276F" => "\"" - -# " [FULLWIDTH QUOTATION MARK] -"\uFF02" => "\"" - -# ‘ [LEFT SINGLE QUOTATION MARK] -"\u2018" => "\'" - -# ’ [RIGHT SINGLE QUOTATION MARK] -"\u2019" => "\'" - -# ‚ [SINGLE LOW-9 QUOTATION MARK] -"\u201A" => "\'" - -# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] -"\u201B" => "\'" - -# ′ [PRIME] -"\u2032" => "\'" - -# ‵ [REVERSED PRIME] -"\u2035" => "\'" - -# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] -"\u2039" => "\'" - -# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] -"\u203A" => "\'" - -# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275B" => "\'" - -# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] -"\u275C" => "\'" - -# ' [FULLWIDTH APOSTROPHE] -"\uFF07" => "\'" - -# ‐ [HYPHEN] -"\u2010" => "-" - -# ‑ [NON-BREAKING HYPHEN] -"\u2011" => "-" - -# ‒ [FIGURE DASH] -"\u2012" => "-" - -# – [EN DASH] -"\u2013" => "-" - -# — [EM DASH] -"\u2014" => "-" - -# ⁻ [SUPERSCRIPT MINUS] -"\u207B" => "-" - -# ₋ [SUBSCRIPT MINUS] -"\u208B" => "-" - -# - [FULLWIDTH HYPHEN-MINUS] -"\uFF0D" => "-" - -# ⁅ [LEFT SQUARE BRACKET WITH QUILL] -"\u2045" => "[" - -# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] -"\u2772" => "[" - -# [ [FULLWIDTH LEFT SQUARE BRACKET] -"\uFF3B" => "[" - -# ⁆ [RIGHT SQUARE BRACKET WITH QUILL] -"\u2046" => 
"]" - -# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] -"\u2773" => "]" - -# ] [FULLWIDTH RIGHT SQUARE BRACKET] -"\uFF3D" => "]" - -# ⁽ [SUPERSCRIPT LEFT PARENTHESIS] -"\u207D" => "(" - -# ₍ [SUBSCRIPT LEFT PARENTHESIS] -"\u208D" => "(" - -# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] -"\u2768" => "(" - -# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] -"\u276A" => "(" - -# ( [FULLWIDTH LEFT PARENTHESIS] -"\uFF08" => "(" - -# ⸨ [LEFT DOUBLE PARENTHESIS] -"\u2E28" => "((" - -# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] -"\u207E" => ")" - -# ₎ [SUBSCRIPT RIGHT PARENTHESIS] -"\u208E" => ")" - -# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] -"\u2769" => ")" - -# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] -"\u276B" => ")" - -# ) [FULLWIDTH RIGHT PARENTHESIS] -"\uFF09" => ")" - -# ⸩ [RIGHT DOUBLE PARENTHESIS] -"\u2E29" => "))" - -# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u276C" => "<" - -# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u2770" => "<" - -# < [FULLWIDTH LESS-THAN SIGN] -"\uFF1C" => "<" - -# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u276D" => ">" - -# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u2771" => ">" - -# > [FULLWIDTH GREATER-THAN SIGN] -"\uFF1E" => ">" - -# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] -"\u2774" => "{" - -# { [FULLWIDTH LEFT CURLY BRACKET] -"\uFF5B" => "{" - -# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] -"\u2775" => "}" - -# } [FULLWIDTH RIGHT CURLY BRACKET] -"\uFF5D" => "}" - -# ⁺ [SUPERSCRIPT PLUS SIGN] -"\u207A" => "+" - -# ₊ [SUBSCRIPT PLUS SIGN] -"\u208A" => "+" - -# + [FULLWIDTH PLUS SIGN] -"\uFF0B" => "+" - -# ⁼ [SUPERSCRIPT EQUALS SIGN] -"\u207C" => "=" - -# ₌ [SUBSCRIPT EQUALS SIGN] -"\u208C" => "=" - -# = [FULLWIDTH EQUALS SIGN] -"\uFF1D" => "=" - -# ! [FULLWIDTH EXCLAMATION MARK] -"\uFF01" => "!" - -# ‼ [DOUBLE EXCLAMATION MARK] -"\u203C" => "!!" - -# ⁉ [EXCLAMATION QUESTION MARK] -"\u2049" => "!?" 
- -# # [FULLWIDTH NUMBER SIGN] -"\uFF03" => "#" - -# $ [FULLWIDTH DOLLAR SIGN] -"\uFF04" => "$" - -# ⁒ [COMMERCIAL MINUS SIGN] -"\u2052" => "%" - -# % [FULLWIDTH PERCENT SIGN] -"\uFF05" => "%" - -# & [FULLWIDTH AMPERSAND] -"\uFF06" => "&" - -# ⁎ [LOW ASTERISK] -"\u204E" => "*" - -# * [FULLWIDTH ASTERISK] -"\uFF0A" => "*" - -# , [FULLWIDTH COMMA] -"\uFF0C" => "," - -# . [FULLWIDTH FULL STOP] -"\uFF0E" => "." - -# ⁄ [FRACTION SLASH] -"\u2044" => "/" - -# / [FULLWIDTH SOLIDUS] -"\uFF0F" => "/" - -# : [FULLWIDTH COLON] -"\uFF1A" => ":" - -# ⁏ [REVERSED SEMICOLON] -"\u204F" => ";" - -# ; [FULLWIDTH SEMICOLON] -"\uFF1B" => ";" - -# ? [FULLWIDTH QUESTION MARK] -"\uFF1F" => "?" - -# ⁇ [DOUBLE QUESTION MARK] -"\u2047" => "??" - -# ⁈ [QUESTION EXCLAMATION MARK] -"\u2048" => "?!" - -# @ [FULLWIDTH COMMERCIAL AT] -"\uFF20" => "@" - -# \ [FULLWIDTH REVERSE SOLIDUS] -"\uFF3C" => "\\" - -# ‸ [CARET] -"\u2038" => "^" - -# ^ [FULLWIDTH CIRCUMFLEX ACCENT] -"\uFF3E" => "^" - -# _ [FULLWIDTH LOW LINE] -"\uFF3F" => "_" - -# ⁓ [SWUNG DASH] -"\u2053" => "~" - -# ~ [FULLWIDTH TILDE] -"\uFF5E" => "~" - -################################################################ -# Below is the Perl script used to generate the above mappings # -# from ASCIIFoldingFilter.java: # -################################################################ -# -# #!/usr/bin/perl -# -# use warnings; -# use strict; -# -# my @source_chars = (); -# my @source_char_descriptions = (); -# my $target = ''; -# -# while (<>) { -# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) { -# push @source_chars, $1; -# push @source_char_descriptions, $2; -# next; -# } -# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) { -# $target .= $1; -# next; -# } -# if (/break;/) { -# $target = "\\\"" if ($target eq '"'); -# for my $source_char_num (0..$#source_chars) { -# print "# $source_char_descriptions[$source_char_num]\n"; -# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n"; -# } -# @source_chars = (); -# @source_char_descriptions = 
(); -# $target = ''; -# } -# } diff --git a/solr/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt b/solr/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt deleted file mode 100644 index ede7742581b..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt +++ /dev/null @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
- -# example: -# "À" => "A" -# "\u00C0" => "A" -# "\u00C0" => "\u0041" -# "ß" => "ss" -# "\t" => " " -# "\n" => "" - -# À => A -"\u00C0" => "A" - -# Á => A -"\u00C1" => "A" - -#  => A -"\u00C2" => "A" - -# à => A -"\u00C3" => "A" - -# Ä => A -"\u00C4" => "A" - -# Å => A -"\u00C5" => "A" - -# Æ => AE -"\u00C6" => "AE" - -# Ç => C -"\u00C7" => "C" - -# È => E -"\u00C8" => "E" - -# É => E -"\u00C9" => "E" - -# Ê => E -"\u00CA" => "E" - -# Ë => E -"\u00CB" => "E" - -# Ì => I -"\u00CC" => "I" - -# Í => I -"\u00CD" => "I" - -# Î => I -"\u00CE" => "I" - -# Ï => I -"\u00CF" => "I" - -# IJ => IJ -"\u0132" => "IJ" - -# Ð => D -"\u00D0" => "D" - -# Ñ => N -"\u00D1" => "N" - -# Ò => O -"\u00D2" => "O" - -# Ó => O -"\u00D3" => "O" - -# Ô => O -"\u00D4" => "O" - -# Õ => O -"\u00D5" => "O" - -# Ö => O -"\u00D6" => "O" - -# Ø => O -"\u00D8" => "O" - -# Œ => OE -"\u0152" => "OE" - -# Þ -"\u00DE" => "TH" - -# Ù => U -"\u00D9" => "U" - -# Ú => U -"\u00DA" => "U" - -# Û => U -"\u00DB" => "U" - -# Ü => U -"\u00DC" => "U" - -# Ý => Y -"\u00DD" => "Y" - -# Ÿ => Y -"\u0178" => "Y" - -# à => a -"\u00E0" => "a" - -# á => a -"\u00E1" => "a" - -# â => a -"\u00E2" => "a" - -# ã => a -"\u00E3" => "a" - -# ä => a -"\u00E4" => "a" - -# å => a -"\u00E5" => "a" - -# æ => ae -"\u00E6" => "ae" - -# ç => c -"\u00E7" => "c" - -# è => e -"\u00E8" => "e" - -# é => e -"\u00E9" => "e" - -# ê => e -"\u00EA" => "e" - -# ë => e -"\u00EB" => "e" - -# ì => i -"\u00EC" => "i" - -# í => i -"\u00ED" => "i" - -# î => i -"\u00EE" => "i" - -# ï => i -"\u00EF" => "i" - -# ij => ij -"\u0133" => "ij" - -# ð => d -"\u00F0" => "d" - -# ñ => n -"\u00F1" => "n" - -# ò => o -"\u00F2" => "o" - -# ó => o -"\u00F3" => "o" - -# ô => o -"\u00F4" => "o" - -# õ => o -"\u00F5" => "o" - -# ö => o -"\u00F6" => "o" - -# ø => o -"\u00F8" => "o" - -# œ => oe -"\u0153" => "oe" - -# ß => ss -"\u00DF" => "ss" - -# þ => th -"\u00FE" => "th" - -# ù => u -"\u00F9" => "u" - -# ú => u -"\u00FA" => "u" - -# û => u -"\u00FB" => "u" - -# ü => u 
-"\u00FC" => "u" - -# ý => y -"\u00FD" => "y" - -# ÿ => y -"\u00FF" => "y" - -# ff => ff -"\uFB00" => "ff" - -# fi => fi -"\uFB01" => "fi" - -# fl => fl -"\uFB02" => "fl" - -# ffi => ffi -"\uFB03" => "ffi" - -# ffl => ffl -"\uFB04" => "ffl" - -# ſt => ft -"\uFB05" => "ft" - -# st => st -"\uFB06" => "st" diff --git a/solr/example/example-DIH/solr/mail/conf/protwords.txt b/solr/example/example-DIH/solr/mail/conf/protwords.txt deleted file mode 100644 index 1dfc0abecbf..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. 
-dontstems -zwhacky - diff --git a/solr/example/example-DIH/solr/mail/conf/solrconfig.xml b/solr/example/example-DIH/solr/mail/conf/solrconfig.xml deleted file mode 100644 index 91b99573539..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/solrconfig.xml +++ /dev/null @@ -1,1345 +0,0 @@ - - - - - - - - - 9.0.0 - - - - - - - - - - - - - - - - - - - - - - - ${solr.data.dir:} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.lock.type:native} - - - - - - - - - - - - - true - - - - - - - - - - - - - - - - ${solr.ulog.dir:} - - - - - ${solr.autoCommit.maxTime:15000} - false - - - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - - - - - - - ${solr.max.booleanClauses:1024} - - - - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - static firstSearcher warming in solrconfig.xml - - - - - - false - - - - - - - - - - - - - - - - - - - - - mail-data-config.xml - - - - - - - - explicit - 10 - text - - - - - - - - - - - - - - - explicit - json - true - text - - - - - - - - explicit - - - velocity - browse - layout - - - edismax - *:* - 10 - *,score - - - on - 1 - - - - - - content - - - - - - - true - ignored_ - - - true - links - ignored_ - - - - - - - - - text_general - - - - - - default - text - solr.DirectSolrSpellChecker - - internal - - 0.5 - - 2 - - 1 - - 5 - - 4 - - 0.01 - - - - - - wordbreak - solr.WordBreakSolrSpellChecker - name - true - true - 10 - - - - - - - - - - - - - - - - text - - default - wordbreak - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - spellcheck - - - - - - mySuggester - FuzzyLookupFactory - DocumentDictionaryFactory - cat - price - string - - - - - - true - 10 - - - suggest - - - - - - - - - text - true - - - tvComponent - - - - - - - - - - true - false - - - terms - - - - - - - - string - elevate.xml - - - - - - explicit - text - - - elevator - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - 
- - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - 10 - .,!? - - - - - - - WORD - - - en - US - - - - - - - - - - - - - - - - - - - - - - text/plain; charset=UTF-8 - - - - - ${velocity.template.base.dir:} - - - - - 5 - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/spellings.txt b/solr/example/example-DIH/solr/mail/conf/spellings.txt deleted file mode 100644 index d7ede6f5611..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/spellings.txt +++ /dev/null @@ -1,2 +0,0 @@ -pizza -history \ No newline at end of file diff --git a/solr/example/example-DIH/solr/mail/conf/stopwords.txt b/solr/example/example-DIH/solr/mail/conf/stopwords.txt deleted file mode 100644 index ae1e83eeb3d..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/stopwords.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/solr/example/example-DIH/solr/mail/conf/synonyms.txt b/solr/example/example-DIH/solr/mail/conf/synonyms.txt deleted file mode 100644 index eab4ee87537..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr/example/example-DIH/solr/mail/conf/update-script.js b/solr/example/example-DIH/solr/mail/conf/update-script.js deleted file mode 100644 index 49b07f9b71e..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/update-script.js +++ /dev/null @@ -1,53 +0,0 @@ -/* - This is a basic skeleton JavaScript update processor. - - In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in - the example solrconfig.xml and must be uncommented to be enabled. 
- - See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details. -*/ - -function processAdd(cmd) { - - doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument - id = doc.getFieldValue("id"); - logger.info("update-script#processAdd: id=" + id); - -// Set a field value: -// doc.setField("foo_s", "whatever"); - -// Get a configuration parameter: -// config_param = params.get('config_param'); // "params" only exists if processor configured with - -// Get a request parameter: -// some_param = req.getParams().get("some_param") - -// Add a field of field names that match a pattern: -// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss -// field_names = doc.getFieldNames().toArray(); -// for(i=0; i < field_names.length; i++) { -// field_name = field_names[i]; -// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); } -// } - -} - -function processDelete(cmd) { - // no-op -} - -function processMergeIndexes(cmd) { - // no-op -} - -function processCommit(cmd) { - // no-op -} - -function processRollback(cmd) { - // no-op -} - -function finish() { - // no-op -} diff --git a/solr/example/example-DIH/solr/mail/conf/xslt/example.xsl b/solr/example/example-DIH/solr/mail/conf/xslt/example.xsl deleted file mode 100644 index b8992700828..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/xslt/example.xsl +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - - - - - - - - <xsl:value-of select="$title"/> - - - -

-
- This has been formatted by the sample "example.xsl" transform - - use your own XSLT to get a nicer page -
- - - -
- - - -
- - - - -
-
-
- - - - - - - - - - - - - - javascript:toggle("");? -
- - exp - - - - - -
- - -
- - - - - - - -
    - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - -
diff --git a/solr/example/example-DIH/solr/mail/conf/xslt/example_atom.xsl b/solr/example/example-DIH/solr/mail/conf/xslt/example_atom.xsl deleted file mode 100644 index b6c23151dc4..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/xslt/example_atom.xsl +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - Example Solr Atom 1.0 Feed - - This has been formatted by the sample "example_atom.xsl" transform - - use your own XSLT to get a nicer Atom feed. - - - Apache Solr - solr-user@lucene.apache.org - - - - - - tag:localhost,2007:example - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - tag:localhost,2007: - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/xslt/example_rss.xsl b/solr/example/example-DIH/solr/mail/conf/xslt/example_rss.xsl deleted file mode 100644 index c8ab5bfb1ec..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/xslt/example_rss.xsl +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - - - - Example Solr RSS 2.0 Feed - http://localhost:8983/solr - - This has been formatted by the sample "example_rss.xsl" transform - - use your own XSLT to get a nicer RSS feed. - - en-us - http://localhost:8983/solr - - - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - http://localhost:8983/solr/select?q=id: - - - - - - - http://localhost:8983/solr/select?q=id: - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/xslt/luke.xsl b/solr/example/example-DIH/solr/mail/conf/xslt/luke.xsl deleted file mode 100644 index 05fb5bfeee2..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/xslt/luke.xsl +++ /dev/null @@ -1,337 +0,0 @@ - - - - - - - - - Solr Luke Request Handler Response - - - - - - - - - <xsl:value-of select="$title"/> - - - - - -

- -

-
- -

Index Statistics

- -
- -

Field Statistics

- - - -

Document statistics

- - - - - - - - - - -
- -
- - -
- -
- -
-
-
- - - - - - - - - - - - - - - - - - - - - -
-

- -

- -
- -
-
-
- - -
- - 50 - 800 - 160 - blue - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- background-color: ; width: px; height: px; -
-
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - -
  • - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/conf/xslt/updateXml.xsl b/solr/example/example-DIH/solr/mail/conf/xslt/updateXml.xsl deleted file mode 100644 index a96e1d02448..00000000000 --- a/solr/example/example-DIH/solr/mail/conf/xslt/updateXml.xsl +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/mail/core.properties b/solr/example/example-DIH/solr/mail/core.properties deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/solr/example/example-DIH/solr/solr.xml b/solr/example/example-DIH/solr/solr.xml deleted file mode 100644 index 191e51f5962..00000000000 --- a/solr/example/example-DIH/solr/solr.xml +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/kmeans-attributes.xml b/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/kmeans-attributes.xml deleted file mode 100644 index d802465f669..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/kmeans-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/lingo-attributes.xml b/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/lingo-attributes.xml deleted file mode 100644 index 4bf13608b36..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/lingo-attributes.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/stc-attributes.xml b/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/stc-attributes.xml deleted file mode 100644 index c1bf110c8fd..00000000000 --- 
a/solr/example/example-DIH/solr/solr/conf/clustering/carrot2/stc-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/currency.xml b/solr/example/example-DIH/solr/solr/conf/currency.xml deleted file mode 100644 index 532221a90bc..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/currency.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/elevate.xml b/solr/example/example-DIH/solr/solr/conf/elevate.xml deleted file mode 100644 index 2c09ebed669..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/contractions_ca.txt b/solr/example/example-DIH/solr/solr/conf/lang/contractions_ca.txt deleted file mode 100644 index 307a85f913d..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t diff --git a/solr/example/example-DIH/solr/solr/conf/lang/contractions_fr.txt b/solr/example/example-DIH/solr/solr/conf/lang/contractions_fr.txt deleted file mode 100644 index f1bba51b23e..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu diff --git a/solr/example/example-DIH/solr/solr/conf/lang/contractions_ga.txt b/solr/example/example-DIH/solr/solr/conf/lang/contractions_ga.txt deleted file mode 100644 index 9ebe7fa349a..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/contractions_ga.txt +++ 
/dev/null @@ -1,5 +0,0 @@ -# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b diff --git a/solr/example/example-DIH/solr/solr/conf/lang/contractions_it.txt b/solr/example/example-DIH/solr/solr/conf/lang/contractions_it.txt deleted file mode 100644 index cac04095372..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/contractions_it.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l -all -dall -dell -nell -sull -coll -pell -gl -agl -dagl -degl -negl -sugl -un -m -t -s -v -d diff --git a/solr/example/example-DIH/solr/solr/conf/lang/hyphenations_ga.txt b/solr/example/example-DIH/solr/solr/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 4d2642cc5a3..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stemdict_nl.txt b/solr/example/example-DIH/solr/solr/conf/lang/stemdict_nl.txt deleted file mode 100644 index 441072971d3..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets fiets -bromfiets bromfiets -ei eier -kind kinder diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stoptags_ja.txt b/solr/example/example-DIH/solr/solr/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b750845e3..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. 
-# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below. Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. -# -##### -# noun: unclassified nouns -#名詞 -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -# noun-proper: Proper nouns where the sub-classification is undefined -#名詞-固有名詞 -# -# noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -# noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. お市の方 -#名詞-固有名詞-人名-一般 -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. 山田 -#名詞-固有名詞-人名-姓 -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 太郎 -#名詞-固有名詞-人名-名 -# -# noun-proper-organization: Names representing organizations. -# e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -# noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -# noun-proper-place-country: Country names. -# e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. 
ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ -#名詞-代名詞-縮約 -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. 金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (する, できる, なさる, くださる) -# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -# noun-adjective-base: The base form of adjectives, words that appear before な ("na") -# e.g. 健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -# e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -# noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, -# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, -# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -# わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, -# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, -# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, -# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, -# 儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars -# with the stem よう(だ) ("you(da)"). -# e.g. よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form な (aux "da"). -# e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -# noun-special: special nouns where the sub-classification is undefined. 
-#名詞-特殊 -# -# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is -# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. そう -#名詞-特殊-助動詞語幹 -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ガル or タイ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. In general, this category is more inclusive than -# 接尾語 ("suffix") and is usually the last element in a compound noun. -# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. 君, 様, 著 -#名詞-接尾-人名 -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. 町, 市, 県 -#名詞-接尾-地域 -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before スル ("suru"). -# e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, -# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. そう -#名詞-接尾-助動詞語幹 -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula だ ("da"). -# e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 
個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. (楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are -# semantically verb-like. -# e.g. ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") -# is いわく ("iwaku"). -#名詞-引用文字列 -# -# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -# behave like an adjective. -# e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -# prefix: unclassified prefixes -#接頭詞 -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by なる/なさる/くださる. -# e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 約, およそ, 毎時 -#接頭詞-数接続 -# -##### -# verb: unclassified verbs -#動詞 -# -# verb-main: -#動詞-自立 -# -# verb-auxiliary: -#動詞-非自立 -# -# verb-suffix: -#動詞-接尾 -# -##### -# adjective: unclassified adjectives -#形容詞 -# -# adjective-main: -#形容詞-自立 -# -# adjective-auxiliary: -#形容詞-非自立 -# -# adjective-suffix: -#形容詞-接尾 -# -##### -# adverb: unclassified adverbs -#副詞 -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. 
あいかわらず, 多分 -#副詞-一般 -# -# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, -# な, する, だ, etc. -# e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, -# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, -# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -# particle: unclassified particles. -助詞 -# -# particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -# particle-case-misc: Case particles. -# e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -# particle-case-quote: the "to" that appears after nouns, a person’s speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, -# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, -# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, -# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, -# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -# particle-conjunctive: -# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, -# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, -# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -# particle-dependency: -# e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -# particle-adverbial: -# e.g. 
がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, -# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, -# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (松島) や -助詞-間投助詞 -# -# particle-coordinate: -# e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -# particle-final: -# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, -# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -# 「(祈りが届いたせい) か (, 試験に合格した.)」 -# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -# e.g. か -助詞-副助詞/並立助詞/終助詞 -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. -助詞-連体化 -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. に, と -助詞-副詞化 -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -# auxiliary-verb: -助動詞 -# -##### -# interjection: Greetings and other exclamations. -# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, -# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -# symbol: unclassified Symbols. -記号 -# -# symbol-misc: A general symbol not in one of the categories below. -# e.g. [○◎@$〒→+] -記号-一般 -# -# symbol-comma: Commas -# e.g. [,、] -記号-読点 -# -# symbol-period: Periods and full stops. -# e.g. 
[..。] -記号-句点 -# -# symbol-space: Full-width whitespace. -記号-空白 -# -# symbol-open_bracket: -# e.g. [({‘“『【] -記号-括弧開 -# -# symbol-close_bracket: -# e.g. [)}’”』」】] -記号-括弧閉 -# -# symbol-alphabetic: -#記号-アルファベット -# -##### -# other: unclassified other -#その他 -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. (だ)ァ -その他-間投 -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. あの, うんと, えと -フィラー -# -##### -# non-verbal: non-verbal sound. -非言語音 -# -##### -# fragment: -#語断片 -# -##### -# unknown: unknown part of speech. -#未知語 -# -##### End of file diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ar.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ar.txt deleted file mode 100644 index 046829db6a2..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,125 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some -# redundant entries, for example containing forms with both أ and ا -من -ومن -منها -منه -في -وفي -فيها -فيه -و -ف -ثم -او -أو -ب -بها -به -ا -أ -اى -اي -أي -أى -لا -ولا -الا -ألا -إلا -لكن -ما -وما -كما -فما -عن -مع -اذا -إذا -ان -أن -إن -انها -أنها -إنها -انه -أنه -إنه -بان -بأن -فان -فأن -وان -وأن -وإن -التى -التي -الذى -الذي -الذين -الى -الي -إلى -إلي -على -عليها -عليه -اما -أما -إما -ايضا -أيضا -كل -وكل -لم -ولم -لن -ولن -هى -هي -هو -وهى -وهي -وهو -فهى -فهي -فهو -انت -أنت -لك -لها -له -هذه -هذا -تلك -ذلك -هناك -كانت -كان -يكون -تكون -وكانت -وكان -غير -بعض -قد -نحو -بين -بينما -منذ -ضمن -حيث -الان -الآن -خلال -بعد -قبل -حتى -عند -عندما -لدى -جميع diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_bg.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1ae4ba2ae38..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,193 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беше -би -бил -била -били -било -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главно -го -д -да -дали -до -докато -докога -дори -досега -доста -е -едва -един -ето -за -зад -заедно -заради -засега -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -ли -м -ме -между -мен -ми -мнозина -мога -могат -може -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нея -ни -ние -никой -нито -но -някои -някой -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първо -с -са -само -се -сега -си -скоро -след -сме -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -тн -то -това -тогава -този -той -толкова -точно -трябва -тук -тъй -тя -тях -у -харесва -ч -че -често -чрез -ще -щом -я diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ca.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ca.txt deleted file mode 100644 index 3da65deafe1..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -ací -ah -així -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allí -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquí -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes -d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em -en -encara -ens -entre 
-érem -eren -éreu -es -és -esta -està -estàvem -estaven -estàveu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc -poca -pocs -poques -potser -propi -qual -quals -quan -quant -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son -són -sons -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ckb.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ckb.txt deleted file mode 100644 index 87abf118fec..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ckb.txt +++ /dev/null @@ -1,136 +0,0 @@ -# set of kurdish stopwords -# note these have been normalized with our scheme (e represented with U+06D5, etc) -# constructed from: -# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al) -# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston) -# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc - -# and -و -# which -کە -# of -ی -# made/did -کرد -# that/which -ئەوەی -# on/head -سەر -# two -دوو -# also -هەروەها -# from/that -لەو -# makes/does -دەکات -# some -چەند -# every -هەر - -# demonstratives -# that -ئەو -# this -ئەم - -# personal pronouns -# I -من -# we -ئێمە -# you -تۆ -# you -ئێوە -# he/she/it -ئەو -# they -ئەوان - -# prepositions -# to/with/by -بە -پێ 
-# without -بەبێ -# along with/while/during -بەدەم -# in the opinion of -بەلای -# according to -بەپێی -# before -بەرلە -# in the direction of -بەرەوی -# in front of/toward -بەرەوە -# before/in the face of -بەردەم -# without -بێ -# except for -بێجگە -# for -بۆ -# on/in -دە -تێ -# with -دەگەڵ -# after -دوای -# except for/aside from -جگە -# in/from -لە -لێ -# in front of/before/because of -لەبەر -# between/among -لەبەینی -# concerning/about -لەبابەت -# concerning -لەبارەی -# instead of -لەباتی -# beside -لەبن -# instead of -لەبرێتی -# behind -لەدەم -# with/together with -لەگەڵ -# by -لەلایەن -# within -لەناو -# between/among -لەنێو -# for the sake of -لەپێناوی -# with respect to -لەرەوی -# by means of/for -لەرێ -# for the sake of -لەرێگا -# on/on top of/according to -لەسەر -# under -لەژێر -# between/among -ناو -# between/among -نێوان -# after -پاش -# before -پێش -# like -وەک diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_cz.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_cz.txt deleted file mode 100644 index 53c6097dac7..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,172 +0,0 @@ -a -s -k -o -i -u -v -z -dnes -cz -tímto -budeš -budem -byli -jseš -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proč -máte -tato -kam -tohoto -kdo -kteří -mi -nám -tom -tomuto -mít -nic -proto -kterou -byla -toho -protože -asi -ho -naši -napište -re -což -tím -takže -svých -její -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -či -pod -téma -mezi -přes -ty -pak -vám -ani -když -však -neg -jsem -tento -článku -články -aby -jsme -před -pta -jejich -byl -ještě -až -bez -také -pouze -první -vaše -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -není -vás -jen -podle -zde -už -být -více -bude -již -než -který -by -které -co -nebo -ten -tak -má -při -od -po -jsou -jak -další -ale -si -se -ve -to -jako -za -zpět -ze -do -pro -je -na -atd -atp -jakmile 
-přičemž -já -on -ona -ono -oni -ony -my -vy -jí -ji -mě -mne -jemu -tomu -těm -těmu -němu -němuž -jehož -jíž -jelikož -jež -jakož -načež diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_da.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145b98e..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. 
pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -på | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. 
-vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -når | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -også | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sådan | such, like this/like that diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_de.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7ae08..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. 
- - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daß | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. 
of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_el.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_el.txt deleted file mode 100644 index 232681f5bd6..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_el.txt +++ /dev/null @@ -1,78 +0,0 @@ 
-# Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'σ' instead of 'ς' -ο -η -το -οι -τα -του -τησ -των -τον -την -και -κι -κ -ειμαι -εισαι -ειναι -ειμαστε -ειστε -στο -στον -στη -στην -μα -αλλα -απο -για -προσ -με -σε -ωσ -παρα -αντι -κατα -μετα -θα -να -δε -δεν -μη -μην -επι -ενω -εαν -αν -τοτε -που -πωσ -ποιοσ -ποια -ποιο -ποιοι -ποιεσ -ποιων -ποιουσ -αυτοσ -αυτη -αυτο -αυτοι -αυτων -αυτουσ -αυτεσ -αυτα -εκεινοσ -εκεινη -εκεινο -εκεινοι -εκεινεσ -εκεινα -εκεινων -εκεινουσ -οπωσ -ομωσ -ισωσ -οσο -οτι diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2a1..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_es.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c8d56..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | pero -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? 
-unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mío | mine -mía | -míos | -mías | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería 
-serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_eu.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_eu.txt deleted file mode 100644 index 25f1db93460..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,99 +0,0 @@ -# example set of basque stopwords -al -anitz -arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute -edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan -hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fa.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c6da7..00000000000 --- 
a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fa.txt +++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'ي' instead of 'ی' -انان -نداشته -سراسر -خياه -ايشان -وي -تاكنون -بيشتري -دوم -پس -ناشي -وگو -يا -داشتند -سپس -هنگام -هرگز -پنج -نشان -امسال -ديگر -گروهي -شدند -چطور -ده -و -دو -نخستين -ولي -چرا -چه -وسط -ه -كدام -قابل -يك -رفت -هفت -همچنين -در -هزار -بله -بلي -شايد -اما -شناسي -گرفته -دهد -داشته -دانست -داشتن -خواهيم -ميليارد -وقتيكه -امد -خواهد -جز -اورده -شده -بلكه -خدمات -شدن -برخي -نبود -بسياري -جلوگيري -حق -كردند -نوعي -بعري -نكرده -نظير -نبايد -بوده -بودن -داد -اورد -هست -جايي -شود -دنبال -داده -بايد -سابق -هيچ -همان -انجا -كمتر -كجاست -گردد -كسي -تر -مردم -تان -دادن -بودند -سري -جدا -ندارند -مگر -يكديگر -دارد -دهند -بنابراين -هنگامي -سمت -جا -انچه -خود -دادند -زياد -دارند -اثر -بدون -بهترين -بيشتر -البته -به -براساس -بيرون -كرد -بعضي -گرفت -توي -اي -ميليون -او -جريان -تول -بر -مانند -برابر -باشيم -مدتي -گويند -اكنون -تا -تنها -جديد -چند -بي -نشده -كردن -كردم -گويد -كرده -كنيم -نمي -نزد -روي -قصد -فقط -بالاي -ديگران -اين -ديروز -توسط -سوم -ايم -دانند -سوي -استفاده -شما -كنار -داريم -ساخته -طور -امده -رفته -نخست -بيست -نزديك -طي -كنيد -از -انها -تمامي -داشت -يكي -طريق -اش -چيست -روب -نمايد -گفت -چندين -چيزي -تواند -ام -ايا -با -ان -ايد -ترين -اينكه -ديگري -راه -هايي -بروز -همچنان -پاعين -كس -حدود -مختلف -مقابل -چيز -گيرد -ندارد -ضد -همچون -سازي -شان -مورد -باره -مرسي -خويش -برخوردار -چون -خارج -شش -هنوز -تحت -ضمن -هستيم -گفته -فكر -بسيار -پيش -براي -روزهاي -انكه -نخواهد -بالا -كل -وقتي -كي -چنين -كه -گيري -نيست -است -كجا -كند -نيز -يابد -بندي -حتي -توانند -عقب -خواست -كنند -بين -تمام -همه -ما -باشند -مثل -شد -اري -باشد -اره -طبق -بعد -اگر -صورت 
-غير -جاي -بيش -ريزي -اند -زيرا -چگونه -بار -لطفا -مي -درباره -من -ديده -همين -گذاري -برداري -علت -گذاشته -هم -فوق -نه -ها -شوند -اباد -همواره -هر -اول -خواهند -چهار -نام -امروز -مان -هاي -قبل -كنم -سعي -تازه -را -هستند -زير -جلوي -عنوان -بود diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fi.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a055b..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo 
noiden noita noissa noista noihin noilla noilta noille noina noiksi | those -ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about -poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fr.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae6846..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -cela | that -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ga.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ga.txt deleted file mode 100644 index 
9ff88d747e5..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,110 +0,0 @@ - -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtí -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -ní -níor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sí -tar -thar -thú -triúr -trí -trína -trínár -tríocha -tú -um -ár -é -éis -í -ó -ón -óna -ónár diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_gl.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_gl.txt deleted file mode 100644 index d8760b12c14..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_gl.txt +++ /dev/null @@ -1,161 +0,0 @@ -# galican stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns -nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós 
diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hi.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb083b..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer -# for spelling variation (see section below), such that it can be used whether or -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well. -अंदर -अत -अपना -अपनी -अपने -अभी -आदि -आप -इत्यादि -इन -इनका -इन्हीं -इन्हें -इन्हों -इस -इसका -इसकी -इसके -इसमें -इसी -इसे -उन -उनका -उनकी -उनके -उनको -उन्हीं -उन्हें -उन्हों -उस -उसके -उसी -उसे -एक -एवं -एस -ऐसे -और -कई -कर -करता -करते -करना -करने -करें -कहते -कहा -का -काफ़ी -कि -कितना -किन्हें -किन्हों -किया -किर -किस -किसी -किसे -की -कुछ -कुल -के -को -कोई -कौन -कौनसा -गया -घर -जब -जहाँ -जा -जितना -जिन -जिन्हें -जिन्हों -जिस -जिसे -जीधर -जैसा -जैसे -जो -तक -तब -तरह -तिन -तिन्हें -तिन्हों -तिस -तिसे -तो -था -थी -थे -दबारा -दिया -दुसरा -दूसरे -दो -द्वारा -न -नहीं -ना -निहायत -नीचे -ने -पर -पर -पहले -पूरा -पे -फिर -बनी -बही -बहुत -बाद -बाला -बिलकुल -भी -भीतर -मगर -मानो -मे -में -यदि -यह -यहाँ -यही -या -यिह -ये -रखें -रहा -रहे -ऱ्वासा -लिए -लिये -लेकिन -व -वर्ग -वह -वह -वहाँ -वहीं -वाले -वुह -वे -वग़ैरह -संग -सकता -सकते -सबसे -सभी -साथ -साबुत -साभ -सारा -से -सो -ही -हुआ -हुई -हुए -है -हैं -हो -होता -होती -होते -होना -होने -# additional normalized forms of the above -अपनि -जेसे -होति -सभि -तिंहों -इंहों -दवारा -इसि -किंहें -थि -उंहों -ओर -जिंहें -वहिं -अभि -बनि -हि -उंहिं -उंहें -हें -वगेरह -एसे -रवासा -कोन -निचे -काफि -उसि -पुरा -भितर -हे -बहि -वहां -कोइ -यहां -जिंहों -तिंहें -किसि -कइ -यहि 
-इंहिं -जिधर -इंहें -अदि -इतयादि -हुइ -कोनसा -इसकि -दुसरे -जहां -अप -किंहों -उनकि -भि -वरग -हुअ -जेसा -नहिं diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hu.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da8aa9..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. 
-ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hy.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hy.txt deleted file mode 100644 index 60c1c50fbc8..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,46 +0,0 @@ -# example set of Armenian stopwords. 
-այդ -այլ -այն -այս -դու -դուք -եմ -են -ենք -ես -եք -է -էի -էին -էինք -էիր -էիք -էր -ըստ -թ -ի -ին -իսկ -իր -կամ -համար -հետ -հետո -մենք -մեջ -մի -ն -նա -նաև -նրա -նրանք -որ -որը -որոնք -որպես -ու -ում -պիտի -վրա -և diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_id.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_id.txt deleted file mode 100644 index 4617f83a5c5..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,359 +0,0 @@ -# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya -hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun -dikarenakan -karena 
-karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin -mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_it.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc773ab..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 @@ - | From 
svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad | a (to) before vowel -al | a + il -allo | a + lo -ai | a + i -agli | a + gli -all | a + l' -agl | a + gl' -alla | a + la -alle | a + le -con | with -col | con + il -coi | con + i (forms collo, cogli etc are now very rare) -da | from -dal | da + il -dallo | da + lo -dai | da + i -dagli | da + gli -dall | da + l' -dagl | da + gll' -dalla | da + la -dalle | da + le -di | of -del | di + il -dello | di + lo -dei | di + i -degli | di + gli -dell | di + l' -degl | di + gl' -della | di + la -delle | di + le -in | in -nel | in + el -nello | in + lo -nei | in + i -negli | in + gli -nell | in + l' -negl | in + gl' -nella | in + la -nelle | in + le -su | on -sul | su + il -sullo | su + lo -sui | su + i -sugli | su + gli -sull | su + l' -sugl | su + gl' -sulla | su + la -sulle | su + le -per | through, by -tra | among -contro | against -io | I -tu | thou -lui | he -lei | she -noi | we -voi | you -loro | they -mio | my -mia | -miei | -mie | -tuo | -tua | -tuoi | thy -tue | -suo | -sua | -suoi | his, her -sue | -nostro | our -nostra | -nostri | -nostre | -vostro | your -vostra | -vostri | -vostre | -mi | me -ti | thee -ci | us, there -vi | you, there -lo | him, the -la | her, the -li | them -le | them, the -gli | to him, the -ne | from there etc -il | the -un | a -uno | a -una | a -ma | but -ed | and -se | if -perché | why, because -anche | also -come | how -dov | where (as dov') -dove | where -che | who, that -chi | who -cui | whom -non | not -più | more -quale | who, that 
-quanto | how much -quanti | -quanta | -quante | -quello | that -quelli | -quella | -quelle | -questo | this -questi | -questa | -queste | -si | yes -tutto | all -tutti | all - - | single letter forms: - -a | at -c | as c' for ce or ci -e | and -i | the -l | as l' -o | or - - | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - - | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - - | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - - | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ja.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6b16..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ja.txt +++ 
/dev/null @@ -1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_lv.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_lv.txt deleted file mode 100644 index e21a23c06c3..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,172 +0,0 @@ -# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined: -# pronouns, adverbs, interjections were removed -# -# prepositions -aiz -ap -ar -apakš -ārpus -augšpus -bez -caur -dēļ -gar -iekš -iz -kopš -labad -lejpus -līdz -no -otrpus -pa -par -pār -pēc -pie -pirms -pret -priekš -starp -šaipus -uz -viņpus -virs -virspus -zem -apakšpus -# Conjunctions -un -bet -jo -ja -ka -lai -tomēr -tikko -turpretī -arī -kaut -gan -tādēļ -tā -ne -tikvien -vien -kā -ir -te -vai -kamēr -# Particles -ar -diezin -droši -diemžēl -nebūt -ik -it -taču -nu -pat -tiklab -iekšpus -nedz -tik -nevis -turpretim -jeb -iekam -iekām -iekāms -kolīdz -līdzko -tiklīdz -jebšu -tālab -tāpēc -nekā -itin -jā -jau -jel -nē -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -būt -biju -biji -bija -bijām -bijāt -esmu -esi -esam -esat -būšu -būsi -būs -būsim -būsiet -tikt -tiku -tiki -tika -tikām -tikāt -tieku -tiec -tiek -tiekam -tiekat -tikšu -tiks -tiksim -tiksiet -tapt -tapi -tapāt -topat -tapšu -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva -kļuvām -kļuvāt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# verbs -varēt -varēju 
-varējām -varēšu -varēsim -var -varēji -varējāt -varēsi -varēsiet -varat -varēja -varēs diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_nl.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aeacf6f..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. 
of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. 
of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_no.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28ba54..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard , Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) 
-en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -på | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -så | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nå | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -når | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -å | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sånn | such a -inni | inside/within -mellom | between -vår | our -hver | each -hvem | who -vors | us/ours -hvis | whose -både | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -også | also -slik | just -vært | been -være | to be -båe | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) 
* -då | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjå | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_pt.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01af6b..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | of, from -a | the; to, at; her -o | the; him -que | who, that -e | and -do | de + o -da | de + a -em | in -um | a -para | for - | é from SER -com | with -não | not, no -uma | a -os | the; them -no | em + o -se | himself etc -na | em + a -por | for -mais | more -as | the; them -dos | de + os -como | as, like -mas | but - | foi from SER -ao | a + o -ele | he -das | de + as - | tem from TER -à | a + a -seu | his -sua | her -ou | or - | ser from SER -quando | when -muito | much - | há from HAV -nos | em + os; us -já | already, now - | está from EST -eu | I -também | also -só | only, just -pelo | per + o -pela | per + a -até | up to -isso | that -ela | he -entre | between - | era from SER -depois | after -sem | without -mesmo | same -aos | a + os - | ter from TER -seus | his -quem | whom -nas | em + as -me | me -esse | that -eles | they - | estão from EST -você | you - | tinha from TER - | foram from SER -essa | that -num | em + um -nem | nor -suas | her -meu | my -às | a + as -minha | my - | têm from TER -numa | em + uma -pelos | per + os -elas | they - | havia from HAV - | seja from SER -qual | which - | será from SER -nós | we - | tenho from TER -lhe | to him, her -deles | of them -essas | those -esses | those -pelas | per + as -este | this - | fosse from SER -dele | of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. 
- -tu | thou -te | thee -vocês | you (plural) -vos | you -lhes | to them -meus | my -minhas -teu | thy -tua -teus -tuas -nosso | our -nossa -nossos -nossas - -dela | of her -delas | of them - -esta | this -estes | these -estas | these -aquele | that -aquela | that -aqueles | those -aquelas | those -isto | this -aquilo | that - - | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - - | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houveríamos -houveriam - - | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -seríamos -seriam - - | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tínhamos -tinham -tive -teve -tivemos -tiveram -tivera -tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -teríamos -teriam diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ro.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ro.txt deleted file mode 100644 index 4fdee90a5ba..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,233 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi -totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ru.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ru.txt deleted file mode 100644 index 55271400c64..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `ё' is translated to `е'. - -и | and -в | in/into -во | alternative form -не | not -что | what/that -он | he -на | on/onto -я | i -с | from -со | alternative form -как | how -а | milder form of `no' (but) -то | conjunction and form of `that' -все | all -она | she -так | so, thus -его | him -но | but -да | yes/and -ты | thou -к | towards, by -у | around, chez -же | intensifier particle -вы | you -за | beyond, behind -бы | conditional/subj. particle -по | up to, along -только | only -ее | her -мне | to me -было | it was -вот | here is/are, particle -от | away from -меня | me -еще | still, yet, more -нет | no, there isnt/arent -о | about -из | out of -ему | to him -теперь | now -когда | when -даже | even -ну | so, well -вдруг | suddenly -ли | interrogative particle -если | if -уже | already, but homonym of `narrower' -или | or -ни | neither -быть | to be -был | he was -него | prepositional form of его -до | up to -вас | you accusative -нибудь | indef. 
suffix preceded by hyphen -опять | again -уж | already, but homonym of `adder' -вам | to you -сказал | he said -ведь | particle `after all' -там | there -потом | then -себя | oneself -ничего | nothing -ей | to her -может | usually with `быть' as `maybe' -они | they -тут | here -где | where -есть | there is/are -надо | got to, must -ней | prepositional form of ей -для | for -мы | we -тебя | thee -их | them, their -чем | than -была | she was -сам | self -чтоб | in order to -без | without -будто | as if -человек | man, person, one -чего | genitive form of `what' -раз | once -тоже | also -себе | to oneself -под | beneath -жизнь | life -будет | will be -ж | short form of intensifer particle `же' -тогда | then -кто | who -этот | this -говорил | was saying -того | genitive form of `that' -потому | for that reason -этого | genitive form of `this' -какой | which -совсем | altogether -ним | prepositional form of `его', `они' -здесь | here -этом | prepositional form of `этот' -один | one -почти | almost -мой | my -тем | instrumental/dative plural of `тот', `то' -чтобы | full form of `in order that' -нее | her (acc.) -кажется | it seems -сейчас | now -были | they were -куда | where to -зачем | why -сказать | to say -всех | all (acc., gen. preposn. plural) -никогда | never -сегодня | today -можно | possible, one can -при | by -наконец | finally -два | two -об | alternative form of `о', about -другой | another -хоть | even -после | after -над | above -больше | more -тот | that one (masc.) -через | across, in -эти | these -нас | us -про | about -всего | in all, only, of all -них | prepositional form of `они' (they) -какая | which, feminine -много | lots -разве | interrogative particle -сказала | she said -три | three -эту | this, acc. fem. sing. -моя | my, feminine -впрочем | moreover, besides -хорошо | good -свою | ones own, acc. fem. sing. -этой | oblique form of `эта', fem. `this' -перед | in front of -иногда | sometimes -лучше | better -чуть | a little -том | preposn. 
form of `that one' -нельзя | one must not -такой | such a one -им | to them -более | more -всегда | always -конечно | of course -всю | acc. fem. sing of `all' -между | between - - - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - | - | stems of verbs `to be', `to have', `to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_sv.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f6766..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | så = so, but also seed. These are indicated clearly below. - -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -på | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -så | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -då | then, when -sin | his -nu | now -har | have -inte | inte någon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -något | some etc -från | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -någon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -åt | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -några | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sådan | such a -vår | our 
-blivit | from bli -dess | its -inom | within -mellan | between -sådant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sådana | such a -vart | each -dina | thy -vars | whose -vårt | our -våra | our -ert | your -era | your -vilkas | whose - diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_th.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_th.txt deleted file mode 100644 index 07f0fabe692..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย -แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว diff --git a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_tr.txt b/solr/example/example-DIH/solr/solr/conf/lang/stopwords_tr.txt deleted file mode 100644 index 84d9408d4ea..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,212 +0,0 @@ -# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben 
-benden -beni -benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten diff --git a/solr/example/example-DIH/solr/solr/conf/lang/userdict_ja.txt b/solr/example/example-DIH/solr/solr/conf/lang/userdict_ja.txt deleted file mode 100644 index 6f0368e4d81..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags. Notice that entries do -# not have weights since they are always used when found. 
This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -# , ... , ... , -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same is undefined. -# -# Whitespace only lines are ignored. Comments are not allowed on entry lines. -# - -# Custom segmentation for kanji compounds -日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 -関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 - -# Custom segmentation for compound katakana -トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 -ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 - -# Custom reading for former sumo wrestler -朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/example/example-DIH/solr/solr/conf/managed-schema b/solr/example/example-DIH/solr/solr/conf/managed-schema deleted file mode 100644 index d337bda1225..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/managed-schema +++ /dev/null @@ -1,1143 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/mapping-FoldToASCII.txt b/solr/example/example-DIH/solr/solr/conf/mapping-FoldToASCII.txt deleted file mode 100644 index 9a84b6eac34..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/mapping-FoldToASCII.txt +++ /dev/null @@ -1,3813 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This map converts alphabetic, numeric, and symbolic Unicode characters -# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode -# block) into their ASCII equivalents, if one exists. 
-# -# Characters from the following Unicode blocks are converted; however, only -# those characters with reasonable ASCII alternatives are converted: -# -# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf -# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf -# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf -# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf -# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf -# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf -# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf -# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf -# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf -# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf -# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf -# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf -# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf -# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf -# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf -# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf -# -# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode -# -# The set of character conversions supported by this map is a superset of -# those supported by the map represented by mapping-ISOLatin1Accent.txt. -# -# See the bottom of this file for the Perl script used to generate the contents -# of this file (without this header) from ASCIIFoldingFilter.java. - - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
- - -# À [LATIN CAPITAL LETTER A WITH GRAVE] -"\u00C0" => "A" - -# Á [LATIN CAPITAL LETTER A WITH ACUTE] -"\u00C1" => "A" - -#  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] -"\u00C2" => "A" - -# à [LATIN CAPITAL LETTER A WITH TILDE] -"\u00C3" => "A" - -# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] -"\u00C4" => "A" - -# Å [LATIN CAPITAL LETTER A WITH RING ABOVE] -"\u00C5" => "A" - -# Ā [LATIN CAPITAL LETTER A WITH MACRON] -"\u0100" => "A" - -# Ă [LATIN CAPITAL LETTER A WITH BREVE] -"\u0102" => "A" - -# Ą [LATIN CAPITAL LETTER A WITH OGONEK] -"\u0104" => "A" - -# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] -"\u018F" => "A" - -# Ǎ [LATIN CAPITAL LETTER A WITH CARON] -"\u01CD" => "A" - -# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] -"\u01DE" => "A" - -# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E0" => "A" - -# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FA" => "A" - -# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] -"\u0200" => "A" - -# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] -"\u0202" => "A" - -# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] -"\u0226" => "A" - -# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] -"\u023A" => "A" - -# ᴀ [LATIN LETTER SMALL CAPITAL A] -"\u1D00" => "A" - -# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] -"\u1E00" => "A" - -# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] -"\u1EA0" => "A" - -# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] -"\u1EA2" => "A" - -# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA4" => "A" - -# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] -"\u1EA6" => "A" - -# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA8" => "A" - -# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAA" => "A" - -# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAC" => "A" - -# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] -"\u1EAE" => "A" - -# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] -"\u1EB0" => "A" - -# Ẳ [LATIN CAPITAL LETTER A WITH 
BREVE AND HOOK ABOVE] -"\u1EB2" => "A" - -# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] -"\u1EB4" => "A" - -# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB6" => "A" - -# Ⓐ [CIRCLED LATIN CAPITAL LETTER A] -"\u24B6" => "A" - -# A [FULLWIDTH LATIN CAPITAL LETTER A] -"\uFF21" => "A" - -# à [LATIN SMALL LETTER A WITH GRAVE] -"\u00E0" => "a" - -# á [LATIN SMALL LETTER A WITH ACUTE] -"\u00E1" => "a" - -# â [LATIN SMALL LETTER A WITH CIRCUMFLEX] -"\u00E2" => "a" - -# ã [LATIN SMALL LETTER A WITH TILDE] -"\u00E3" => "a" - -# ä [LATIN SMALL LETTER A WITH DIAERESIS] -"\u00E4" => "a" - -# å [LATIN SMALL LETTER A WITH RING ABOVE] -"\u00E5" => "a" - -# ā [LATIN SMALL LETTER A WITH MACRON] -"\u0101" => "a" - -# ă [LATIN SMALL LETTER A WITH BREVE] -"\u0103" => "a" - -# ą [LATIN SMALL LETTER A WITH OGONEK] -"\u0105" => "a" - -# ǎ [LATIN SMALL LETTER A WITH CARON] -"\u01CE" => "a" - -# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] -"\u01DF" => "a" - -# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E1" => "a" - -# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FB" => "a" - -# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] -"\u0201" => "a" - -# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] -"\u0203" => "a" - -# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] -"\u0227" => "a" - -# ɐ [LATIN SMALL LETTER TURNED A] -"\u0250" => "a" - -# ə [LATIN SMALL LETTER SCHWA] -"\u0259" => "a" - -# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] -"\u025A" => "a" - -# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] -"\u1D8F" => "a" - -# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] -"\u1D95" => "a" - -# ạ [LATIN SMALL LETTER A WITH RING BELOW] -"\u1E01" => "a" - -# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] -"\u1E9A" => "a" - -# ạ [LATIN SMALL LETTER A WITH DOT BELOW] -"\u1EA1" => "a" - -# ả [LATIN SMALL LETTER A WITH HOOK ABOVE] -"\u1EA3" => "a" - -# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA5" => "a" - -# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] 
-"\u1EA7" => "a" - -# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA9" => "a" - -# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAB" => "a" - -# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAD" => "a" - -# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] -"\u1EAF" => "a" - -# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] -"\u1EB1" => "a" - -# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] -"\u1EB3" => "a" - -# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] -"\u1EB5" => "a" - -# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB7" => "a" - -# ₐ [LATIN SUBSCRIPT SMALL LETTER A] -"\u2090" => "a" - -# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] -"\u2094" => "a" - -# ⓐ [CIRCLED LATIN SMALL LETTER A] -"\u24D0" => "a" - -# ⱥ [LATIN SMALL LETTER A WITH STROKE] -"\u2C65" => "a" - -# Ɐ [LATIN CAPITAL LETTER TURNED A] -"\u2C6F" => "a" - -# a [FULLWIDTH LATIN SMALL LETTER A] -"\uFF41" => "a" - -# Ꜳ [LATIN CAPITAL LETTER AA] -"\uA732" => "AA" - -# Æ [LATIN CAPITAL LETTER AE] -"\u00C6" => "AE" - -# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] -"\u01E2" => "AE" - -# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] -"\u01FC" => "AE" - -# ᴁ [LATIN LETTER SMALL CAPITAL AE] -"\u1D01" => "AE" - -# Ꜵ [LATIN CAPITAL LETTER AO] -"\uA734" => "AO" - -# Ꜷ [LATIN CAPITAL LETTER AU] -"\uA736" => "AU" - -# Ꜹ [LATIN CAPITAL LETTER AV] -"\uA738" => "AV" - -# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] -"\uA73A" => "AV" - -# Ꜽ [LATIN CAPITAL LETTER AY] -"\uA73C" => "AY" - -# ⒜ [PARENTHESIZED LATIN SMALL LETTER A] -"\u249C" => "(a)" - -# ꜳ [LATIN SMALL LETTER AA] -"\uA733" => "aa" - -# æ [LATIN SMALL LETTER AE] -"\u00E6" => "ae" - -# ǣ [LATIN SMALL LETTER AE WITH MACRON] -"\u01E3" => "ae" - -# ǽ [LATIN SMALL LETTER AE WITH ACUTE] -"\u01FD" => "ae" - -# ᴂ [LATIN SMALL LETTER TURNED AE] -"\u1D02" => "ae" - -# ꜵ [LATIN SMALL LETTER AO] -"\uA735" => "ao" - -# ꜷ [LATIN SMALL LETTER AU] -"\uA737" => "au" - -# ꜹ [LATIN SMALL LETTER AV] -"\uA739" => "av" - -# ꜻ [LATIN 
SMALL LETTER AV WITH HORIZONTAL BAR] -"\uA73B" => "av" - -# ꜽ [LATIN SMALL LETTER AY] -"\uA73D" => "ay" - -# Ɓ [LATIN CAPITAL LETTER B WITH HOOK] -"\u0181" => "B" - -# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] -"\u0182" => "B" - -# Ƀ [LATIN CAPITAL LETTER B WITH STROKE] -"\u0243" => "B" - -# ʙ [LATIN LETTER SMALL CAPITAL B] -"\u0299" => "B" - -# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] -"\u1D03" => "B" - -# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] -"\u1E02" => "B" - -# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] -"\u1E04" => "B" - -# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] -"\u1E06" => "B" - -# Ⓑ [CIRCLED LATIN CAPITAL LETTER B] -"\u24B7" => "B" - -# B [FULLWIDTH LATIN CAPITAL LETTER B] -"\uFF22" => "B" - -# ƀ [LATIN SMALL LETTER B WITH STROKE] -"\u0180" => "b" - -# ƃ [LATIN SMALL LETTER B WITH TOPBAR] -"\u0183" => "b" - -# ɓ [LATIN SMALL LETTER B WITH HOOK] -"\u0253" => "b" - -# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] -"\u1D6C" => "b" - -# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] -"\u1D80" => "b" - -# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] -"\u1E03" => "b" - -# ḅ [LATIN SMALL LETTER B WITH DOT BELOW] -"\u1E05" => "b" - -# ḇ [LATIN SMALL LETTER B WITH LINE BELOW] -"\u1E07" => "b" - -# ⓑ [CIRCLED LATIN SMALL LETTER B] -"\u24D1" => "b" - -# b [FULLWIDTH LATIN SMALL LETTER B] -"\uFF42" => "b" - -# ⒝ [PARENTHESIZED LATIN SMALL LETTER B] -"\u249D" => "(b)" - -# Ç [LATIN CAPITAL LETTER C WITH CEDILLA] -"\u00C7" => "C" - -# Ć [LATIN CAPITAL LETTER C WITH ACUTE] -"\u0106" => "C" - -# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] -"\u0108" => "C" - -# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] -"\u010A" => "C" - -# Č [LATIN CAPITAL LETTER C WITH CARON] -"\u010C" => "C" - -# Ƈ [LATIN CAPITAL LETTER C WITH HOOK] -"\u0187" => "C" - -# Ȼ [LATIN CAPITAL LETTER C WITH STROKE] -"\u023B" => "C" - -# ʗ [LATIN LETTER STRETCHED C] -"\u0297" => "C" - -# ᴄ [LATIN LETTER SMALL CAPITAL C] -"\u1D04" => "C" - -# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] -"\u1E08" => "C" - -# Ⓒ 
[CIRCLED LATIN CAPITAL LETTER C] -"\u24B8" => "C" - -# C [FULLWIDTH LATIN CAPITAL LETTER C] -"\uFF23" => "C" - -# ç [LATIN SMALL LETTER C WITH CEDILLA] -"\u00E7" => "c" - -# ć [LATIN SMALL LETTER C WITH ACUTE] -"\u0107" => "c" - -# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] -"\u0109" => "c" - -# ċ [LATIN SMALL LETTER C WITH DOT ABOVE] -"\u010B" => "c" - -# č [LATIN SMALL LETTER C WITH CARON] -"\u010D" => "c" - -# ƈ [LATIN SMALL LETTER C WITH HOOK] -"\u0188" => "c" - -# ȼ [LATIN SMALL LETTER C WITH STROKE] -"\u023C" => "c" - -# ɕ [LATIN SMALL LETTER C WITH CURL] -"\u0255" => "c" - -# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] -"\u1E09" => "c" - -# ↄ [LATIN SMALL LETTER REVERSED C] -"\u2184" => "c" - -# ⓒ [CIRCLED LATIN SMALL LETTER C] -"\u24D2" => "c" - -# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] -"\uA73E" => "c" - -# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] -"\uA73F" => "c" - -# c [FULLWIDTH LATIN SMALL LETTER C] -"\uFF43" => "c" - -# ⒞ [PARENTHESIZED LATIN SMALL LETTER C] -"\u249E" => "(c)" - -# Ð [LATIN CAPITAL LETTER ETH] -"\u00D0" => "D" - -# Ď [LATIN CAPITAL LETTER D WITH CARON] -"\u010E" => "D" - -# Đ [LATIN CAPITAL LETTER D WITH STROKE] -"\u0110" => "D" - -# Ɖ [LATIN CAPITAL LETTER AFRICAN D] -"\u0189" => "D" - -# Ɗ [LATIN CAPITAL LETTER D WITH HOOK] -"\u018A" => "D" - -# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] -"\u018B" => "D" - -# ᴅ [LATIN LETTER SMALL CAPITAL D] -"\u1D05" => "D" - -# ᴆ [LATIN LETTER SMALL CAPITAL ETH] -"\u1D06" => "D" - -# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] -"\u1E0A" => "D" - -# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] -"\u1E0C" => "D" - -# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] -"\u1E0E" => "D" - -# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] -"\u1E10" => "D" - -# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E12" => "D" - -# Ⓓ [CIRCLED LATIN CAPITAL LETTER D] -"\u24B9" => "D" - -# Ꝺ [LATIN CAPITAL LETTER INSULAR D] -"\uA779" => "D" - -# D [FULLWIDTH LATIN CAPITAL LETTER D] -"\uFF24" => "D" - -# ð [LATIN SMALL 
LETTER ETH] -"\u00F0" => "d" - -# ď [LATIN SMALL LETTER D WITH CARON] -"\u010F" => "d" - -# đ [LATIN SMALL LETTER D WITH STROKE] -"\u0111" => "d" - -# ƌ [LATIN SMALL LETTER D WITH TOPBAR] -"\u018C" => "d" - -# ȡ [LATIN SMALL LETTER D WITH CURL] -"\u0221" => "d" - -# ɖ [LATIN SMALL LETTER D WITH TAIL] -"\u0256" => "d" - -# ɗ [LATIN SMALL LETTER D WITH HOOK] -"\u0257" => "d" - -# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] -"\u1D6D" => "d" - -# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] -"\u1D81" => "d" - -# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] -"\u1D91" => "d" - -# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] -"\u1E0B" => "d" - -# ḍ [LATIN SMALL LETTER D WITH DOT BELOW] -"\u1E0D" => "d" - -# ḏ [LATIN SMALL LETTER D WITH LINE BELOW] -"\u1E0F" => "d" - -# ḑ [LATIN SMALL LETTER D WITH CEDILLA] -"\u1E11" => "d" - -# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E13" => "d" - -# ⓓ [CIRCLED LATIN SMALL LETTER D] -"\u24D3" => "d" - -# ꝺ [LATIN SMALL LETTER INSULAR D] -"\uA77A" => "d" - -# d [FULLWIDTH LATIN SMALL LETTER D] -"\uFF44" => "d" - -# DŽ [LATIN CAPITAL LETTER DZ WITH CARON] -"\u01C4" => "DZ" - -# DZ [LATIN CAPITAL LETTER DZ] -"\u01F1" => "DZ" - -# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] -"\u01C5" => "Dz" - -# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] -"\u01F2" => "Dz" - -# ⒟ [PARENTHESIZED LATIN SMALL LETTER D] -"\u249F" => "(d)" - -# ȸ [LATIN SMALL LETTER DB DIGRAPH] -"\u0238" => "db" - -# dž [LATIN SMALL LETTER DZ WITH CARON] -"\u01C6" => "dz" - -# dz [LATIN SMALL LETTER DZ] -"\u01F3" => "dz" - -# ʣ [LATIN SMALL LETTER DZ DIGRAPH] -"\u02A3" => "dz" - -# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] -"\u02A5" => "dz" - -# È [LATIN CAPITAL LETTER E WITH GRAVE] -"\u00C8" => "E" - -# É [LATIN CAPITAL LETTER E WITH ACUTE] -"\u00C9" => "E" - -# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] -"\u00CA" => "E" - -# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] -"\u00CB" => "E" - -# Ē [LATIN CAPITAL LETTER E WITH MACRON] -"\u0112" => "E" - -# Ĕ 
[LATIN CAPITAL LETTER E WITH BREVE] -"\u0114" => "E" - -# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] -"\u0116" => "E" - -# Ę [LATIN CAPITAL LETTER E WITH OGONEK] -"\u0118" => "E" - -# Ě [LATIN CAPITAL LETTER E WITH CARON] -"\u011A" => "E" - -# Ǝ [LATIN CAPITAL LETTER REVERSED E] -"\u018E" => "E" - -# Ɛ [LATIN CAPITAL LETTER OPEN E] -"\u0190" => "E" - -# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] -"\u0204" => "E" - -# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] -"\u0206" => "E" - -# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] -"\u0228" => "E" - -# Ɇ [LATIN CAPITAL LETTER E WITH STROKE] -"\u0246" => "E" - -# ᴇ [LATIN LETTER SMALL CAPITAL E] -"\u1D07" => "E" - -# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] -"\u1E14" => "E" - -# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] -"\u1E16" => "E" - -# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E18" => "E" - -# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] -"\u1E1A" => "E" - -# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] -"\u1E1C" => "E" - -# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] -"\u1EB8" => "E" - -# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] -"\u1EBA" => "E" - -# Ẽ [LATIN CAPITAL LETTER E WITH TILDE] -"\u1EBC" => "E" - -# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBE" => "E" - -# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC0" => "E" - -# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC2" => "E" - -# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC4" => "E" - -# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC6" => "E" - -# Ⓔ [CIRCLED LATIN CAPITAL LETTER E] -"\u24BA" => "E" - -# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] -"\u2C7B" => "E" - -# E [FULLWIDTH LATIN CAPITAL LETTER E] -"\uFF25" => "E" - -# è [LATIN SMALL LETTER E WITH GRAVE] -"\u00E8" => "e" - -# é [LATIN SMALL LETTER E WITH ACUTE] -"\u00E9" => "e" - -# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] -"\u00EA" => "e" - -# ë [LATIN SMALL LETTER E WITH DIAERESIS] -"\u00EB" 
=> "e" - -# ē [LATIN SMALL LETTER E WITH MACRON] -"\u0113" => "e" - -# ĕ [LATIN SMALL LETTER E WITH BREVE] -"\u0115" => "e" - -# ė [LATIN SMALL LETTER E WITH DOT ABOVE] -"\u0117" => "e" - -# ę [LATIN SMALL LETTER E WITH OGONEK] -"\u0119" => "e" - -# ě [LATIN SMALL LETTER E WITH CARON] -"\u011B" => "e" - -# ǝ [LATIN SMALL LETTER TURNED E] -"\u01DD" => "e" - -# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] -"\u0205" => "e" - -# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] -"\u0207" => "e" - -# ȩ [LATIN SMALL LETTER E WITH CEDILLA] -"\u0229" => "e" - -# ɇ [LATIN SMALL LETTER E WITH STROKE] -"\u0247" => "e" - -# ɘ [LATIN SMALL LETTER REVERSED E] -"\u0258" => "e" - -# ɛ [LATIN SMALL LETTER OPEN E] -"\u025B" => "e" - -# ɜ [LATIN SMALL LETTER REVERSED OPEN E] -"\u025C" => "e" - -# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] -"\u025D" => "e" - -# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] -"\u025E" => "e" - -# ʚ [LATIN SMALL LETTER CLOSED OPEN E] -"\u029A" => "e" - -# ᴈ [LATIN SMALL LETTER TURNED OPEN E] -"\u1D08" => "e" - -# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] -"\u1D92" => "e" - -# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] -"\u1D93" => "e" - -# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] -"\u1D94" => "e" - -# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] -"\u1E15" => "e" - -# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] -"\u1E17" => "e" - -# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E19" => "e" - -# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] -"\u1E1B" => "e" - -# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] -"\u1E1D" => "e" - -# ẹ [LATIN SMALL LETTER E WITH DOT BELOW] -"\u1EB9" => "e" - -# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] -"\u1EBB" => "e" - -# ẽ [LATIN SMALL LETTER E WITH TILDE] -"\u1EBD" => "e" - -# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBF" => "e" - -# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC1" => "e" - -# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC3" => 
"e" - -# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC5" => "e" - -# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC7" => "e" - -# ₑ [LATIN SUBSCRIPT SMALL LETTER E] -"\u2091" => "e" - -# ⓔ [CIRCLED LATIN SMALL LETTER E] -"\u24D4" => "e" - -# ⱸ [LATIN SMALL LETTER E WITH NOTCH] -"\u2C78" => "e" - -# e [FULLWIDTH LATIN SMALL LETTER E] -"\uFF45" => "e" - -# ⒠ [PARENTHESIZED LATIN SMALL LETTER E] -"\u24A0" => "(e)" - -# Ƒ [LATIN CAPITAL LETTER F WITH HOOK] -"\u0191" => "F" - -# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] -"\u1E1E" => "F" - -# Ⓕ [CIRCLED LATIN CAPITAL LETTER F] -"\u24BB" => "F" - -# ꜰ [LATIN LETTER SMALL CAPITAL F] -"\uA730" => "F" - -# Ꝼ [LATIN CAPITAL LETTER INSULAR F] -"\uA77B" => "F" - -# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] -"\uA7FB" => "F" - -# F [FULLWIDTH LATIN CAPITAL LETTER F] -"\uFF26" => "F" - -# ƒ [LATIN SMALL LETTER F WITH HOOK] -"\u0192" => "f" - -# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] -"\u1D6E" => "f" - -# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] -"\u1D82" => "f" - -# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] -"\u1E1F" => "f" - -# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] -"\u1E9B" => "f" - -# ⓕ [CIRCLED LATIN SMALL LETTER F] -"\u24D5" => "f" - -# ꝼ [LATIN SMALL LETTER INSULAR F] -"\uA77C" => "f" - -# f [FULLWIDTH LATIN SMALL LETTER F] -"\uFF46" => "f" - -# ⒡ [PARENTHESIZED LATIN SMALL LETTER F] -"\u24A1" => "(f)" - -# ff [LATIN SMALL LIGATURE FF] -"\uFB00" => "ff" - -# ffi [LATIN SMALL LIGATURE FFI] -"\uFB03" => "ffi" - -# ffl [LATIN SMALL LIGATURE FFL] -"\uFB04" => "ffl" - -# fi [LATIN SMALL LIGATURE FI] -"\uFB01" => "fi" - -# fl [LATIN SMALL LIGATURE FL] -"\uFB02" => "fl" - -# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] -"\u011C" => "G" - -# Ğ [LATIN CAPITAL LETTER G WITH BREVE] -"\u011E" => "G" - -# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] -"\u0120" => "G" - -# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] -"\u0122" => "G" - -# Ɠ [LATIN CAPITAL LETTER G WITH HOOK] -"\u0193" => "G" - -# Ǥ [LATIN CAPITAL 
LETTER G WITH STROKE] -"\u01E4" => "G" - -# ǥ [LATIN SMALL LETTER G WITH STROKE] -"\u01E5" => "G" - -# Ǧ [LATIN CAPITAL LETTER G WITH CARON] -"\u01E6" => "G" - -# ǧ [LATIN SMALL LETTER G WITH CARON] -"\u01E7" => "G" - -# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] -"\u01F4" => "G" - -# ɢ [LATIN LETTER SMALL CAPITAL G] -"\u0262" => "G" - -# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] -"\u029B" => "G" - -# Ḡ [LATIN CAPITAL LETTER G WITH MACRON] -"\u1E20" => "G" - -# Ⓖ [CIRCLED LATIN CAPITAL LETTER G] -"\u24BC" => "G" - -# Ᵹ [LATIN CAPITAL LETTER INSULAR G] -"\uA77D" => "G" - -# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] -"\uA77E" => "G" - -# G [FULLWIDTH LATIN CAPITAL LETTER G] -"\uFF27" => "G" - -# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] -"\u011D" => "g" - -# ğ [LATIN SMALL LETTER G WITH BREVE] -"\u011F" => "g" - -# ġ [LATIN SMALL LETTER G WITH DOT ABOVE] -"\u0121" => "g" - -# ģ [LATIN SMALL LETTER G WITH CEDILLA] -"\u0123" => "g" - -# ǵ [LATIN SMALL LETTER G WITH ACUTE] -"\u01F5" => "g" - -# ɠ [LATIN SMALL LETTER G WITH HOOK] -"\u0260" => "g" - -# ɡ [LATIN SMALL LETTER SCRIPT G] -"\u0261" => "g" - -# ᵷ [LATIN SMALL LETTER TURNED G] -"\u1D77" => "g" - -# ᵹ [LATIN SMALL LETTER INSULAR G] -"\u1D79" => "g" - -# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] -"\u1D83" => "g" - -# ḡ [LATIN SMALL LETTER G WITH MACRON] -"\u1E21" => "g" - -# ⓖ [CIRCLED LATIN SMALL LETTER G] -"\u24D6" => "g" - -# ꝿ [LATIN SMALL LETTER TURNED INSULAR G] -"\uA77F" => "g" - -# g [FULLWIDTH LATIN SMALL LETTER G] -"\uFF47" => "g" - -# ⒢ [PARENTHESIZED LATIN SMALL LETTER G] -"\u24A2" => "(g)" - -# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] -"\u0124" => "H" - -# Ħ [LATIN CAPITAL LETTER H WITH STROKE] -"\u0126" => "H" - -# Ȟ [LATIN CAPITAL LETTER H WITH CARON] -"\u021E" => "H" - -# ʜ [LATIN LETTER SMALL CAPITAL H] -"\u029C" => "H" - -# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] -"\u1E22" => "H" - -# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] -"\u1E24" => "H" - -# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] 
-"\u1E26" => "H" - -# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] -"\u1E28" => "H" - -# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] -"\u1E2A" => "H" - -# Ⓗ [CIRCLED LATIN CAPITAL LETTER H] -"\u24BD" => "H" - -# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] -"\u2C67" => "H" - -# Ⱶ [LATIN CAPITAL LETTER HALF H] -"\u2C75" => "H" - -# H [FULLWIDTH LATIN CAPITAL LETTER H] -"\uFF28" => "H" - -# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] -"\u0125" => "h" - -# ħ [LATIN SMALL LETTER H WITH STROKE] -"\u0127" => "h" - -# ȟ [LATIN SMALL LETTER H WITH CARON] -"\u021F" => "h" - -# ɥ [LATIN SMALL LETTER TURNED H] -"\u0265" => "h" - -# ɦ [LATIN SMALL LETTER H WITH HOOK] -"\u0266" => "h" - -# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] -"\u02AE" => "h" - -# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] -"\u02AF" => "h" - -# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] -"\u1E23" => "h" - -# ḥ [LATIN SMALL LETTER H WITH DOT BELOW] -"\u1E25" => "h" - -# ḧ [LATIN SMALL LETTER H WITH DIAERESIS] -"\u1E27" => "h" - -# ḩ [LATIN SMALL LETTER H WITH CEDILLA] -"\u1E29" => "h" - -# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] -"\u1E2B" => "h" - -# ẖ [LATIN SMALL LETTER H WITH LINE BELOW] -"\u1E96" => "h" - -# ⓗ [CIRCLED LATIN SMALL LETTER H] -"\u24D7" => "h" - -# ⱨ [LATIN SMALL LETTER H WITH DESCENDER] -"\u2C68" => "h" - -# ⱶ [LATIN SMALL LETTER HALF H] -"\u2C76" => "h" - -# h [FULLWIDTH LATIN SMALL LETTER H] -"\uFF48" => "h" - -# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] -"\u01F6" => "HV" - -# ⒣ [PARENTHESIZED LATIN SMALL LETTER H] -"\u24A3" => "(h)" - -# ƕ [LATIN SMALL LETTER HV] -"\u0195" => "hv" - -# Ì [LATIN CAPITAL LETTER I WITH GRAVE] -"\u00CC" => "I" - -# Í [LATIN CAPITAL LETTER I WITH ACUTE] -"\u00CD" => "I" - -# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] -"\u00CE" => "I" - -# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] -"\u00CF" => "I" - -# Ĩ [LATIN CAPITAL LETTER I WITH TILDE] -"\u0128" => "I" - -# Ī [LATIN CAPITAL LETTER I WITH MACRON] -"\u012A" => "I" - -# Ĭ [LATIN 
CAPITAL LETTER I WITH BREVE] -"\u012C" => "I" - -# Į [LATIN CAPITAL LETTER I WITH OGONEK] -"\u012E" => "I" - -# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] -"\u0130" => "I" - -# Ɩ [LATIN CAPITAL LETTER IOTA] -"\u0196" => "I" - -# Ɨ [LATIN CAPITAL LETTER I WITH STROKE] -"\u0197" => "I" - -# Ǐ [LATIN CAPITAL LETTER I WITH CARON] -"\u01CF" => "I" - -# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] -"\u0208" => "I" - -# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] -"\u020A" => "I" - -# ɪ [LATIN LETTER SMALL CAPITAL I] -"\u026A" => "I" - -# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] -"\u1D7B" => "I" - -# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] -"\u1E2C" => "I" - -# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2E" => "I" - -# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] -"\u1EC8" => "I" - -# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] -"\u1ECA" => "I" - -# Ⓘ [CIRCLED LATIN CAPITAL LETTER I] -"\u24BE" => "I" - -# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] -"\uA7FE" => "I" - -# I [FULLWIDTH LATIN CAPITAL LETTER I] -"\uFF29" => "I" - -# ì [LATIN SMALL LETTER I WITH GRAVE] -"\u00EC" => "i" - -# í [LATIN SMALL LETTER I WITH ACUTE] -"\u00ED" => "i" - -# î [LATIN SMALL LETTER I WITH CIRCUMFLEX] -"\u00EE" => "i" - -# ï [LATIN SMALL LETTER I WITH DIAERESIS] -"\u00EF" => "i" - -# ĩ [LATIN SMALL LETTER I WITH TILDE] -"\u0129" => "i" - -# ī [LATIN SMALL LETTER I WITH MACRON] -"\u012B" => "i" - -# ĭ [LATIN SMALL LETTER I WITH BREVE] -"\u012D" => "i" - -# į [LATIN SMALL LETTER I WITH OGONEK] -"\u012F" => "i" - -# ı [LATIN SMALL LETTER DOTLESS I] -"\u0131" => "i" - -# ǐ [LATIN SMALL LETTER I WITH CARON] -"\u01D0" => "i" - -# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] -"\u0209" => "i" - -# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] -"\u020B" => "i" - -# ɨ [LATIN SMALL LETTER I WITH STROKE] -"\u0268" => "i" - -# ᴉ [LATIN SMALL LETTER TURNED I] -"\u1D09" => "i" - -# ᵢ [LATIN SUBSCRIPT SMALL LETTER I] -"\u1D62" => "i" - -# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] -"\u1D7C" => "i" - 
-# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] -"\u1D96" => "i" - -# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] -"\u1E2D" => "i" - -# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2F" => "i" - -# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] -"\u1EC9" => "i" - -# ị [LATIN SMALL LETTER I WITH DOT BELOW] -"\u1ECB" => "i" - -# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] -"\u2071" => "i" - -# ⓘ [CIRCLED LATIN SMALL LETTER I] -"\u24D8" => "i" - -# i [FULLWIDTH LATIN SMALL LETTER I] -"\uFF49" => "i" - -# IJ [LATIN CAPITAL LIGATURE IJ] -"\u0132" => "IJ" - -# ⒤ [PARENTHESIZED LATIN SMALL LETTER I] -"\u24A4" => "(i)" - -# ij [LATIN SMALL LIGATURE IJ] -"\u0133" => "ij" - -# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] -"\u0134" => "J" - -# Ɉ [LATIN CAPITAL LETTER J WITH STROKE] -"\u0248" => "J" - -# ᴊ [LATIN LETTER SMALL CAPITAL J] -"\u1D0A" => "J" - -# Ⓙ [CIRCLED LATIN CAPITAL LETTER J] -"\u24BF" => "J" - -# J [FULLWIDTH LATIN CAPITAL LETTER J] -"\uFF2A" => "J" - -# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] -"\u0135" => "j" - -# ǰ [LATIN SMALL LETTER J WITH CARON] -"\u01F0" => "j" - -# ȷ [LATIN SMALL LETTER DOTLESS J] -"\u0237" => "j" - -# ɉ [LATIN SMALL LETTER J WITH STROKE] -"\u0249" => "j" - -# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] -"\u025F" => "j" - -# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] -"\u0284" => "j" - -# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] -"\u029D" => "j" - -# ⓙ [CIRCLED LATIN SMALL LETTER J] -"\u24D9" => "j" - -# ⱼ [LATIN SUBSCRIPT SMALL LETTER J] -"\u2C7C" => "j" - -# j [FULLWIDTH LATIN SMALL LETTER J] -"\uFF4A" => "j" - -# ⒥ [PARENTHESIZED LATIN SMALL LETTER J] -"\u24A5" => "(j)" - -# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] -"\u0136" => "K" - -# Ƙ [LATIN CAPITAL LETTER K WITH HOOK] -"\u0198" => "K" - -# Ǩ [LATIN CAPITAL LETTER K WITH CARON] -"\u01E8" => "K" - -# ᴋ [LATIN LETTER SMALL CAPITAL K] -"\u1D0B" => "K" - -# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] -"\u1E30" => "K" - -# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] -"\u1E32" => "K" - 
-# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] -"\u1E34" => "K" - -# Ⓚ [CIRCLED LATIN CAPITAL LETTER K] -"\u24C0" => "K" - -# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] -"\u2C69" => "K" - -# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] -"\uA740" => "K" - -# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] -"\uA742" => "K" - -# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA744" => "K" - -# K [FULLWIDTH LATIN CAPITAL LETTER K] -"\uFF2B" => "K" - -# ķ [LATIN SMALL LETTER K WITH CEDILLA] -"\u0137" => "k" - -# ƙ [LATIN SMALL LETTER K WITH HOOK] -"\u0199" => "k" - -# ǩ [LATIN SMALL LETTER K WITH CARON] -"\u01E9" => "k" - -# ʞ [LATIN SMALL LETTER TURNED K] -"\u029E" => "k" - -# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] -"\u1D84" => "k" - -# ḱ [LATIN SMALL LETTER K WITH ACUTE] -"\u1E31" => "k" - -# ḳ [LATIN SMALL LETTER K WITH DOT BELOW] -"\u1E33" => "k" - -# ḵ [LATIN SMALL LETTER K WITH LINE BELOW] -"\u1E35" => "k" - -# ⓚ [CIRCLED LATIN SMALL LETTER K] -"\u24DA" => "k" - -# ⱪ [LATIN SMALL LETTER K WITH DESCENDER] -"\u2C6A" => "k" - -# ꝁ [LATIN SMALL LETTER K WITH STROKE] -"\uA741" => "k" - -# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] -"\uA743" => "k" - -# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA745" => "k" - -# k [FULLWIDTH LATIN SMALL LETTER K] -"\uFF4B" => "k" - -# ⒦ [PARENTHESIZED LATIN SMALL LETTER K] -"\u24A6" => "(k)" - -# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] -"\u0139" => "L" - -# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] -"\u013B" => "L" - -# Ľ [LATIN CAPITAL LETTER L WITH CARON] -"\u013D" => "L" - -# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] -"\u013F" => "L" - -# Ł [LATIN CAPITAL LETTER L WITH STROKE] -"\u0141" => "L" - -# Ƚ [LATIN CAPITAL LETTER L WITH BAR] -"\u023D" => "L" - -# ʟ [LATIN LETTER SMALL CAPITAL L] -"\u029F" => "L" - -# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] -"\u1D0C" => "L" - -# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] -"\u1E36" => "L" - -# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] -"\u1E38" => 
"L" - -# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] -"\u1E3A" => "L" - -# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3C" => "L" - -# Ⓛ [CIRCLED LATIN CAPITAL LETTER L] -"\u24C1" => "L" - -# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] -"\u2C60" => "L" - -# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] -"\u2C62" => "L" - -# Ꝇ [LATIN CAPITAL LETTER BROKEN L] -"\uA746" => "L" - -# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] -"\uA748" => "L" - -# Ꞁ [LATIN CAPITAL LETTER TURNED L] -"\uA780" => "L" - -# L [FULLWIDTH LATIN CAPITAL LETTER L] -"\uFF2C" => "L" - -# ĺ [LATIN SMALL LETTER L WITH ACUTE] -"\u013A" => "l" - -# ļ [LATIN SMALL LETTER L WITH CEDILLA] -"\u013C" => "l" - -# ľ [LATIN SMALL LETTER L WITH CARON] -"\u013E" => "l" - -# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] -"\u0140" => "l" - -# ł [LATIN SMALL LETTER L WITH STROKE] -"\u0142" => "l" - -# ƚ [LATIN SMALL LETTER L WITH BAR] -"\u019A" => "l" - -# ȴ [LATIN SMALL LETTER L WITH CURL] -"\u0234" => "l" - -# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] -"\u026B" => "l" - -# ɬ [LATIN SMALL LETTER L WITH BELT] -"\u026C" => "l" - -# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] -"\u026D" => "l" - -# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] -"\u1D85" => "l" - -# ḷ [LATIN SMALL LETTER L WITH DOT BELOW] -"\u1E37" => "l" - -# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] -"\u1E39" => "l" - -# ḻ [LATIN SMALL LETTER L WITH LINE BELOW] -"\u1E3B" => "l" - -# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3D" => "l" - -# ⓛ [CIRCLED LATIN SMALL LETTER L] -"\u24DB" => "l" - -# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] -"\u2C61" => "l" - -# ꝇ [LATIN SMALL LETTER BROKEN L] -"\uA747" => "l" - -# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] -"\uA749" => "l" - -# ꞁ [LATIN SMALL LETTER TURNED L] -"\uA781" => "l" - -# l [FULLWIDTH LATIN SMALL LETTER L] -"\uFF4C" => "l" - -# LJ [LATIN CAPITAL LETTER LJ] -"\u01C7" => "LJ" - -# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] -"\u1EFA" => "LL" - -# Lj [LATIN CAPITAL LETTER L WITH 
SMALL LETTER J] -"\u01C8" => "Lj" - -# ⒧ [PARENTHESIZED LATIN SMALL LETTER L] -"\u24A7" => "(l)" - -# lj [LATIN SMALL LETTER LJ] -"\u01C9" => "lj" - -# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] -"\u1EFB" => "ll" - -# ʪ [LATIN SMALL LETTER LS DIGRAPH] -"\u02AA" => "ls" - -# ʫ [LATIN SMALL LETTER LZ DIGRAPH] -"\u02AB" => "lz" - -# Ɯ [LATIN CAPITAL LETTER TURNED M] -"\u019C" => "M" - -# ᴍ [LATIN LETTER SMALL CAPITAL M] -"\u1D0D" => "M" - -# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] -"\u1E3E" => "M" - -# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] -"\u1E40" => "M" - -# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] -"\u1E42" => "M" - -# Ⓜ [CIRCLED LATIN CAPITAL LETTER M] -"\u24C2" => "M" - -# Ɱ [LATIN CAPITAL LETTER M WITH HOOK] -"\u2C6E" => "M" - -# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] -"\uA7FD" => "M" - -# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] -"\uA7FF" => "M" - -# M [FULLWIDTH LATIN CAPITAL LETTER M] -"\uFF2D" => "M" - -# ɯ [LATIN SMALL LETTER TURNED M] -"\u026F" => "m" - -# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] -"\u0270" => "m" - -# ɱ [LATIN SMALL LETTER M WITH HOOK] -"\u0271" => "m" - -# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] -"\u1D6F" => "m" - -# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] -"\u1D86" => "m" - -# ḿ [LATIN SMALL LETTER M WITH ACUTE] -"\u1E3F" => "m" - -# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] -"\u1E41" => "m" - -# ṃ [LATIN SMALL LETTER M WITH DOT BELOW] -"\u1E43" => "m" - -# ⓜ [CIRCLED LATIN SMALL LETTER M] -"\u24DC" => "m" - -# m [FULLWIDTH LATIN SMALL LETTER M] -"\uFF4D" => "m" - -# ⒨ [PARENTHESIZED LATIN SMALL LETTER M] -"\u24A8" => "(m)" - -# Ñ [LATIN CAPITAL LETTER N WITH TILDE] -"\u00D1" => "N" - -# Ń [LATIN CAPITAL LETTER N WITH ACUTE] -"\u0143" => "N" - -# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] -"\u0145" => "N" - -# Ň [LATIN CAPITAL LETTER N WITH CARON] -"\u0147" => "N" - -# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] -"\u014A" => "N" - -# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] -"\u019D" => "N" - -# Ǹ [LATIN 
CAPITAL LETTER N WITH GRAVE] -"\u01F8" => "N" - -# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] -"\u0220" => "N" - -# ɴ [LATIN LETTER SMALL CAPITAL N] -"\u0274" => "N" - -# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] -"\u1D0E" => "N" - -# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] -"\u1E44" => "N" - -# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] -"\u1E46" => "N" - -# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] -"\u1E48" => "N" - -# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4A" => "N" - -# Ⓝ [CIRCLED LATIN CAPITAL LETTER N] -"\u24C3" => "N" - -# N [FULLWIDTH LATIN CAPITAL LETTER N] -"\uFF2E" => "N" - -# ñ [LATIN SMALL LETTER N WITH TILDE] -"\u00F1" => "n" - -# ń [LATIN SMALL LETTER N WITH ACUTE] -"\u0144" => "n" - -# ņ [LATIN SMALL LETTER N WITH CEDILLA] -"\u0146" => "n" - -# ň [LATIN SMALL LETTER N WITH CARON] -"\u0148" => "n" - -# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] -"\u0149" => "n" - -# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] -"\u014B" => "n" - -# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] -"\u019E" => "n" - -# ǹ [LATIN SMALL LETTER N WITH GRAVE] -"\u01F9" => "n" - -# ȵ [LATIN SMALL LETTER N WITH CURL] -"\u0235" => "n" - -# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] -"\u0272" => "n" - -# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] -"\u0273" => "n" - -# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] -"\u1D70" => "n" - -# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] -"\u1D87" => "n" - -# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] -"\u1E45" => "n" - -# ṇ [LATIN SMALL LETTER N WITH DOT BELOW] -"\u1E47" => "n" - -# ṉ [LATIN SMALL LETTER N WITH LINE BELOW] -"\u1E49" => "n" - -# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4B" => "n" - -# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] -"\u207F" => "n" - -# ⓝ [CIRCLED LATIN SMALL LETTER N] -"\u24DD" => "n" - -# n [FULLWIDTH LATIN SMALL LETTER N] -"\uFF4E" => "n" - -# NJ [LATIN CAPITAL LETTER NJ] -"\u01CA" => "NJ" - -# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] -"\u01CB" => 
"Nj" - -# ⒩ [PARENTHESIZED LATIN SMALL LETTER N] -"\u24A9" => "(n)" - -# nj [LATIN SMALL LETTER NJ] -"\u01CC" => "nj" - -# Ò [LATIN CAPITAL LETTER O WITH GRAVE] -"\u00D2" => "O" - -# Ó [LATIN CAPITAL LETTER O WITH ACUTE] -"\u00D3" => "O" - -# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] -"\u00D4" => "O" - -# Õ [LATIN CAPITAL LETTER O WITH TILDE] -"\u00D5" => "O" - -# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] -"\u00D6" => "O" - -# Ø [LATIN CAPITAL LETTER O WITH STROKE] -"\u00D8" => "O" - -# Ō [LATIN CAPITAL LETTER O WITH MACRON] -"\u014C" => "O" - -# Ŏ [LATIN CAPITAL LETTER O WITH BREVE] -"\u014E" => "O" - -# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] -"\u0150" => "O" - -# Ɔ [LATIN CAPITAL LETTER OPEN O] -"\u0186" => "O" - -# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] -"\u019F" => "O" - -# Ơ [LATIN CAPITAL LETTER O WITH HORN] -"\u01A0" => "O" - -# Ǒ [LATIN CAPITAL LETTER O WITH CARON] -"\u01D1" => "O" - -# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] -"\u01EA" => "O" - -# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] -"\u01EC" => "O" - -# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] -"\u01FE" => "O" - -# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] -"\u020C" => "O" - -# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] -"\u020E" => "O" - -# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] -"\u022A" => "O" - -# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] -"\u022C" => "O" - -# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] -"\u022E" => "O" - -# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] -"\u0230" => "O" - -# ᴏ [LATIN LETTER SMALL CAPITAL O] -"\u1D0F" => "O" - -# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] -"\u1D10" => "O" - -# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] -"\u1E4C" => "O" - -# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4E" => "O" - -# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] -"\u1E50" => "O" - -# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] -"\u1E52" => "O" - -# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] -"\u1ECC" 
=> "O" - -# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] -"\u1ECE" => "O" - -# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED0" => "O" - -# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED2" => "O" - -# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED4" => "O" - -# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED6" => "O" - -# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED8" => "O" - -# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] -"\u1EDA" => "O" - -# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] -"\u1EDC" => "O" - -# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDE" => "O" - -# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] -"\u1EE0" => "O" - -# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] -"\u1EE2" => "O" - -# Ⓞ [CIRCLED LATIN CAPITAL LETTER O] -"\u24C4" => "O" - -# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] -"\uA74A" => "O" - -# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] -"\uA74C" => "O" - -# O [FULLWIDTH LATIN CAPITAL LETTER O] -"\uFF2F" => "O" - -# ò [LATIN SMALL LETTER O WITH GRAVE] -"\u00F2" => "o" - -# ó [LATIN SMALL LETTER O WITH ACUTE] -"\u00F3" => "o" - -# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] -"\u00F4" => "o" - -# õ [LATIN SMALL LETTER O WITH TILDE] -"\u00F5" => "o" - -# ö [LATIN SMALL LETTER O WITH DIAERESIS] -"\u00F6" => "o" - -# ø [LATIN SMALL LETTER O WITH STROKE] -"\u00F8" => "o" - -# ō [LATIN SMALL LETTER O WITH MACRON] -"\u014D" => "o" - -# ŏ [LATIN SMALL LETTER O WITH BREVE] -"\u014F" => "o" - -# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] -"\u0151" => "o" - -# ơ [LATIN SMALL LETTER O WITH HORN] -"\u01A1" => "o" - -# ǒ [LATIN SMALL LETTER O WITH CARON] -"\u01D2" => "o" - -# ǫ [LATIN SMALL LETTER O WITH OGONEK] -"\u01EB" => "o" - -# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] -"\u01ED" => "o" - -# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] -"\u01FF" => "o" - -# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] -"\u020D" => "o" - -# ȏ [LATIN SMALL 
LETTER O WITH INVERTED BREVE] -"\u020F" => "o" - -# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] -"\u022B" => "o" - -# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] -"\u022D" => "o" - -# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] -"\u022F" => "o" - -# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] -"\u0231" => "o" - -# ɔ [LATIN SMALL LETTER OPEN O] -"\u0254" => "o" - -# ɵ [LATIN SMALL LETTER BARRED O] -"\u0275" => "o" - -# ᴖ [LATIN SMALL LETTER TOP HALF O] -"\u1D16" => "o" - -# ᴗ [LATIN SMALL LETTER BOTTOM HALF O] -"\u1D17" => "o" - -# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] -"\u1D97" => "o" - -# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] -"\u1E4D" => "o" - -# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4F" => "o" - -# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] -"\u1E51" => "o" - -# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] -"\u1E53" => "o" - -# ọ [LATIN SMALL LETTER O WITH DOT BELOW] -"\u1ECD" => "o" - -# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] -"\u1ECF" => "o" - -# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED1" => "o" - -# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED3" => "o" - -# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED5" => "o" - -# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED7" => "o" - -# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED9" => "o" - -# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] -"\u1EDB" => "o" - -# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] -"\u1EDD" => "o" - -# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDF" => "o" - -# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] -"\u1EE1" => "o" - -# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] -"\u1EE3" => "o" - -# ₒ [LATIN SUBSCRIPT SMALL LETTER O] -"\u2092" => "o" - -# ⓞ [CIRCLED LATIN SMALL LETTER O] -"\u24DE" => "o" - -# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] -"\u2C7A" => "o" - -# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] -"\uA74B" => "o" - -# ꝍ 
[LATIN SMALL LETTER O WITH LOOP] -"\uA74D" => "o" - -# o [FULLWIDTH LATIN SMALL LETTER O] -"\uFF4F" => "o" - -# Œ [LATIN CAPITAL LIGATURE OE] -"\u0152" => "OE" - -# ɶ [LATIN LETTER SMALL CAPITAL OE] -"\u0276" => "OE" - -# Ꝏ [LATIN CAPITAL LETTER OO] -"\uA74E" => "OO" - -# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] -"\u0222" => "OU" - -# ᴕ [LATIN LETTER SMALL CAPITAL OU] -"\u1D15" => "OU" - -# ⒪ [PARENTHESIZED LATIN SMALL LETTER O] -"\u24AA" => "(o)" - -# œ [LATIN SMALL LIGATURE OE] -"\u0153" => "oe" - -# ᴔ [LATIN SMALL LETTER TURNED OE] -"\u1D14" => "oe" - -# ꝏ [LATIN SMALL LETTER OO] -"\uA74F" => "oo" - -# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] -"\u0223" => "ou" - -# Ƥ [LATIN CAPITAL LETTER P WITH HOOK] -"\u01A4" => "P" - -# ᴘ [LATIN LETTER SMALL CAPITAL P] -"\u1D18" => "P" - -# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] -"\u1E54" => "P" - -# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] -"\u1E56" => "P" - -# Ⓟ [CIRCLED LATIN CAPITAL LETTER P] -"\u24C5" => "P" - -# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] -"\u2C63" => "P" - -# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA750" => "P" - -# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] -"\uA752" => "P" - -# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] -"\uA754" => "P" - -# P [FULLWIDTH LATIN CAPITAL LETTER P] -"\uFF30" => "P" - -# ƥ [LATIN SMALL LETTER P WITH HOOK] -"\u01A5" => "p" - -# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] -"\u1D71" => "p" - -# ᵽ [LATIN SMALL LETTER P WITH STROKE] -"\u1D7D" => "p" - -# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] -"\u1D88" => "p" - -# ṕ [LATIN SMALL LETTER P WITH ACUTE] -"\u1E55" => "p" - -# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] -"\u1E57" => "p" - -# ⓟ [CIRCLED LATIN SMALL LETTER P] -"\u24DF" => "p" - -# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA751" => "p" - -# ꝓ [LATIN SMALL LETTER P WITH FLOURISH] -"\uA753" => "p" - -# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] -"\uA755" => "p" - -# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED 
P] -"\uA7FC" => "p" - -# p [FULLWIDTH LATIN SMALL LETTER P] -"\uFF50" => "p" - -# ⒫ [PARENTHESIZED LATIN SMALL LETTER P] -"\u24AB" => "(p)" - -# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] -"\u024A" => "Q" - -# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] -"\u24C6" => "Q" - -# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA756" => "Q" - -# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] -"\uA758" => "Q" - -# Q [FULLWIDTH LATIN CAPITAL LETTER Q] -"\uFF31" => "Q" - -# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] -"\u0138" => "q" - -# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] -"\u024B" => "q" - -# ʠ [LATIN SMALL LETTER Q WITH HOOK] -"\u02A0" => "q" - -# ⓠ [CIRCLED LATIN SMALL LETTER Q] -"\u24E0" => "q" - -# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA757" => "q" - -# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] -"\uA759" => "q" - -# q [FULLWIDTH LATIN SMALL LETTER Q] -"\uFF51" => "q" - -# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] -"\u24AC" => "(q)" - -# ȹ [LATIN SMALL LETTER QP DIGRAPH] -"\u0239" => "qp" - -# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] -"\u0154" => "R" - -# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] -"\u0156" => "R" - -# Ř [LATIN CAPITAL LETTER R WITH CARON] -"\u0158" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] -"\u0210" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] -"\u0212" => "R" - -# Ɍ [LATIN CAPITAL LETTER R WITH STROKE] -"\u024C" => "R" - -# ʀ [LATIN LETTER SMALL CAPITAL R] -"\u0280" => "R" - -# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] -"\u0281" => "R" - -# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] -"\u1D19" => "R" - -# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] -"\u1D1A" => "R" - -# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] -"\u1E58" => "R" - -# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] -"\u1E5A" => "R" - -# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5C" => "R" - -# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] -"\u1E5E" => "R" - -# Ⓡ [CIRCLED LATIN CAPITAL LETTER R] 
-"\u24C7" => "R" - -# Ɽ [LATIN CAPITAL LETTER R WITH TAIL] -"\u2C64" => "R" - -# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] -"\uA75A" => "R" - -# Ꞃ [LATIN CAPITAL LETTER INSULAR R] -"\uA782" => "R" - -# R [FULLWIDTH LATIN CAPITAL LETTER R] -"\uFF32" => "R" - -# ŕ [LATIN SMALL LETTER R WITH ACUTE] -"\u0155" => "r" - -# ŗ [LATIN SMALL LETTER R WITH CEDILLA] -"\u0157" => "r" - -# ř [LATIN SMALL LETTER R WITH CARON] -"\u0159" => "r" - -# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] -"\u0211" => "r" - -# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] -"\u0213" => "r" - -# ɍ [LATIN SMALL LETTER R WITH STROKE] -"\u024D" => "r" - -# ɼ [LATIN SMALL LETTER R WITH LONG LEG] -"\u027C" => "r" - -# ɽ [LATIN SMALL LETTER R WITH TAIL] -"\u027D" => "r" - -# ɾ [LATIN SMALL LETTER R WITH FISHHOOK] -"\u027E" => "r" - -# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] -"\u027F" => "r" - -# ᵣ [LATIN SUBSCRIPT SMALL LETTER R] -"\u1D63" => "r" - -# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] -"\u1D72" => "r" - -# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] -"\u1D73" => "r" - -# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] -"\u1D89" => "r" - -# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] -"\u1E59" => "r" - -# ṛ [LATIN SMALL LETTER R WITH DOT BELOW] -"\u1E5B" => "r" - -# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5D" => "r" - -# ṟ [LATIN SMALL LETTER R WITH LINE BELOW] -"\u1E5F" => "r" - -# ⓡ [CIRCLED LATIN SMALL LETTER R] -"\u24E1" => "r" - -# ꝛ [LATIN SMALL LETTER R ROTUNDA] -"\uA75B" => "r" - -# ꞃ [LATIN SMALL LETTER INSULAR R] -"\uA783" => "r" - -# r [FULLWIDTH LATIN SMALL LETTER R] -"\uFF52" => "r" - -# ⒭ [PARENTHESIZED LATIN SMALL LETTER R] -"\u24AD" => "(r)" - -# Ś [LATIN CAPITAL LETTER S WITH ACUTE] -"\u015A" => "S" - -# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] -"\u015C" => "S" - -# Ş [LATIN CAPITAL LETTER S WITH CEDILLA] -"\u015E" => "S" - -# Š [LATIN CAPITAL LETTER S WITH CARON] -"\u0160" => "S" - -# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] -"\u0218" => "S" - -# Ṡ 
[LATIN CAPITAL LETTER S WITH DOT ABOVE] -"\u1E60" => "S" - -# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] -"\u1E62" => "S" - -# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E64" => "S" - -# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] -"\u1E66" => "S" - -# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E68" => "S" - -# Ⓢ [CIRCLED LATIN CAPITAL LETTER S] -"\u24C8" => "S" - -# ꜱ [LATIN LETTER SMALL CAPITAL S] -"\uA731" => "S" - -# ꞅ [LATIN SMALL LETTER INSULAR S] -"\uA785" => "S" - -# S [FULLWIDTH LATIN CAPITAL LETTER S] -"\uFF33" => "S" - -# ś [LATIN SMALL LETTER S WITH ACUTE] -"\u015B" => "s" - -# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] -"\u015D" => "s" - -# ş [LATIN SMALL LETTER S WITH CEDILLA] -"\u015F" => "s" - -# š [LATIN SMALL LETTER S WITH CARON] -"\u0161" => "s" - -# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] -"\u017F" => "s" - -# ș [LATIN SMALL LETTER S WITH COMMA BELOW] -"\u0219" => "s" - -# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL] -"\u023F" => "s" - -# ʂ [LATIN SMALL LETTER S WITH HOOK] -"\u0282" => "s" - -# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] -"\u1D74" => "s" - -# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] -"\u1D8A" => "s" - -# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] -"\u1E61" => "s" - -# ṣ [LATIN SMALL LETTER S WITH DOT BELOW] -"\u1E63" => "s" - -# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E65" => "s" - -# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] -"\u1E67" => "s" - -# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E69" => "s" - -# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] -"\u1E9C" => "s" - -# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] -"\u1E9D" => "s" - -# ⓢ [CIRCLED LATIN SMALL LETTER S] -"\u24E2" => "s" - -# Ꞅ [LATIN CAPITAL LETTER INSULAR S] -"\uA784" => "s" - -# s [FULLWIDTH LATIN SMALL LETTER S] -"\uFF53" => "s" - -# ẞ [LATIN CAPITAL LETTER SHARP S] -"\u1E9E" => "SS" - -# ⒮ [PARENTHESIZED LATIN SMALL LETTER S] -"\u24AE" => "(s)" - -# ß 
[LATIN SMALL LETTER SHARP S] -"\u00DF" => "ss" - -# st [LATIN SMALL LIGATURE ST] -"\uFB06" => "st" - -# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] -"\u0162" => "T" - -# Ť [LATIN CAPITAL LETTER T WITH CARON] -"\u0164" => "T" - -# Ŧ [LATIN CAPITAL LETTER T WITH STROKE] -"\u0166" => "T" - -# Ƭ [LATIN CAPITAL LETTER T WITH HOOK] -"\u01AC" => "T" - -# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] -"\u01AE" => "T" - -# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] -"\u021A" => "T" - -# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] -"\u023E" => "T" - -# ᴛ [LATIN LETTER SMALL CAPITAL T] -"\u1D1B" => "T" - -# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] -"\u1E6A" => "T" - -# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] -"\u1E6C" => "T" - -# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] -"\u1E6E" => "T" - -# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E70" => "T" - -# Ⓣ [CIRCLED LATIN CAPITAL LETTER T] -"\u24C9" => "T" - -# Ꞇ [LATIN CAPITAL LETTER INSULAR T] -"\uA786" => "T" - -# T [FULLWIDTH LATIN CAPITAL LETTER T] -"\uFF34" => "T" - -# ţ [LATIN SMALL LETTER T WITH CEDILLA] -"\u0163" => "t" - -# ť [LATIN SMALL LETTER T WITH CARON] -"\u0165" => "t" - -# ŧ [LATIN SMALL LETTER T WITH STROKE] -"\u0167" => "t" - -# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] -"\u01AB" => "t" - -# ƭ [LATIN SMALL LETTER T WITH HOOK] -"\u01AD" => "t" - -# ț [LATIN SMALL LETTER T WITH COMMA BELOW] -"\u021B" => "t" - -# ȶ [LATIN SMALL LETTER T WITH CURL] -"\u0236" => "t" - -# ʇ [LATIN SMALL LETTER TURNED T] -"\u0287" => "t" - -# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] -"\u0288" => "t" - -# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] -"\u1D75" => "t" - -# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] -"\u1E6B" => "t" - -# ṭ [LATIN SMALL LETTER T WITH DOT BELOW] -"\u1E6D" => "t" - -# ṯ [LATIN SMALL LETTER T WITH LINE BELOW] -"\u1E6F" => "t" - -# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E71" => "t" - -# ẗ [LATIN SMALL LETTER T WITH DIAERESIS] -"\u1E97" => "t" - -# ⓣ [CIRCLED LATIN SMALL LETTER T] 
-"\u24E3" => "t" - -# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] -"\u2C66" => "t" - -# t [FULLWIDTH LATIN SMALL LETTER T] -"\uFF54" => "t" - -# Þ [LATIN CAPITAL LETTER THORN] -"\u00DE" => "TH" - -# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA766" => "TH" - -# Ꜩ [LATIN CAPITAL LETTER TZ] -"\uA728" => "TZ" - -# ⒯ [PARENTHESIZED LATIN SMALL LETTER T] -"\u24AF" => "(t)" - -# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] -"\u02A8" => "tc" - -# þ [LATIN SMALL LETTER THORN] -"\u00FE" => "th" - -# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] -"\u1D7A" => "th" - -# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA767" => "th" - -# ʦ [LATIN SMALL LETTER TS DIGRAPH] -"\u02A6" => "ts" - -# ꜩ [LATIN SMALL LETTER TZ] -"\uA729" => "tz" - -# Ù [LATIN CAPITAL LETTER U WITH GRAVE] -"\u00D9" => "U" - -# Ú [LATIN CAPITAL LETTER U WITH ACUTE] -"\u00DA" => "U" - -# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] -"\u00DB" => "U" - -# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] -"\u00DC" => "U" - -# Ũ [LATIN CAPITAL LETTER U WITH TILDE] -"\u0168" => "U" - -# Ū [LATIN CAPITAL LETTER U WITH MACRON] -"\u016A" => "U" - -# Ŭ [LATIN CAPITAL LETTER U WITH BREVE] -"\u016C" => "U" - -# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] -"\u016E" => "U" - -# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] -"\u0170" => "U" - -# Ų [LATIN CAPITAL LETTER U WITH OGONEK] -"\u0172" => "U" - -# Ư [LATIN CAPITAL LETTER U WITH HORN] -"\u01AF" => "U" - -# Ǔ [LATIN CAPITAL LETTER U WITH CARON] -"\u01D3" => "U" - -# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] -"\u01D5" => "U" - -# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D7" => "U" - -# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] -"\u01D9" => "U" - -# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DB" => "U" - -# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] -"\u0214" => "U" - -# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] -"\u0216" => "U" - -# Ʉ [LATIN CAPITAL LETTER U BAR] -"\u0244" => "U" - 
-# ᴜ [LATIN LETTER SMALL CAPITAL U] -"\u1D1C" => "U" - -# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] -"\u1D7E" => "U" - -# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] -"\u1E72" => "U" - -# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] -"\u1E74" => "U" - -# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E76" => "U" - -# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] -"\u1E78" => "U" - -# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7A" => "U" - -# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] -"\u1EE4" => "U" - -# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] -"\u1EE6" => "U" - -# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] -"\u1EE8" => "U" - -# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] -"\u1EEA" => "U" - -# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EEC" => "U" - -# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] -"\u1EEE" => "U" - -# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] -"\u1EF0" => "U" - -# Ⓤ [CIRCLED LATIN CAPITAL LETTER U] -"\u24CA" => "U" - -# U [FULLWIDTH LATIN CAPITAL LETTER U] -"\uFF35" => "U" - -# ù [LATIN SMALL LETTER U WITH GRAVE] -"\u00F9" => "u" - -# ú [LATIN SMALL LETTER U WITH ACUTE] -"\u00FA" => "u" - -# û [LATIN SMALL LETTER U WITH CIRCUMFLEX] -"\u00FB" => "u" - -# ü [LATIN SMALL LETTER U WITH DIAERESIS] -"\u00FC" => "u" - -# ũ [LATIN SMALL LETTER U WITH TILDE] -"\u0169" => "u" - -# ū [LATIN SMALL LETTER U WITH MACRON] -"\u016B" => "u" - -# ŭ [LATIN SMALL LETTER U WITH BREVE] -"\u016D" => "u" - -# ů [LATIN SMALL LETTER U WITH RING ABOVE] -"\u016F" => "u" - -# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] -"\u0171" => "u" - -# ų [LATIN SMALL LETTER U WITH OGONEK] -"\u0173" => "u" - -# ư [LATIN SMALL LETTER U WITH HORN] -"\u01B0" => "u" - -# ǔ [LATIN SMALL LETTER U WITH CARON] -"\u01D4" => "u" - -# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] -"\u01D6" => "u" - -# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D8" => "u" - -# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] -"\u01DA" 
=> "u" - -# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DC" => "u" - -# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] -"\u0215" => "u" - -# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] -"\u0217" => "u" - -# ʉ [LATIN SMALL LETTER U BAR] -"\u0289" => "u" - -# ᵤ [LATIN SUBSCRIPT SMALL LETTER U] -"\u1D64" => "u" - -# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] -"\u1D99" => "u" - -# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] -"\u1E73" => "u" - -# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] -"\u1E75" => "u" - -# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E77" => "u" - -# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] -"\u1E79" => "u" - -# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7B" => "u" - -# ụ [LATIN SMALL LETTER U WITH DOT BELOW] -"\u1EE5" => "u" - -# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] -"\u1EE7" => "u" - -# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] -"\u1EE9" => "u" - -# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] -"\u1EEB" => "u" - -# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EED" => "u" - -# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] -"\u1EEF" => "u" - -# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] -"\u1EF1" => "u" - -# ⓤ [CIRCLED LATIN SMALL LETTER U] -"\u24E4" => "u" - -# u [FULLWIDTH LATIN SMALL LETTER U] -"\uFF55" => "u" - -# ⒰ [PARENTHESIZED LATIN SMALL LETTER U] -"\u24B0" => "(u)" - -# ᵫ [LATIN SMALL LETTER UE] -"\u1D6B" => "ue" - -# Ʋ [LATIN CAPITAL LETTER V WITH HOOK] -"\u01B2" => "V" - -# Ʌ [LATIN CAPITAL LETTER TURNED V] -"\u0245" => "V" - -# ᴠ [LATIN LETTER SMALL CAPITAL V] -"\u1D20" => "V" - -# Ṽ [LATIN CAPITAL LETTER V WITH TILDE] -"\u1E7C" => "V" - -# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] -"\u1E7E" => "V" - -# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] -"\u1EFC" => "V" - -# Ⓥ [CIRCLED LATIN CAPITAL LETTER V] -"\u24CB" => "V" - -# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] -"\uA75E" => "V" - -# Ꝩ [LATIN CAPITAL LETTER VEND] -"\uA768" => "V" - -# V [FULLWIDTH LATIN CAPITAL LETTER V] 
-"\uFF36" => "V" - -# ʋ [LATIN SMALL LETTER V WITH HOOK] -"\u028B" => "v" - -# ʌ [LATIN SMALL LETTER TURNED V] -"\u028C" => "v" - -# ᵥ [LATIN SUBSCRIPT SMALL LETTER V] -"\u1D65" => "v" - -# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] -"\u1D8C" => "v" - -# ṽ [LATIN SMALL LETTER V WITH TILDE] -"\u1E7D" => "v" - -# ṿ [LATIN SMALL LETTER V WITH DOT BELOW] -"\u1E7F" => "v" - -# ⓥ [CIRCLED LATIN SMALL LETTER V] -"\u24E5" => "v" - -# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] -"\u2C71" => "v" - -# ⱴ [LATIN SMALL LETTER V WITH CURL] -"\u2C74" => "v" - -# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] -"\uA75F" => "v" - -# v [FULLWIDTH LATIN SMALL LETTER V] -"\uFF56" => "v" - -# Ꝡ [LATIN CAPITAL LETTER VY] -"\uA760" => "VY" - -# ⒱ [PARENTHESIZED LATIN SMALL LETTER V] -"\u24B1" => "(v)" - -# ꝡ [LATIN SMALL LETTER VY] -"\uA761" => "vy" - -# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] -"\u0174" => "W" - -# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] -"\u01F7" => "W" - -# ᴡ [LATIN LETTER SMALL CAPITAL W] -"\u1D21" => "W" - -# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] -"\u1E80" => "W" - -# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] -"\u1E82" => "W" - -# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] -"\u1E84" => "W" - -# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] -"\u1E86" => "W" - -# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] -"\u1E88" => "W" - -# Ⓦ [CIRCLED LATIN CAPITAL LETTER W] -"\u24CC" => "W" - -# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] -"\u2C72" => "W" - -# W [FULLWIDTH LATIN CAPITAL LETTER W] -"\uFF37" => "W" - -# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] -"\u0175" => "w" - -# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] -"\u01BF" => "w" - -# ʍ [LATIN SMALL LETTER TURNED W] -"\u028D" => "w" - -# ẁ [LATIN SMALL LETTER W WITH GRAVE] -"\u1E81" => "w" - -# ẃ [LATIN SMALL LETTER W WITH ACUTE] -"\u1E83" => "w" - -# ẅ [LATIN SMALL LETTER W WITH DIAERESIS] -"\u1E85" => "w" - -# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] -"\u1E87" => "w" - -# ẉ [LATIN SMALL LETTER W WITH DOT 
BELOW] -"\u1E89" => "w" - -# ẘ [LATIN SMALL LETTER W WITH RING ABOVE] -"\u1E98" => "w" - -# ⓦ [CIRCLED LATIN SMALL LETTER W] -"\u24E6" => "w" - -# ⱳ [LATIN SMALL LETTER W WITH HOOK] -"\u2C73" => "w" - -# w [FULLWIDTH LATIN SMALL LETTER W] -"\uFF57" => "w" - -# ⒲ [PARENTHESIZED LATIN SMALL LETTER W] -"\u24B2" => "(w)" - -# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] -"\u1E8A" => "X" - -# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] -"\u1E8C" => "X" - -# Ⓧ [CIRCLED LATIN CAPITAL LETTER X] -"\u24CD" => "X" - -# X [FULLWIDTH LATIN CAPITAL LETTER X] -"\uFF38" => "X" - -# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] -"\u1D8D" => "x" - -# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] -"\u1E8B" => "x" - -# ẍ [LATIN SMALL LETTER X WITH DIAERESIS] -"\u1E8D" => "x" - -# ₓ [LATIN SUBSCRIPT SMALL LETTER X] -"\u2093" => "x" - -# ⓧ [CIRCLED LATIN SMALL LETTER X] -"\u24E7" => "x" - -# x [FULLWIDTH LATIN SMALL LETTER X] -"\uFF58" => "x" - -# ⒳ [PARENTHESIZED LATIN SMALL LETTER X] -"\u24B3" => "(x)" - -# Ý [LATIN CAPITAL LETTER Y WITH ACUTE] -"\u00DD" => "Y" - -# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] -"\u0176" => "Y" - -# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] -"\u0178" => "Y" - -# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] -"\u01B3" => "Y" - -# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] -"\u0232" => "Y" - -# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] -"\u024E" => "Y" - -# ʏ [LATIN LETTER SMALL CAPITAL Y] -"\u028F" => "Y" - -# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] -"\u1E8E" => "Y" - -# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] -"\u1EF2" => "Y" - -# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] -"\u1EF4" => "Y" - -# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] -"\u1EF6" => "Y" - -# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] -"\u1EF8" => "Y" - -# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] -"\u1EFE" => "Y" - -# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] -"\u24CE" => "Y" - -# Y [FULLWIDTH LATIN CAPITAL LETTER Y] -"\uFF39" => "Y" - -# ý [LATIN SMALL LETTER Y WITH ACUTE] -"\u00FD" => "y" - -# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] 
-"\u00FF" => "y" - -# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] -"\u0177" => "y" - -# ƴ [LATIN SMALL LETTER Y WITH HOOK] -"\u01B4" => "y" - -# ȳ [LATIN SMALL LETTER Y WITH MACRON] -"\u0233" => "y" - -# ɏ [LATIN SMALL LETTER Y WITH STROKE] -"\u024F" => "y" - -# ʎ [LATIN SMALL LETTER TURNED Y] -"\u028E" => "y" - -# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] -"\u1E8F" => "y" - -# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] -"\u1E99" => "y" - -# ỳ [LATIN SMALL LETTER Y WITH GRAVE] -"\u1EF3" => "y" - -# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] -"\u1EF5" => "y" - -# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] -"\u1EF7" => "y" - -# ỹ [LATIN SMALL LETTER Y WITH TILDE] -"\u1EF9" => "y" - -# ỿ [LATIN SMALL LETTER Y WITH LOOP] -"\u1EFF" => "y" - -# ⓨ [CIRCLED LATIN SMALL LETTER Y] -"\u24E8" => "y" - -# y [FULLWIDTH LATIN SMALL LETTER Y] -"\uFF59" => "y" - -# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] -"\u24B4" => "(y)" - -# Ź [LATIN CAPITAL LETTER Z WITH ACUTE] -"\u0179" => "Z" - -# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] -"\u017B" => "Z" - -# Ž [LATIN CAPITAL LETTER Z WITH CARON] -"\u017D" => "Z" - -# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] -"\u01B5" => "Z" - -# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] -"\u021C" => "Z" - -# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] -"\u0224" => "Z" - -# ᴢ [LATIN LETTER SMALL CAPITAL Z] -"\u1D22" => "Z" - -# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] -"\u1E90" => "Z" - -# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] -"\u1E92" => "Z" - -# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW] -"\u1E94" => "Z" - -# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] -"\u24CF" => "Z" - -# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] -"\u2C6B" => "Z" - -# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] -"\uA762" => "Z" - -# Z [FULLWIDTH LATIN CAPITAL LETTER Z] -"\uFF3A" => "Z" - -# ź [LATIN SMALL LETTER Z WITH ACUTE] -"\u017A" => "z" - -# ż [LATIN SMALL LETTER Z WITH DOT ABOVE] -"\u017C" => "z" - -# ž [LATIN SMALL LETTER Z WITH CARON] -"\u017E" => "z" - -# ƶ [LATIN SMALL LETTER Z WITH 
STROKE] -"\u01B6" => "z" - -# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] -"\u021D" => "z" - -# ȥ [LATIN SMALL LETTER Z WITH HOOK] -"\u0225" => "z" - -# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] -"\u0240" => "z" - -# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] -"\u0290" => "z" - -# ʑ [LATIN SMALL LETTER Z WITH CURL] -"\u0291" => "z" - -# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] -"\u1D76" => "z" - -# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] -"\u1D8E" => "z" - -# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] -"\u1E91" => "z" - -# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] -"\u1E93" => "z" - -# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] -"\u1E95" => "z" - -# ⓩ [CIRCLED LATIN SMALL LETTER Z] -"\u24E9" => "z" - -# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] -"\u2C6C" => "z" - -# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] -"\uA763" => "z" - -# z [FULLWIDTH LATIN SMALL LETTER Z] -"\uFF5A" => "z" - -# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] -"\u24B5" => "(z)" - -# ⁰ [SUPERSCRIPT ZERO] -"\u2070" => "0" - -# ₀ [SUBSCRIPT ZERO] -"\u2080" => "0" - -# ⓪ [CIRCLED DIGIT ZERO] -"\u24EA" => "0" - -# ⓿ [NEGATIVE CIRCLED DIGIT ZERO] -"\u24FF" => "0" - -# 0 [FULLWIDTH DIGIT ZERO] -"\uFF10" => "0" - -# ¹ [SUPERSCRIPT ONE] -"\u00B9" => "1" - -# ₁ [SUBSCRIPT ONE] -"\u2081" => "1" - -# ① [CIRCLED DIGIT ONE] -"\u2460" => "1" - -# ⓵ [DOUBLE CIRCLED DIGIT ONE] -"\u24F5" => "1" - -# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] -"\u2776" => "1" - -# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] -"\u2780" => "1" - -# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] -"\u278A" => "1" - -# 1 [FULLWIDTH DIGIT ONE] -"\uFF11" => "1" - -# ⒈ [DIGIT ONE FULL STOP] -"\u2488" => "1." 
- -# ⑴ [PARENTHESIZED DIGIT ONE] -"\u2474" => "(1)" - -# ² [SUPERSCRIPT TWO] -"\u00B2" => "2" - -# ₂ [SUBSCRIPT TWO] -"\u2082" => "2" - -# ② [CIRCLED DIGIT TWO] -"\u2461" => "2" - -# ⓶ [DOUBLE CIRCLED DIGIT TWO] -"\u24F6" => "2" - -# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] -"\u2777" => "2" - -# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] -"\u2781" => "2" - -# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] -"\u278B" => "2" - -# 2 [FULLWIDTH DIGIT TWO] -"\uFF12" => "2" - -# ⒉ [DIGIT TWO FULL STOP] -"\u2489" => "2." - -# ⑵ [PARENTHESIZED DIGIT TWO] -"\u2475" => "(2)" - -# ³ [SUPERSCRIPT THREE] -"\u00B3" => "3" - -# ₃ [SUBSCRIPT THREE] -"\u2083" => "3" - -# ③ [CIRCLED DIGIT THREE] -"\u2462" => "3" - -# ⓷ [DOUBLE CIRCLED DIGIT THREE] -"\u24F7" => "3" - -# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] -"\u2778" => "3" - -# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] -"\u2782" => "3" - -# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] -"\u278C" => "3" - -# 3 [FULLWIDTH DIGIT THREE] -"\uFF13" => "3" - -# ⒊ [DIGIT THREE FULL STOP] -"\u248A" => "3." - -# ⑶ [PARENTHESIZED DIGIT THREE] -"\u2476" => "(3)" - -# ⁴ [SUPERSCRIPT FOUR] -"\u2074" => "4" - -# ₄ [SUBSCRIPT FOUR] -"\u2084" => "4" - -# ④ [CIRCLED DIGIT FOUR] -"\u2463" => "4" - -# ⓸ [DOUBLE CIRCLED DIGIT FOUR] -"\u24F8" => "4" - -# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] -"\u2779" => "4" - -# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] -"\u2783" => "4" - -# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] -"\u278D" => "4" - -# 4 [FULLWIDTH DIGIT FOUR] -"\uFF14" => "4" - -# ⒋ [DIGIT FOUR FULL STOP] -"\u248B" => "4." 
- -# ⑷ [PARENTHESIZED DIGIT FOUR] -"\u2477" => "(4)" - -# ⁵ [SUPERSCRIPT FIVE] -"\u2075" => "5" - -# ₅ [SUBSCRIPT FIVE] -"\u2085" => "5" - -# ⑤ [CIRCLED DIGIT FIVE] -"\u2464" => "5" - -# ⓹ [DOUBLE CIRCLED DIGIT FIVE] -"\u24F9" => "5" - -# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] -"\u277A" => "5" - -# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] -"\u2784" => "5" - -# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] -"\u278E" => "5" - -# 5 [FULLWIDTH DIGIT FIVE] -"\uFF15" => "5" - -# ⒌ [DIGIT FIVE FULL STOP] -"\u248C" => "5." - -# ⑸ [PARENTHESIZED DIGIT FIVE] -"\u2478" => "(5)" - -# ⁶ [SUPERSCRIPT SIX] -"\u2076" => "6" - -# ₆ [SUBSCRIPT SIX] -"\u2086" => "6" - -# ⑥ [CIRCLED DIGIT SIX] -"\u2465" => "6" - -# ⓺ [DOUBLE CIRCLED DIGIT SIX] -"\u24FA" => "6" - -# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] -"\u277B" => "6" - -# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] -"\u2785" => "6" - -# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] -"\u278F" => "6" - -# 6 [FULLWIDTH DIGIT SIX] -"\uFF16" => "6" - -# ⒍ [DIGIT SIX FULL STOP] -"\u248D" => "6." - -# ⑹ [PARENTHESIZED DIGIT SIX] -"\u2479" => "(6)" - -# ⁷ [SUPERSCRIPT SEVEN] -"\u2077" => "7" - -# ₇ [SUBSCRIPT SEVEN] -"\u2087" => "7" - -# ⑦ [CIRCLED DIGIT SEVEN] -"\u2466" => "7" - -# ⓻ [DOUBLE CIRCLED DIGIT SEVEN] -"\u24FB" => "7" - -# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] -"\u277C" => "7" - -# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2786" => "7" - -# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2790" => "7" - -# 7 [FULLWIDTH DIGIT SEVEN] -"\uFF17" => "7" - -# ⒎ [DIGIT SEVEN FULL STOP] -"\u248E" => "7." 
- -# ⑺ [PARENTHESIZED DIGIT SEVEN] -"\u247A" => "(7)" - -# ⁸ [SUPERSCRIPT EIGHT] -"\u2078" => "8" - -# ₈ [SUBSCRIPT EIGHT] -"\u2088" => "8" - -# ⑧ [CIRCLED DIGIT EIGHT] -"\u2467" => "8" - -# ⓼ [DOUBLE CIRCLED DIGIT EIGHT] -"\u24FC" => "8" - -# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] -"\u277D" => "8" - -# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2787" => "8" - -# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2791" => "8" - -# 8 [FULLWIDTH DIGIT EIGHT] -"\uFF18" => "8" - -# ⒏ [DIGIT EIGHT FULL STOP] -"\u248F" => "8." - -# ⑻ [PARENTHESIZED DIGIT EIGHT] -"\u247B" => "(8)" - -# ⁹ [SUPERSCRIPT NINE] -"\u2079" => "9" - -# ₉ [SUBSCRIPT NINE] -"\u2089" => "9" - -# ⑨ [CIRCLED DIGIT NINE] -"\u2468" => "9" - -# ⓽ [DOUBLE CIRCLED DIGIT NINE] -"\u24FD" => "9" - -# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] -"\u277E" => "9" - -# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] -"\u2788" => "9" - -# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] -"\u2792" => "9" - -# 9 [FULLWIDTH DIGIT NINE] -"\uFF19" => "9" - -# ⒐ [DIGIT NINE FULL STOP] -"\u2490" => "9." - -# ⑼ [PARENTHESIZED DIGIT NINE] -"\u247C" => "(9)" - -# ⑩ [CIRCLED NUMBER TEN] -"\u2469" => "10" - -# ⓾ [DOUBLE CIRCLED NUMBER TEN] -"\u24FE" => "10" - -# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] -"\u277F" => "10" - -# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] -"\u2789" => "10" - -# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] -"\u2793" => "10" - -# ⒑ [NUMBER TEN FULL STOP] -"\u2491" => "10." - -# ⑽ [PARENTHESIZED NUMBER TEN] -"\u247D" => "(10)" - -# ⑪ [CIRCLED NUMBER ELEVEN] -"\u246A" => "11" - -# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] -"\u24EB" => "11" - -# ⒒ [NUMBER ELEVEN FULL STOP] -"\u2492" => "11." - -# ⑾ [PARENTHESIZED NUMBER ELEVEN] -"\u247E" => "(11)" - -# ⑫ [CIRCLED NUMBER TWELVE] -"\u246B" => "12" - -# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] -"\u24EC" => "12" - -# ⒓ [NUMBER TWELVE FULL STOP] -"\u2493" => "12." 
- -# ⑿ [PARENTHESIZED NUMBER TWELVE] -"\u247F" => "(12)" - -# ⑬ [CIRCLED NUMBER THIRTEEN] -"\u246C" => "13" - -# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] -"\u24ED" => "13" - -# ⒔ [NUMBER THIRTEEN FULL STOP] -"\u2494" => "13." - -# ⒀ [PARENTHESIZED NUMBER THIRTEEN] -"\u2480" => "(13)" - -# ⑭ [CIRCLED NUMBER FOURTEEN] -"\u246D" => "14" - -# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] -"\u24EE" => "14" - -# ⒕ [NUMBER FOURTEEN FULL STOP] -"\u2495" => "14." - -# ⒁ [PARENTHESIZED NUMBER FOURTEEN] -"\u2481" => "(14)" - -# ⑮ [CIRCLED NUMBER FIFTEEN] -"\u246E" => "15" - -# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] -"\u24EF" => "15" - -# ⒖ [NUMBER FIFTEEN FULL STOP] -"\u2496" => "15." - -# ⒂ [PARENTHESIZED NUMBER FIFTEEN] -"\u2482" => "(15)" - -# ⑯ [CIRCLED NUMBER SIXTEEN] -"\u246F" => "16" - -# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] -"\u24F0" => "16" - -# ⒗ [NUMBER SIXTEEN FULL STOP] -"\u2497" => "16." - -# ⒃ [PARENTHESIZED NUMBER SIXTEEN] -"\u2483" => "(16)" - -# ⑰ [CIRCLED NUMBER SEVENTEEN] -"\u2470" => "17" - -# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] -"\u24F1" => "17" - -# ⒘ [NUMBER SEVENTEEN FULL STOP] -"\u2498" => "17." - -# ⒄ [PARENTHESIZED NUMBER SEVENTEEN] -"\u2484" => "(17)" - -# ⑱ [CIRCLED NUMBER EIGHTEEN] -"\u2471" => "18" - -# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] -"\u24F2" => "18" - -# ⒙ [NUMBER EIGHTEEN FULL STOP] -"\u2499" => "18." - -# ⒅ [PARENTHESIZED NUMBER EIGHTEEN] -"\u2485" => "(18)" - -# ⑲ [CIRCLED NUMBER NINETEEN] -"\u2472" => "19" - -# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] -"\u24F3" => "19" - -# ⒚ [NUMBER NINETEEN FULL STOP] -"\u249A" => "19." - -# ⒆ [PARENTHESIZED NUMBER NINETEEN] -"\u2486" => "(19)" - -# ⑳ [CIRCLED NUMBER TWENTY] -"\u2473" => "20" - -# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] -"\u24F4" => "20" - -# ⒛ [NUMBER TWENTY FULL STOP] -"\u249B" => "20." 
- -# ⒇ [PARENTHESIZED NUMBER TWENTY] -"\u2487" => "(20)" - -# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00AB" => "\"" - -# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00BB" => "\"" - -# “ [LEFT DOUBLE QUOTATION MARK] -"\u201C" => "\"" - -# ” [RIGHT DOUBLE QUOTATION MARK] -"\u201D" => "\"" - -# „ [DOUBLE LOW-9 QUOTATION MARK] -"\u201E" => "\"" - -# ″ [DOUBLE PRIME] -"\u2033" => "\"" - -# ‶ [REVERSED DOUBLE PRIME] -"\u2036" => "\"" - -# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275D" => "\"" - -# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] -"\u275E" => "\"" - -# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276E" => "\"" - -# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276F" => "\"" - -# " [FULLWIDTH QUOTATION MARK] -"\uFF02" => "\"" - -# ‘ [LEFT SINGLE QUOTATION MARK] -"\u2018" => "\'" - -# ’ [RIGHT SINGLE QUOTATION MARK] -"\u2019" => "\'" - -# ‚ [SINGLE LOW-9 QUOTATION MARK] -"\u201A" => "\'" - -# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] -"\u201B" => "\'" - -# ′ [PRIME] -"\u2032" => "\'" - -# ‵ [REVERSED PRIME] -"\u2035" => "\'" - -# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] -"\u2039" => "\'" - -# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] -"\u203A" => "\'" - -# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275B" => "\'" - -# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] -"\u275C" => "\'" - -# ' [FULLWIDTH APOSTROPHE] -"\uFF07" => "\'" - -# ‐ [HYPHEN] -"\u2010" => "-" - -# ‑ [NON-BREAKING HYPHEN] -"\u2011" => "-" - -# ‒ [FIGURE DASH] -"\u2012" => "-" - -# – [EN DASH] -"\u2013" => "-" - -# — [EM DASH] -"\u2014" => "-" - -# ⁻ [SUPERSCRIPT MINUS] -"\u207B" => "-" - -# ₋ [SUBSCRIPT MINUS] -"\u208B" => "-" - -# - [FULLWIDTH HYPHEN-MINUS] -"\uFF0D" => "-" - -# ⁅ [LEFT SQUARE BRACKET WITH QUILL] -"\u2045" => "[" - -# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] -"\u2772" => "[" - -# [ [FULLWIDTH LEFT SQUARE BRACKET] -"\uFF3B" => "[" - -# ⁆ [RIGHT SQUARE BRACKET WITH QUILL] -"\u2046" => 
"]" - -# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] -"\u2773" => "]" - -# ] [FULLWIDTH RIGHT SQUARE BRACKET] -"\uFF3D" => "]" - -# ⁽ [SUPERSCRIPT LEFT PARENTHESIS] -"\u207D" => "(" - -# ₍ [SUBSCRIPT LEFT PARENTHESIS] -"\u208D" => "(" - -# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] -"\u2768" => "(" - -# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] -"\u276A" => "(" - -# ( [FULLWIDTH LEFT PARENTHESIS] -"\uFF08" => "(" - -# ⸨ [LEFT DOUBLE PARENTHESIS] -"\u2E28" => "((" - -# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] -"\u207E" => ")" - -# ₎ [SUBSCRIPT RIGHT PARENTHESIS] -"\u208E" => ")" - -# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] -"\u2769" => ")" - -# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] -"\u276B" => ")" - -# ) [FULLWIDTH RIGHT PARENTHESIS] -"\uFF09" => ")" - -# ⸩ [RIGHT DOUBLE PARENTHESIS] -"\u2E29" => "))" - -# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u276C" => "<" - -# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u2770" => "<" - -# < [FULLWIDTH LESS-THAN SIGN] -"\uFF1C" => "<" - -# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u276D" => ">" - -# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u2771" => ">" - -# > [FULLWIDTH GREATER-THAN SIGN] -"\uFF1E" => ">" - -# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] -"\u2774" => "{" - -# { [FULLWIDTH LEFT CURLY BRACKET] -"\uFF5B" => "{" - -# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] -"\u2775" => "}" - -# } [FULLWIDTH RIGHT CURLY BRACKET] -"\uFF5D" => "}" - -# ⁺ [SUPERSCRIPT PLUS SIGN] -"\u207A" => "+" - -# ₊ [SUBSCRIPT PLUS SIGN] -"\u208A" => "+" - -# + [FULLWIDTH PLUS SIGN] -"\uFF0B" => "+" - -# ⁼ [SUPERSCRIPT EQUALS SIGN] -"\u207C" => "=" - -# ₌ [SUBSCRIPT EQUALS SIGN] -"\u208C" => "=" - -# = [FULLWIDTH EQUALS SIGN] -"\uFF1D" => "=" - -# ! [FULLWIDTH EXCLAMATION MARK] -"\uFF01" => "!" - -# ‼ [DOUBLE EXCLAMATION MARK] -"\u203C" => "!!" - -# ⁉ [EXCLAMATION QUESTION MARK] -"\u2049" => "!?" 
- -# # [FULLWIDTH NUMBER SIGN] -"\uFF03" => "#" - -# $ [FULLWIDTH DOLLAR SIGN] -"\uFF04" => "$" - -# ⁒ [COMMERCIAL MINUS SIGN] -"\u2052" => "%" - -# % [FULLWIDTH PERCENT SIGN] -"\uFF05" => "%" - -# & [FULLWIDTH AMPERSAND] -"\uFF06" => "&" - -# ⁎ [LOW ASTERISK] -"\u204E" => "*" - -# * [FULLWIDTH ASTERISK] -"\uFF0A" => "*" - -# , [FULLWIDTH COMMA] -"\uFF0C" => "," - -# . [FULLWIDTH FULL STOP] -"\uFF0E" => "." - -# ⁄ [FRACTION SLASH] -"\u2044" => "/" - -# / [FULLWIDTH SOLIDUS] -"\uFF0F" => "/" - -# : [FULLWIDTH COLON] -"\uFF1A" => ":" - -# ⁏ [REVERSED SEMICOLON] -"\u204F" => ";" - -# ; [FULLWIDTH SEMICOLON] -"\uFF1B" => ";" - -# ? [FULLWIDTH QUESTION MARK] -"\uFF1F" => "?" - -# ⁇ [DOUBLE QUESTION MARK] -"\u2047" => "??" - -# ⁈ [QUESTION EXCLAMATION MARK] -"\u2048" => "?!" - -# @ [FULLWIDTH COMMERCIAL AT] -"\uFF20" => "@" - -# \ [FULLWIDTH REVERSE SOLIDUS] -"\uFF3C" => "\\" - -# ‸ [CARET] -"\u2038" => "^" - -# ^ [FULLWIDTH CIRCUMFLEX ACCENT] -"\uFF3E" => "^" - -# _ [FULLWIDTH LOW LINE] -"\uFF3F" => "_" - -# ⁓ [SWUNG DASH] -"\u2053" => "~" - -# ~ [FULLWIDTH TILDE] -"\uFF5E" => "~" - -################################################################ -# Below is the Perl script used to generate the above mappings # -# from ASCIIFoldingFilter.java: # -################################################################ -# -# #!/usr/bin/perl -# -# use warnings; -# use strict; -# -# my @source_chars = (); -# my @source_char_descriptions = (); -# my $target = ''; -# -# while (<>) { -# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) { -# push @source_chars, $1; -# push @source_char_descriptions, $2; -# next; -# } -# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) { -# $target .= $1; -# next; -# } -# if (/break;/) { -# $target = "\\\"" if ($target eq '"'); -# for my $source_char_num (0..$#source_chars) { -# print "# $source_char_descriptions[$source_char_num]\n"; -# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n"; -# } -# @source_chars = (); -# @source_char_descriptions = 
(); -# $target = ''; -# } -# } diff --git a/solr/example/example-DIH/solr/solr/conf/mapping-ISOLatin1Accent.txt b/solr/example/example-DIH/solr/solr/conf/mapping-ISOLatin1Accent.txt deleted file mode 100644 index ede7742581b..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/mapping-ISOLatin1Accent.txt +++ /dev/null @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
- -# example: -# "À" => "A" -# "\u00C0" => "A" -# "\u00C0" => "\u0041" -# "ß" => "ss" -# "\t" => " " -# "\n" => "" - -# À => A -"\u00C0" => "A" - -# Á => A -"\u00C1" => "A" - -#  => A -"\u00C2" => "A" - -# à => A -"\u00C3" => "A" - -# Ä => A -"\u00C4" => "A" - -# Å => A -"\u00C5" => "A" - -# Æ => AE -"\u00C6" => "AE" - -# Ç => C -"\u00C7" => "C" - -# È => E -"\u00C8" => "E" - -# É => E -"\u00C9" => "E" - -# Ê => E -"\u00CA" => "E" - -# Ë => E -"\u00CB" => "E" - -# Ì => I -"\u00CC" => "I" - -# Í => I -"\u00CD" => "I" - -# Î => I -"\u00CE" => "I" - -# Ï => I -"\u00CF" => "I" - -# IJ => IJ -"\u0132" => "IJ" - -# Ð => D -"\u00D0" => "D" - -# Ñ => N -"\u00D1" => "N" - -# Ò => O -"\u00D2" => "O" - -# Ó => O -"\u00D3" => "O" - -# Ô => O -"\u00D4" => "O" - -# Õ => O -"\u00D5" => "O" - -# Ö => O -"\u00D6" => "O" - -# Ø => O -"\u00D8" => "O" - -# Œ => OE -"\u0152" => "OE" - -# Þ -"\u00DE" => "TH" - -# Ù => U -"\u00D9" => "U" - -# Ú => U -"\u00DA" => "U" - -# Û => U -"\u00DB" => "U" - -# Ü => U -"\u00DC" => "U" - -# Ý => Y -"\u00DD" => "Y" - -# Ÿ => Y -"\u0178" => "Y" - -# à => a -"\u00E0" => "a" - -# á => a -"\u00E1" => "a" - -# â => a -"\u00E2" => "a" - -# ã => a -"\u00E3" => "a" - -# ä => a -"\u00E4" => "a" - -# å => a -"\u00E5" => "a" - -# æ => ae -"\u00E6" => "ae" - -# ç => c -"\u00E7" => "c" - -# è => e -"\u00E8" => "e" - -# é => e -"\u00E9" => "e" - -# ê => e -"\u00EA" => "e" - -# ë => e -"\u00EB" => "e" - -# ì => i -"\u00EC" => "i" - -# í => i -"\u00ED" => "i" - -# î => i -"\u00EE" => "i" - -# ï => i -"\u00EF" => "i" - -# ij => ij -"\u0133" => "ij" - -# ð => d -"\u00F0" => "d" - -# ñ => n -"\u00F1" => "n" - -# ò => o -"\u00F2" => "o" - -# ó => o -"\u00F3" => "o" - -# ô => o -"\u00F4" => "o" - -# õ => o -"\u00F5" => "o" - -# ö => o -"\u00F6" => "o" - -# ø => o -"\u00F8" => "o" - -# œ => oe -"\u0153" => "oe" - -# ß => ss -"\u00DF" => "ss" - -# þ => th -"\u00FE" => "th" - -# ù => u -"\u00F9" => "u" - -# ú => u -"\u00FA" => "u" - -# û => u -"\u00FB" => "u" - -# ü => u 
-"\u00FC" => "u" - -# ý => y -"\u00FD" => "y" - -# ÿ => y -"\u00FF" => "y" - -# ff => ff -"\uFB00" => "ff" - -# fi => fi -"\uFB01" => "fi" - -# fl => fl -"\uFB02" => "fl" - -# ffi => ffi -"\uFB03" => "ffi" - -# ffl => ffl -"\uFB04" => "ffl" - -# ſt => ft -"\uFB05" => "ft" - -# st => st -"\uFB06" => "st" diff --git a/solr/example/example-DIH/solr/solr/conf/protwords.txt b/solr/example/example-DIH/solr/solr/conf/protwords.txt deleted file mode 100644 index 1dfc0abecbf..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. 
-dontstems -zwhacky - diff --git a/solr/example/example-DIH/solr/solr/conf/solr-data-config.xml b/solr/example/example-DIH/solr/solr/conf/solr-data-config.xml deleted file mode 100644 index ee7d6cf36e3..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/solr-data-config.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/solrconfig.xml b/solr/example/example-DIH/solr/solr/conf/solrconfig.xml deleted file mode 100644 index 56e7ed68f1e..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/solrconfig.xml +++ /dev/null @@ -1,1340 +0,0 @@ - - - - - - - - - 9.0.0 - - - - - - - - - - - - - - - - - - - - ${solr.data.dir:} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.lock.type:native} - - - - - - - - - - - - - true - - - - - - - - - - - - - - - - ${solr.ulog.dir:} - - - - - ${solr.autoCommit.maxTime:15000} - false - - - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - - - - - - - ${solr.max.booleanClauses:1024} - - - - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - static firstSearcher warming in solrconfig.xml - - - - - - false - - - - - - - - - - - - - - - - - - - - - solr-data-config.xml - - - - - - - - explicit - 10 - text - - - - - - - - - - - - - - - explicit - json - true - text - - - - - - - explicit - - - velocity - browse - layout - - - edismax - *:* - 10 - *,score - - - on - 1 - - - - - - text - - - - - - - true - ignored_ - - - true - links - ignored_ - - - - - - - - text_general - - - - - - default - text - solr.DirectSolrSpellChecker - - internal - - 0.5 - - 2 - - 1 - - 5 - - 4 - - 0.01 - - - - - - wordbreak - solr.WordBreakSolrSpellChecker - name - true - true - 10 - - - - - - - - - - - - - - - - text - - default - wordbreak - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - spellcheck - - - - - - mySuggester - FuzzyLookupFactory - DocumentDictionaryFactory - cat - price - string - - - - - - true - 10 - - - suggest - - - - 
- - - - - text - true - - - tvComponent - - - - - - - - - - true - false - - - terms - - - - - - - - string - elevate.xml - - - - - - explicit - text - - - elevator - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - 10 - .,!? - - - - - - - WORD - - - en - US - - - - - - - - - - - - - - - - - - - - - - text/plain; charset=UTF-8 - - - - - ${velocity.template.base.dir:} - - - - - 5 - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/spellings.txt b/solr/example/example-DIH/solr/solr/conf/spellings.txt deleted file mode 100644 index d7ede6f5611..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/spellings.txt +++ /dev/null @@ -1,2 +0,0 @@ -pizza -history \ No newline at end of file diff --git a/solr/example/example-DIH/solr/solr/conf/stopwords.txt b/solr/example/example-DIH/solr/solr/conf/stopwords.txt deleted file mode 100644 index ae1e83eeb3d..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/stopwords.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/solr/example/example-DIH/solr/solr/conf/synonyms.txt b/solr/example/example-DIH/solr/solr/conf/synonyms.txt deleted file mode 100644 index eab4ee87537..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr/example/example-DIH/solr/solr/conf/update-script.js b/solr/example/example-DIH/solr/solr/conf/update-script.js deleted file mode 100644 index 49b07f9b71e..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/update-script.js +++ /dev/null @@ -1,53 +0,0 @@ -/* - This is a basic skeleton JavaScript update processor. - - In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in - the example solrconfig.xml and must be uncommented to be enabled. 
- - See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details. -*/ - -function processAdd(cmd) { - - doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument - id = doc.getFieldValue("id"); - logger.info("update-script#processAdd: id=" + id); - -// Set a field value: -// doc.setField("foo_s", "whatever"); - -// Get a configuration parameter: -// config_param = params.get('config_param'); // "params" only exists if processor configured with - -// Get a request parameter: -// some_param = req.getParams().get("some_param") - -// Add a field of field names that match a pattern: -// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss -// field_names = doc.getFieldNames().toArray(); -// for(i=0; i < field_names.length; i++) { -// field_name = field_names[i]; -// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); } -// } - -} - -function processDelete(cmd) { - // no-op -} - -function processMergeIndexes(cmd) { - // no-op -} - -function processCommit(cmd) { - // no-op -} - -function processRollback(cmd) { - // no-op -} - -function finish() { - // no-op -} diff --git a/solr/example/example-DIH/solr/solr/conf/xslt/example.xsl b/solr/example/example-DIH/solr/solr/conf/xslt/example.xsl deleted file mode 100644 index b8992700828..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/xslt/example.xsl +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - - - - - - - - <xsl:value-of select="$title"/> - - - -

-
- This has been formatted by the sample "example.xsl" transform - - use your own XSLT to get a nicer page -
- - - -
- - - -
- - - - -
-
-
- - - - - - - - - - - - - - javascript:toggle("");? -
- - exp - - - - - -
- - -
- - - - - - - -
    - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - -
diff --git a/solr/example/example-DIH/solr/solr/conf/xslt/example_atom.xsl b/solr/example/example-DIH/solr/solr/conf/xslt/example_atom.xsl deleted file mode 100644 index b6c23151dc4..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/xslt/example_atom.xsl +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - Example Solr Atom 1.0 Feed - - This has been formatted by the sample "example_atom.xsl" transform - - use your own XSLT to get a nicer Atom feed. - - - Apache Solr - solr-user@lucene.apache.org - - - - - - tag:localhost,2007:example - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - tag:localhost,2007: - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/xslt/example_rss.xsl b/solr/example/example-DIH/solr/solr/conf/xslt/example_rss.xsl deleted file mode 100644 index c8ab5bfb1ec..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/xslt/example_rss.xsl +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - - - - Example Solr RSS 2.0 Feed - http://localhost:8983/solr - - This has been formatted by the sample "example_rss.xsl" transform - - use your own XSLT to get a nicer RSS feed. - - en-us - http://localhost:8983/solr - - - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - http://localhost:8983/solr/select?q=id: - - - - - - - http://localhost:8983/solr/select?q=id: - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/xslt/luke.xsl b/solr/example/example-DIH/solr/solr/conf/xslt/luke.xsl deleted file mode 100644 index 05fb5bfeee2..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/xslt/luke.xsl +++ /dev/null @@ -1,337 +0,0 @@ - - - - - - - - - Solr Luke Request Handler Response - - - - - - - - - <xsl:value-of select="$title"/> - - - - - -

- -

-
- -

Index Statistics

- -
- -

Field Statistics

- - - -

Document statistics

- - - - - - - - - - -
- -
- - -
- -
- -
-
-
- - - - - - - - - - - - - - - - - - - - - -
-

- -

- -
- -
-
-
- - -
- - 50 - 800 - 160 - blue - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- background-color: ; width: px; height: px; -
-
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - -
  • - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/conf/xslt/updateXml.xsl b/solr/example/example-DIH/solr/solr/conf/xslt/updateXml.xsl deleted file mode 100644 index a96e1d02448..00000000000 --- a/solr/example/example-DIH/solr/solr/conf/xslt/updateXml.xsl +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/solr/core.properties b/solr/example/example-DIH/solr/solr/core.properties deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/solr/example/example-DIH/solr/tika/conf/managed-schema b/solr/example/example-DIH/solr/tika/conf/managed-schema deleted file mode 100644 index 196cdb3632a..00000000000 --- a/solr/example/example-DIH/solr/tika/conf/managed-schema +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/solr/example/example-DIH/solr/tika/conf/solrconfig.xml b/solr/example/example-DIH/solr/tika/conf/solrconfig.xml deleted file mode 100644 index 500ee19ca63..00000000000 --- a/solr/example/example-DIH/solr/tika/conf/solrconfig.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - 9.0.0 - - - - - - - - explicit - text - - - - - - - tika-data-config.xml - - - - diff --git a/solr/example/example-DIH/solr/tika/conf/tika-data-config.xml b/solr/example/example-DIH/solr/tika/conf/tika-data-config.xml deleted file mode 100644 index 5286fc418f7..00000000000 --- a/solr/example/example-DIH/solr/tika/conf/tika-data-config.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/solr/example/example-DIH/solr/tika/core.properties b/solr/example/example-DIH/solr/tika/core.properties deleted file mode 100644 index e69de29bb2d..00000000000 
diff --git a/solr/licenses/activation-1.1.1.jar.sha1 b/solr/licenses/activation-1.1.1.jar.sha1 deleted file mode 100644 index 7b2295c88bf..00000000000 --- a/solr/licenses/activation-1.1.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -485de3a253e23f645037828c07f1d7f1af40763a diff --git a/solr/licenses/activation-LICENSE-CDDL.txt b/solr/licenses/activation-LICENSE-CDDL.txt deleted file mode 100644 index 1154e0aeec5..00000000000 --- a/solr/licenses/activation-LICENSE-CDDL.txt +++ /dev/null @@ -1,119 +0,0 @@ -COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0 - -1. Definitions. - -1.1. Contributor means each individual or entity that creates or contributes to the creation of Modifications. - -1.2. Contributor Version means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. - -1.3. Covered Software means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. - -1.4. Executable means the Covered Software in any form other than Source Code. - -1.5. Initial Developer means the individual or entity that first makes Original Software available under this License. - -1.6. Larger Work means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. - -1.7. License means this document. - -1.8. Licensable means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. - -1.9. Modifications means the Source Code and Executable form of any of the following: - -A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; - -B. 
Any new file that contains any part of the Original Software or previous Modification; or - -C. Any new file that is contributed or otherwise made available under the terms of this License. - -1.10. Original Software means the Source Code and Executable form of computer software code that is originally released under this License. - -1.11. Patent Claims means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. - -1.12. Source Code means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. - -1.13. You (or Your) means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, You includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, control means (a)the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b)ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. - -2. License Grants. - -2.1. The Initial Developer Grant. 
-Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: -(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and -(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). -(c) The licenses granted in Sections2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. -(d) Notwithstanding Section2.1(b) above, no patent license is granted: (1)for code that You delete from the Original Software, or (2)for infringements caused by: (i)the modification of the Original Software, or (ii)the combination of the Original Software with other software or devices. - -2.2. Contributor Grant. 
-Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: -(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and -(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1)Modifications made by that Contributor (or portions thereof); and (2)the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). -(c) The licenses granted in Sections2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. -(d) Notwithstanding Section2.2(b) above, no patent license is granted: (1)for any code that Contributor has deleted from the Contributor Version; (2)for infringements caused by: (i)third party modifications of Contributor Version, or (ii)the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3)under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. - -3. Distribution Obligations. - -3.1. Availability of Source Code. - -Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. 
You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. - -3.2. Modifications. - -The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. - -3.3. Required Notices. -You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. - -3.4. Application of Additional Terms. -You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. - -3.5. Distribution of Executable Versions. 
-You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipients rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. - -3.6. Larger Works. -You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. - -4. Versions of the License. - -4.1. New Versions. -Sun Microsystems, Inc. is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. - -4.2. Effect of New Versions. - -You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. 
If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. -4.3. Modified Versions. - -When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a)rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b)otherwise make it clear that the license contains terms which differ from this License. - -5. DISCLAIMER OF WARRANTY. - -COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN AS IS BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -6. TERMINATION. - -6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. 
Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. - -6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as Participant) alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. - -6.3. In the event of termination under Sections6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. - -7. LIMITATION OF LIABILITY. 
- -UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTYS NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -8. U.S. GOVERNMENT END USERS. - -The Covered Software is a commercial item, as that term is defined in 48C.F.R.2.101 (Oct. 1995), consisting of commercial computer software (as that term is defined at 48 C.F.R. 252.227-7014(a)(1)) and commercial computer software documentation as such terms are used in 48C.F.R.12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. - -9. MISCELLANEOUS. - -This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 
This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdictions conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. - -10. RESPONSIBILITY FOR CLAIMS. - -As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. - -NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) -The GlassFish code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. 
- - - diff --git a/solr/licenses/derby-10.9.1.0.jar.sha1 b/solr/licenses/derby-10.9.1.0.jar.sha1 deleted file mode 100644 index 2a69e42afb7..00000000000 --- a/solr/licenses/derby-10.9.1.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4538cf5564ab3c262eec65c55fdb13965625589c diff --git a/solr/licenses/derby-LICENSE-ASL.txt b/solr/licenses/derby-LICENSE-ASL.txt deleted file mode 100644 index d6456956733..00000000000 --- a/solr/licenses/derby-LICENSE-ASL.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/solr/licenses/derby-NOTICE.txt b/solr/licenses/derby-NOTICE.txt deleted file mode 100644 index f22595feb7f..00000000000 --- a/solr/licenses/derby-NOTICE.txt +++ /dev/null @@ -1,182 +0,0 @@ -========================================================================= -== NOTICE file corresponding to section 4(d) of the Apache License, -== Version 2.0, in this case for the Apache Derby distribution. -== -== DO NOT EDIT THIS FILE DIRECTLY. IT IS GENERATED -== BY THE buildnotice TARGET IN THE TOP LEVEL build.xml FILE. -== -========================================================================= - -Apache Derby -Copyright 2004-2012 The Apache Software Foundation - -This product includes software developed by -The Apache Software Foundation (http://www.apache.org/). - - -========================================================================= - -Portions of Derby were originally developed by -International Business Machines Corporation and are -licensed to the Apache Software Foundation under the -"Software Grant and Corporate Contribution License Agreement", -informally known as the "Derby CLA". -The following copyright notice(s) were affixed to portions of the code -with which this file is now or was at one time distributed -and are placed here unaltered. - -(C) Copyright 1997,2004 International Business Machines Corporation. All rights reserved. - -(C) Copyright IBM Corp. 2003. 
- - -========================================================================= - - -The portion of the functionTests under 'nist' was originally -developed by the National Institute of Standards and Technology (NIST), -an agency of the United States Department of Commerce, and adapted by -International Business Machines Corporation in accordance with the NIST -Software Acknowledgment and Redistribution document at -http://www.itl.nist.gov/div897/ctg/sql_form.htm - - - -========================================================================= - - -The JDBC apis for small devices and JDBC3 (under java/stubs/jsr169 and -java/stubs/jdbc3) were produced by trimming sources supplied by the -Apache Harmony project. In addition, the Harmony SerialBlob and -SerialClob implementations are used. The following notice covers the Harmony sources: - -Portions of Harmony were originally developed by -Intel Corporation and are licensed to the Apache Software -Foundation under the "Software Grant and Corporate Contribution -License Agreement", informally known as the "Intel Harmony CLA". - - -========================================================================= - - -The Derby build relies on source files supplied by the Apache Felix -project. The following notice covers the Felix files: - - Apache Felix Main - Copyright 2008 The Apache Software Foundation - - - I. Included Software - - This product includes software developed at - The Apache Software Foundation (http://www.apache.org/). - Licensed under the Apache License 2.0. - - This product includes software developed at - The OSGi Alliance (http://www.osgi.org/). - Copyright (c) OSGi Alliance (2000, 2007). - Licensed under the Apache License 2.0. - - This product includes software from http://kxml.sourceforge.net. - Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany. - Licensed under BSD License. - - II. Used Software - - This product uses software developed at - The OSGi Alliance (http://www.osgi.org/). 
- Copyright (c) OSGi Alliance (2000, 2007). - Licensed under the Apache License 2.0. - - - III. License Summary - - Apache License 2.0 - - BSD License - - -========================================================================= - - -The Derby build relies on jar files supplied by the Apache Xalan -project. The following notice covers the Xalan jar files: - - ========================================================================= - == NOTICE file corresponding to section 4(d) of the Apache License, == - == Version 2.0, in this case for the Apache Xalan Java distribution. == - ========================================================================= - - Apache Xalan (Xalan XSLT processor) - Copyright 1999-2006 The Apache Software Foundation - - Apache Xalan (Xalan serializer) - Copyright 1999-2006 The Apache Software Foundation - - This product includes software developed at - The Apache Software Foundation (http://www.apache.org/). - - ========================================================================= - Portions of this software was originally based on the following: - - software copyright (c) 1999-2002, Lotus Development Corporation., - http://www.lotus.com. - - software copyright (c) 2001-2002, Sun Microsystems., - http://www.sun.com. - - software copyright (c) 2003, IBM Corporation., - http://www.ibm.com. - - ========================================================================= - The binary distribution package (ie. jars, samples and documentation) of - this product includes software developed by the following: - - - The Apache Software Foundation - - Xerces Java - see LICENSE.txt - - JAXP 1.3 APIs - see LICENSE.txt - - Bytecode Engineering Library - see LICENSE.txt - - Regular Expression - see LICENSE.txt - - - Scott Hudson, Frank Flannery, C. Scott Ananian - - CUP Parser Generator runtime (javacup\runtime) - see LICENSE.txt - - ========================================================================= - The source distribution package (ie. 
all source and tools required to build - Xalan Java) of this product includes software developed by the following: - - - The Apache Software Foundation - - Xerces Java - see LICENSE.txt - - JAXP 1.3 APIs - see LICENSE.txt - - Bytecode Engineering Library - see LICENSE.txt - - Regular Expression - see LICENSE.txt - - Ant - see LICENSE.txt - - Stylebook doc tool - see LICENSE.txt - - - Elliot Joel Berk and C. Scott Ananian - - Lexical Analyzer Generator (JLex) - see LICENSE.txt - - ========================================================================= - Apache Xerces Java - Copyright 1999-2006 The Apache Software Foundation - - This product includes software developed at - The Apache Software Foundation (http://www.apache.org/). - - Portions of Apache Xerces Java in xercesImpl.jar and xml-apis.jar - were originally based on the following: - - software copyright (c) 1999, IBM Corporation., http://www.ibm.com. - - software copyright (c) 1999, Sun Microsystems., http://www.sun.com. - - voluntary contributions made by Paul Eng on behalf of the - Apache Software Foundation that were originally developed at iClick, Inc., - software copyright (c) 1999. - - ========================================================================= - Apache xml-commons xml-apis (redistribution of xml-apis.jar) - - Apache XML Commons - Copyright 2001-2003,2006 The Apache Software Foundation. - - This product includes software developed at - The Apache Software Foundation (http://www.apache.org/). - - Portions of this software were originally based on the following: - - software copyright (c) 1999, IBM Corporation., http://www.ibm.com. - - software copyright (c) 1999, Sun Microsystems., http://www.sun.com. 
- - software copyright (c) 2000 World Wide Web Consortium, http://www.w3.org - diff --git a/solr/licenses/gimap-1.5.1.jar.sha1 b/solr/licenses/gimap-1.5.1.jar.sha1 deleted file mode 100644 index 41c9dbff5dc..00000000000 --- a/solr/licenses/gimap-1.5.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -3a4ccd3aa6ce33ec701893c3ee632eeb0e012c89 diff --git a/solr/licenses/gimap-LICENSE-CDDL.txt b/solr/licenses/gimap-LICENSE-CDDL.txt deleted file mode 100644 index d6e03ec15ce..00000000000 --- a/solr/licenses/gimap-LICENSE-CDDL.txt +++ /dev/null @@ -1,135 +0,0 @@ -COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1 - -1. Definitions. - -1.1. "Contributor" means each individual or entity that creates or contributes to the creation of Modifications. - -1.2. "Contributor Version" means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. - -1.3. "Covered Software" means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. - -1.4. "Executable" means the Covered Software in any form other than Source Code. - -1.5. "Initial Developer" means the individual or entity that first makes Original Software available under this License. - -1.6. "Larger Work" means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. - -1.7. "License" means this document. - -1.8. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. - -1.9. "Modifications" means the Source Code and Executable form of any of the following: - - A. 
Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; - - B. Any new file that contains any part of the Original Software or previous Modification; or - - C. Any new file that is contributed or otherwise made available under the terms of this License. - -1.10. "Original Software" means the Source Code and Executable form of computer software code that is originally released under this License. - -1.11. "Patent Claims" means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. - -1.12. "Source Code" means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. - -1.13. "You" (or "Your") means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. - -2. License Grants. - -2.1. The Initial Developer Grant. 
- -Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). - -(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. - -(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. - -2.2. Contributor Grant. 
- -Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). - -(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. - -(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. - -3. Distribution Obligations. - -3.1. Availability of Source Code. 
- -Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. - -3.2. Modifications. - -The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. - -3.3. Required Notices. - -You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. - -3.4. Application of Additional Terms. - -You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. 
You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. - -3.5. Distribution of Executable Versions. - -You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. - -3.6. Larger Works. - -You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. - -4. Versions of the License. - -4.1. New Versions. - -Oracle is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. - -4.2. Effect of New Versions. 
- -You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. - -4.3. Modified Versions. - -When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. - -5. DISCLAIMER OF WARRANTY. - -COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -6. TERMINATION. - -6.1. 
This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. - -6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as "Participant") alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. - -6.3. If You assert a patent infringement claim against Participant alleging that the Participant Software directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by such Participant under Sections 2.1 or 2.2 shall be taken into account in determining the amount or value of any payment or license. - -6.4. 
In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. - -7. LIMITATION OF LIABILITY. - -UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -8. U.S. GOVERNMENT END USERS. - -The Covered Software is a "commercial item," as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and "commercial computer software documentation" as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. - -9. MISCELLANEOUS. 
- -This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. - -10. RESPONSIBILITY FOR CLAIMS. - -As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. - -NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) - -The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). 
Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. \ No newline at end of file diff --git a/solr/licenses/javax.mail-1.5.1.jar.sha1 b/solr/licenses/javax.mail-1.5.1.jar.sha1 deleted file mode 100644 index e7a0a834c9a..00000000000 --- a/solr/licenses/javax.mail-1.5.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9724dd44f1abbba99c9858aa05fc91d53f59e7a5 diff --git a/solr/licenses/javax.mail-LICENSE-CDDL.txt b/solr/licenses/javax.mail-LICENSE-CDDL.txt deleted file mode 100644 index d6e03ec15ce..00000000000 --- a/solr/licenses/javax.mail-LICENSE-CDDL.txt +++ /dev/null @@ -1,135 +0,0 @@ -COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1 - -1. Definitions. - -1.1. "Contributor" means each individual or entity that creates or contributes to the creation of Modifications. - -1.2. "Contributor Version" means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. - -1.3. "Covered Software" means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. - -1.4. "Executable" means the Covered Software in any form other than Source Code. - -1.5. "Initial Developer" means the individual or entity that first makes Original Software available under this License. - -1.6. "Larger Work" means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. - -1.7. "License" means this document. - -1.8. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. - -1.9. 
"Modifications" means the Source Code and Executable form of any of the following: - - A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; - - B. Any new file that contains any part of the Original Software or previous Modification; or - - C. Any new file that is contributed or otherwise made available under the terms of this License. - -1.10. "Original Software" means the Source Code and Executable form of computer software code that is originally released under this License. - -1.11. "Patent Claims" means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. - -1.12. "Source Code" means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. - -1.13. "You" (or "Your") means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. - -2. License Grants. - -2.1. The Initial Developer Grant. 
- -Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). - -(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. - -(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. - -2.2. Contributor Grant. 
- -Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and - -(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). - -(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. - -(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. - -3. Distribution Obligations. - -3.1. Availability of Source Code. 
- -Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. - -3.2. Modifications. - -The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. - -3.3. Required Notices. - -You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. - -3.4. Application of Additional Terms. - -You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. 
You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. - -3.5. Distribution of Executable Versions. - -You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. - -3.6. Larger Works. - -You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. - -4. Versions of the License. - -4.1. New Versions. - -Oracle is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. - -4.2. Effect of New Versions. 
- -You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. - -4.3. Modified Versions. - -When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. - -5. DISCLAIMER OF WARRANTY. - -COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -6. TERMINATION. - -6.1. 
This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. - -6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as "Participant") alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. - -6.3. If You assert a patent infringement claim against Participant alleging that the Participant Software directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by such Participant under Sections 2.1 or 2.2 shall be taken into account in determining the amount or value of any payment or license. - -6.4. 
In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. - -7. LIMITATION OF LIABILITY. - -UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -8. U.S. GOVERNMENT END USERS. - -The Covered Software is a "commercial item," as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and "commercial computer software documentation" as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. - -9. MISCELLANEOUS. 
- -This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. - -10. RESPONSIBILITY FOR CLAIMS. - -As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. - -NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) - -The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). 
Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. \ No newline at end of file diff --git a/solr/packaging/build.gradle b/solr/packaging/build.gradle index 55b78ca0182..a75ea19fdc8 100644 --- a/solr/packaging/build.gradle +++ b/solr/packaging/build.gradle @@ -46,8 +46,6 @@ dependencies { ":solr:contrib:analytics", ":solr:contrib:extraction", ":solr:contrib:clustering", - ":solr:contrib:dataimporthandler", - ":solr:contrib:dataimporthandler-extras", ":solr:contrib:jaegertracer-configurator", ":solr:contrib:langid", ":solr:contrib:ltr", diff --git a/solr/server/README.md b/solr/server/README.md index 6686c4f1f96..3760227fa3a 100644 --- a/solr/server/README.md +++ b/solr/server/README.md @@ -98,8 +98,8 @@ statements in the solrconfig.xml file to reference plugin jars outside of this directory for loading "contrib" plugins via relative paths. If you make a copy of this example server and wish to use the -ExtractingRequestHandler (SolrCell), DataImportHandler (DIH), the -clustering component, or any other modules in "contrib", you will need to +ExtractingRequestHandler (SolrCell), the clustering component, +or any other modules in "contrib", you will need to copy the required jars or update the paths to those jars in your solrconfig.xml. 
diff --git a/solr/server/etc/security.policy b/solr/server/etc/security.policy index 57229f06cb3..10303472764 100644 --- a/solr/server/etc/security.policy +++ b/solr/server/etc/security.policy @@ -114,7 +114,7 @@ grant { // needed by hadoop htrace permission java.net.NetPermission "getNetworkInformation"; - // needed by DIH + // needed by DIH - possibly even after DIH is a package permission java.sql.SQLPermission "deregisterDriver"; permission java.util.logging.LoggingPermission "control"; diff --git a/solr/solr-ref-guide/src/collection-specific-tools.adoc b/solr/solr-ref-guide/src/collection-specific-tools.adoc index a46da7c44b2..a927ae5d222 100644 --- a/solr/solr-ref-guide/src/collection-specific-tools.adoc +++ b/solr/solr-ref-guide/src/collection-specific-tools.adoc @@ -1,5 +1,5 @@ = Collection-Specific Tools -:page-children: analysis-screen, dataimport-screen, documents-screen, files-screen, query-screen, stream-screen, schema-browser-screen +:page-children: analysis-screen, documents-screen, files-screen, query-screen, stream-screen, schema-browser-screen // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -35,7 +35,6 @@ The collection-specific UI screens are listed below, with a link to the section // TODO: SOLR-10655 BEGIN: refactor this into a 'collection-screens-list.include.adoc' file for reuse * <> - lets you analyze the data found in specific fields. -* <> - shows you information about the current status of the Data Import Handler. * <> - provides a simple form allowing you to execute various Solr indexing commands directly from the browser. * <> - shows the current core configuration files such as `solrconfig.xml`. * <> - lets you submit a structured query about various elements of a core. 
diff --git a/solr/solr-ref-guide/src/config-sets.adoc b/solr/solr-ref-guide/src/config-sets.adoc index f89251786f5..8acb03add60 100644 --- a/solr/solr-ref-guide/src/config-sets.adoc +++ b/solr/solr-ref-guide/src/config-sets.adoc @@ -16,7 +16,7 @@ // specific language governing permissions and limitations // under the License. -Configsets are a set of configuration files used in a Solr installation: `solrconfig.xml`, the schema, and then <> like language files, `synonyms.txt`, DIH-related configuration, and others. +Configsets are a set of configuration files used in a Solr installation: `solrconfig.xml`, the schema, and then <> like language files, `synonyms.txt`, and others. Such configuration, _configsets_, can be named and then referenced by collections or cores, possibly with the intent to share them to avoid duplication. diff --git a/solr/solr-ref-guide/src/configsets-api.adoc b/solr/solr-ref-guide/src/configsets-api.adoc index 2ce48396737..9b0cf261fae 100644 --- a/solr/solr-ref-guide/src/configsets-api.adoc +++ b/solr/solr-ref-guide/src/configsets-api.adoc @@ -19,7 +19,7 @@ The Configsets API enables you to upload new configsets to ZooKeeper, create, and delete configsets when Solr is running SolrCloud mode. -Configsets are a collection of configuration files such as `solrconfig.xml`, `synonyms.txt`, the schema, language-specific files, DIH-related configuration, and other collection-level configuration files (everything that normally lives in the `conf` directory). Solr ships with two example configsets (`_default` and `sample_techproducts_configs`) which can be used when creating collections. Using the same concept, you can create your own configsets and make them available when creating collections. +Configsets are a collection of configuration files such as `solrconfig.xml`, `synonyms.txt`, the schema, language-specific files, and other collection-level configuration files (everything that normally lives in the `conf` directory). 
Solr ships with two example configsets (`_default` and `sample_techproducts_configs`) which can be used when creating collections. Using the same concept, you can create your own configsets and make them available when creating collections. This API provides a way to upload configuration files to ZooKeeper and share the same set of configuration files between two or more collections. @@ -86,7 +86,6 @@ This functionality is enabled by default, but can be disabled via a runtime para A configset is uploaded in a "trusted" mode if authentication is enabled and the upload operation is performed as an authenticated request. Without authentication, a configset is uploaded in an "untrusted" mode. Upon creation of a collection using an "untrusted" configset, the following functionality will not work: -* If specified in the configset, the DataImportHandler's ScriptTransformer will not initialize. * The XSLT transformer (`tr` parameter) cannot be used at request processing time. * If specified in the configset, the StatelessScriptUpdateProcessor will not initialize. * Collections won't initialize if directives are used in the configset. (Note: Libraries added to Solr's classpath don't need the directive) diff --git a/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc b/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc index fccd9d27435..f1f0fa2286f 100644 --- a/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc +++ b/solr/solr-ref-guide/src/configuring-solrconfig-xml.adoc @@ -93,10 +93,15 @@ The <> allows you to use an API to modify [source,json] ---- -{"userProps": { - "dih.db.url": "jdbc:oracle:thin:@localhost:1521", - "dih.db.user": "username", - "dih.db.pass": "password"}} +{ + "userProps":{"update.autoCreateFields":"false"}, + "requestHandler":{"/myterms":{ + "name":"/myterms", + "class":"solr.SearchHandler", + "defaults":{ + "terms":true, + "distrib":false}, + "components":["terms"]}}} ---- For more details, see the section <>. 
diff --git a/solr/solr-ref-guide/src/core-specific-tools.adoc b/solr/solr-ref-guide/src/core-specific-tools.adoc index 16c31c47aa1..ab02c11b66f 100644 --- a/solr/solr-ref-guide/src/core-specific-tools.adoc +++ b/solr/solr-ref-guide/src/core-specific-tools.adoc @@ -39,7 +39,6 @@ If you are running a single node instance of Solr, additional UI screens normall // TODO: SOLR-10655 BEGIN: refactor this into a 'collection-screens-list.include.adoc' file for reuse * <> - lets you analyze the data found in specific fields. -* <> - shows you information about the current status of the Data Import Handler. * <> - provides a simple form allowing you to execute various Solr indexing commands directly from the browser. * <> - shows the current core configuration files such as `solrconfig.xml`. * <> - lets you submit a structured query about various elements of a core. diff --git a/solr/solr-ref-guide/src/dataimport-screen.adoc b/solr/solr-ref-guide/src/dataimport-screen.adoc deleted file mode 100644 index 1f28cd52feb..00000000000 --- a/solr/solr-ref-guide/src/dataimport-screen.adoc +++ /dev/null @@ -1,28 +0,0 @@ -= Dataimport Screen -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -WARNING: The Data Import Handler is deprecated as of v8.6 and is scheduled to be removed in 9.0. - -The Dataimport screen shows the configuration of the DataImportHandler (DIH) and allows you start, and monitor the status of, import commands as defined by the options selected on the screen and defined in the configuration file. - -.The Dataimport Screen -image::images/dataimport-screen/dataimport.png[image,width=485,height=250] - -This screen also lets you adjust various options to control how the data is imported to Solr, and view the data import configuration file that controls the import. - -For more information about data importing with DIH, see the section on <>. diff --git a/solr/solr-ref-guide/src/images/dataimport-screen/dataimport.png b/solr/solr-ref-guide/src/images/dataimport-screen/dataimport.png deleted file mode 100644 index 7444c27eac2..00000000000 Binary files a/solr/solr-ref-guide/src/images/dataimport-screen/dataimport.png and /dev/null differ diff --git a/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc b/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc index e805bff233a..993a4a26e35 100644 --- a/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc +++ b/solr/solr-ref-guide/src/indexing-and-basic-data-operations.adoc @@ -4,7 +4,6 @@ uploading-data-with-index-handlers, + indexing-nested-documents, + uploading-data-with-solr-cell-using-apache-tika, + - uploading-structured-data-store-data-with-the-data-import-handler, + updating-parts-of-documents, + detecting-languages-during-indexing, + de-duplication, + @@ -42,8 +41,6 @@ This section describes how Solr adds data to its index. It covers the following * *<>*: Information about using the Solr Cell framework to upload data for indexing. -* *<>*: Information about uploading and indexing data from a structured data store. - * *<>*: Information about how to use atomic updates and optimistic concurrency with Solr. 
* *<>*: Information about using language identification during the indexing process. diff --git a/solr/solr-ref-guide/src/installing-solr.adoc b/solr/solr-ref-guide/src/installing-solr.adoc index 4e3872aba3c..962375e7984 100644 --- a/solr/solr-ref-guide/src/installing-solr.adoc +++ b/solr/solr-ref-guide/src/installing-solr.adoc @@ -109,9 +109,6 @@ Here are the examples included with Solr: exampledocs:: This is a small set of simple CSV, XML, and JSON files that can be used with `bin/post` when first getting started with Solr. For more information about using `bin/post` with these files, see <>. -example-DIH:: -This directory includes a few example DataImport Handler (DIH) configurations to help you get started with importing structured content in a database, an email server, or even an Atom feed. Each example will index a different set of data; see the README there for more details about these examples. - files:: The `files` directory provides a basic search UI for documents such as Word or PDF that you may have stored locally. See the README there for details on how to use this example. @@ -151,7 +148,7 @@ Solr also provides a number of useful examples to help you learn about key featu bin/solr -e techproducts ---- -Currently, the available examples you can run are: techproducts, dih, schemaless, and cloud. See the section <> for details on each example. +Currently, the available examples you can run are: techproducts, schemaless, and cloud. See the section <> for details on each example. .Getting Started with SolrCloud NOTE: Running the `cloud` example starts Solr in <> mode. For more information on starting Solr in SolrCloud mode, see the section <>. 
diff --git a/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc b/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc index e42b8c7ffd3..4deea753f43 100644 --- a/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc +++ b/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc @@ -122,6 +122,8 @@ _(raw; not yet edited)_ and "follower". This includes API calls for the replication handler and metrics. For rolling upgrades into 9.0, you need to be on Solr version 8.7 or greater. Some metrics also changed, alerts and monitors on Solr KPIs that mention "master" or "slave" will also now be "leader" and "follower" + +* SOLR-14783: Data Import Handler (DIH) has been removed from Solr. The community package is available at: https://github.com/rohitbemax/dataimporthandler (Alexandre Rafalovitch) === Upgrade Prerequisites in Solr 9 diff --git a/solr/solr-ref-guide/src/solr-control-script-reference.adoc b/solr/solr-ref-guide/src/solr-control-script-reference.adoc index b192f9c6a70..88d7c4580bc 100644 --- a/solr/solr-ref-guide/src/solr-control-script-reference.adoc +++ b/solr/solr-ref-guide/src/solr-control-script-reference.adoc @@ -77,7 +77,6 @@ The available options are: * cloud * techproducts -* dih * schemaless + See the section <> below for more details on the example configurations. @@ -206,11 +205,6 @@ When using this example, you can choose from any of the available configsets fou * *techproducts*: This example starts Solr in standalone mode with a schema designed for the sample documents included in the `$SOLR_HOME/example/exampledocs` directory. + The configset used can be found in `$SOLR_HOME/server/solr/configsets/sample_techproducts_configs`. -* *dih*: This example starts Solr in standalone mode with the DataImportHandler (DIH) enabled and several example `dataconfig.xml` files pre-configured for different types of data supported with DIH (such as, database contents, email, RSS feeds, etc.). 
-+ -The configset used is customized for DIH, and is found in `$SOLR_HOME/example/example-DIH/solr/conf`. -+ -For more information about DIH, see the section <>. * *schemaless*: This example starts Solr in standalone mode using a managed schema, as described in the section <>, and provides a very minimal pre-defined schema. Solr will run in <> with this configuration, where Solr will create fields in the schema on the fly and will guess field types used in incoming documents. + The configset used can be found in `$SOLR_HOME/server/solr/configsets/_default`. diff --git a/solr/solr-ref-guide/src/solr-tutorial.adoc b/solr/solr-ref-guide/src/solr-tutorial.adoc index 0eb1a8a5f05..37380d9ffb5 100644 --- a/solr/solr-ref-guide/src/solr-tutorial.adoc +++ b/solr/solr-ref-guide/src/solr-tutorial.adoc @@ -912,11 +912,6 @@ In this example, assume there is a directory named "Documents" locally. To index + You may get errors as it works through your documents. These might be caused by the field guessing, or the file type may not be supported. Indexing content such as this demonstrates the need to plan Solr for your data, which requires understanding it and perhaps also some trial and error. -DataImportHandler:: -Solr includes a tool called the <> which can connect to databases (if you have a jdbc driver), mail servers, or other structured data sources. There are several examples included for feeds, GMail, and a small HSQL database. -+ -The `README.md` file in `example/example-DIH` will give you details on how to start working with this tool. - SolrJ:: SolrJ is a Java-based client for interacting with Solr. Use <> for JVM-based languages or other <> to programmatically create documents to send to Solr. 
diff --git a/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc b/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc index 1c090a09693..4cf000510e0 100644 --- a/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc +++ b/solr/solr-ref-guide/src/uploading-data-with-index-handlers.adoc @@ -17,7 +17,7 @@ // specific language governing permissions and limitations // under the License. -Index Handlers are Request Handlers designed to add, delete and update documents to the index. In addition to having plugins for importing rich documents <> or from structured data sources using the <>, Solr natively supports indexing structured documents in XML, CSV and JSON. +Index Handlers are Request Handlers designed to add, delete and update documents to the index. In addition to having plugins for importing rich documents <>, Solr natively supports indexing structured documents in XML, CSV and JSON. The recommended way to configure and use request handlers is with path based names that map to paths in the request url. However, request handlers can also be specified with the `qt` (query type) parameter if the <> is appropriately configured. It is possible to access the same handler using more than one name, which can be useful if you wish to specify different sets of default options. diff --git a/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc b/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc deleted file mode 100644 index 98c315ea447..00000000000 --- a/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc +++ /dev/null @@ -1,1077 +0,0 @@ -= Uploading Structured Data Store Data with the Data Import Handler -:toclevels: 1 -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -WARNING: The Data Import Handler is deprecated is scheduled to be removed in 9.0. This functionality will likely migrate to a 3rd-party plugin in the near future. - -Many search applications store the content to be indexed in a structured data store, such as a relational database. The Data Import Handler (DIH) provides a mechanism for importing content from a data store and indexing it. - -In addition to relational databases, DIH can index content from HTTP based data sources such as RSS and ATOM feeds, e-mail repositories, and structured XML where an XPath processor is used to generate fields. - -== DIH Concepts and Terminology - -Descriptions of the Data Import Handler use several familiar terms, such as entity and processor, in specific ways, as explained in the table below. - -Datasource:: -As its name suggests, a datasource defines the location of the data of interest. For a database, it's a DSN. For an HTTP datasource, it's the base URL. - -Entity:: -Conceptually, an entity is processed to generate a set of documents, containing multiple fields, which (after optionally being transformed in various ways) are sent to Solr for indexing. 
For a RDBMS data source, an entity is a view or table, which would be processed by one or more SQL statements to generate a set of rows (documents) with one or more columns (fields). - -Processor:: -An entity processor does the work of extracting content from a data source, transforming it, and adding it to the index. Custom entity processors can be written to extend or replace the ones supplied. - -Transformer:: -Each set of fields fetched by the entity may optionally be transformed. This process can modify the fields, create new fields, or generate multiple rows/documents form a single row. There are several built-in transformers in the DIH, which perform functions such as modifying dates and stripping HTML. It is possible to write custom transformers using the publicly available interface. - -== Solr's DIH Examples - -The `example/example-DIH` directory contains several collections to demonstrate many of the features of the data import handler. These are available with the `dih` example from the <>: - -[source,bash] ----- -bin/solr -e dih ----- - -This launches a standalone Solr instance with several collections that correspond to detailed examples. The available examples are `atom`, `db`, `mail`, `solr`, and `tika`. - -All examples in this section assume you are running the DIH example server. - -== Configuring DIH - -=== Configuring solrconfig.xml for DIH - -The Data Import Handler has to be registered in `solrconfig.xml`. For example: - -[source,xml] ----- - - - /path/to/my/DIHconfigfile.xml - - ----- - -The only required parameter is the `config` parameter, which specifies the location of the DIH configuration file that contains specifications for the data source, how to fetch data, what data to fetch, and how to process it to generate the Solr documents to be posted to the index. - -You can have multiple DIH configuration files. Each file would require a separate definition in the `solrconfig.xml` file, specifying a path to the file. 
- -=== Configuring the DIH Configuration File - -An annotated configuration file, based on the `db` collection in the `dih` example server, is shown below (this file is located in `example/example-DIH/solr/db/conf/db-data-config.xml`). - -This example shows how to extract fields from four tables defining a simple product database. More information about the parameters and options shown here will be described in the sections following. - -[source,xml] ----- - - - - user="sa" password="secret"/> --<2> - --<3> - --<4> - - - --<5> - - - - - - - - - - - ----- -<1> The first element is the `dataSource`, in this case an HSQLDB database. The path to the JDBC driver and the JDBC URL and login credentials are all specified here. Other permissible attributes include whether or not to autocommit to Solr, the batchsize used in the JDBC connection, and a `readOnly` flag. -<2> The password attribute is optional if there is no password set for the DB. Alternately, the password can be encrypted; the section <> below describes how to do this. -<3> A `document` element follows, containing multiple `entity` elements. Note that `entity` elements can be nested, and this allows the entity relationships in the sample database to be mirrored here, so that we can generate a denormalized Solr record which may include multiple features for one item, for instance. -<4> The possible attributes for the `entity` element are described in later sections. Entity elements may contain one or more `field` elements, which map the data source field names to Solr fields, and optionally specify per-field transformations. This entity is the `root` entity. -<5> This entity is nested and reflects the one-to-many relationship between an item and its multiple features. Note the use of variables; `${item.ID}` is the value of the column 'ID' for the current item (`item` referring to the entity name). - -Datasources can still be specified in `solrconfig.xml`. 
These must be specified in the defaults section of the handler in `solrconfig.xml`. However, these are not parsed until the main configuration is loaded. - -The entire configuration itself can be passed as a request parameter using the `dataConfig` parameter rather than using a file. When configuration errors are encountered, the error message is returned in XML format. Due to security concerns, this only works if you start Solr with `-Denable.dih.dataConfigParam=true`. - -A `reload-config` command is also supported, which is useful for validating a new configuration file, or if you want to specify a file, load it, and not have it reloaded again on import. If there is an `xml` mistake in the configuration a user-friendly message is returned in `xml` format. You can then fix the problem and do a `reload-config`. - -TIP: You can also view the DIH configuration in the Solr Admin UI from the <>. It includes an interface to import content. - -==== DIH Request Parameters - -Request parameters can be substituted in configuration with placeholder `${dataimporter.request._paramname_}`, as in this example: - -[source,xml] ----- - ----- - -These parameters can then be passed to the `full-import` command or defined in the `` section in `solrconfig.xml`. This example shows the parameters with the full-import command: - -[source,bash] -http://localhost:8983/solr/dih/dataimport?command=full-import&jdbcurl=jdbc:hsqldb:./example-DIH/hsqldb/ex&jdbcuser=sa&jdbcpassword=secret - -==== Encrypting a Database Password - -The database password can be encrypted if necessary to avoid plaintext passwords being exposed in unsecured files. To do this, we will replace the password in `data-config.xml` with an encrypted password. We will use the `openssl` tool for the encryption, and the encryption key will be stored in a file which is only readable to the `solr` process. Please follow these steps: - -. Create a strong encryption password and store it in a file. 
Then make sure it is readable only for the `solr` user. Example commands: -+ -[source,text] -echo -n "a-secret" > /var/solr/data/dih-encryptionkey -chown solr:solr /var/solr/data/dih-encryptionkey -chmod 600 /var/solr/data/dih-encryptionkey -+ -CAUTION: Note that we use the `-n` argument to `echo` to avoid including a newline character at the end of the password. If you use another method to generate the encrypted password, make sure to avoid newlines as well. - -. Encrypt the JDBC database password using `openssl` as follows: -+ -[source,text] -echo -n "my-jdbc-password" | openssl enc -aes-128-cbc -a -salt -md md5 -pass file:/var/solr/data/dih-encryptionkey -+ -The output of the command will be a long string such as `U2FsdGVkX18QMjY0yfCqlfBMvAB4d3XkwY96L7gfO2o=`. You will use this as `password` in your `data-config.xml` file. - -. In your `data-config.xml`, you'll add the `password` and `encryptKeyFile` parameters to the `` configuration, as in this example: -+ -[source,xml] - - - - -== DataImportHandler Commands - -DIH commands are sent to Solr via an HTTP request. The following operations are supported. - -abort:: -Aborts an ongoing operation. For example: `\http://localhost:8983/solr/dih/dataimport?command=abort`. - -delta-import:: -For incremental imports and change detection. Only the <> supports delta imports. -+ -For example: `\http://localhost:8983/solr/dih/dataimport?command=delta-import`. -+ -This command supports the same `clean`, `commit`, `optimize` and `debug` parameters as `full-import` command described below. - -full-import:: -A Full Import operation can be started with a URL such as `\http://localhost:8983/solr/dih/dataimport?command=full-import`. The command returns immediately. -+ -The operation will be started in a new thread and the _status_ attribute in the response should be shown as _busy_. The operation may take some time depending on the size of dataset. Queries to Solr are not blocked during full-imports. 
-+ -When a `full-import` command is executed, it stores the start time of the operation in a file located at `conf/dataimport.properties`. This stored timestamp is used when a `delta-import` operation is executed. -+ -Commands available to `full-import` are: -+ -clean::: -Default is true. Tells whether to clean up the index before the indexing is started. -commit::: -Default is true. Tells whether to commit after the operation. -debug::: -Default is false. Runs the command in debug mode and is used by the interactive development mode. -+ -Note that in debug mode, documents are never committed automatically. If you want to run debug mode and commit the results too, add `commit=true` as a request parameter. -entity::: -The name of an entity directly under the `` tag in the configuration file. Use this to execute one or more entities selectively. -+ -Multiple "entity" parameters can be passed on to run multiple entities at once. If nothing is passed, all entities are executed. -optimize::: -Default is true. Tells Solr whether to optimize after the operation. -synchronous::: -Blocks request until import is completed. Default is false. - -reload-config:: -If the configuration file has been changed and you wish to reload it without restarting Solr, run the command `\http://localhost:8983/solr/dih/dataimport?command=reload-config` - -status:: -This command returns statistics on the number of documents created, deleted, queries run, rows fetched, status, and so on. For example: `\http://localhost:8983/solr/dih/dataimport?command=status`. - -show-config:: -This command responds with configuration: `\http://localhost:8983/solr/dih/dataimport?command=show-config`. - - -== Property Writer - -The `propertyWriter` element defines the date format and locale for use with delta queries. It is an optional configuration. Add the element to the DIH configuration file, directly under the `dataConfig` element. 
- -[source,xml] ----- - ----- - -The parameters available are: - -dateFormat:: -A `java.text.SimpleDateFormat` to use when converting the date to text. The default is `yyyy-MM-dd HH:mm:ss`. - -type:: -The implementation class. Use `SimplePropertiesWriter` for non-SolrCloud installations. If using SolrCloud, use `ZKPropertiesWriter`. -+ -If this is not specified, it will default to the appropriate class depending on whether SolrCloud mode is enabled. - -directory:: -Used with the `SimplePropertiesWriter` only. The directory for the properties file. If not specified, the default is `conf`. - -filename:: -Used with the `SimplePropertiesWriter` only. The name of the properties file. -+ -If not specified, the default is the requestHandler name (as defined in `solrconfig.xml`) with ".properties" appended (for example, `dataimport.properties`). - -locale:: -The locale. If not defined, the ROOT locale is used. It must be specified as language-country (https://tools.ietf.org/html/bcp47[BCP 47 language tag]). For example, `en-US`. - -== Data Sources - -A data source specifies the origin of data and its type. Somewhat confusingly, some data sources are configured within the associated entity processor. Data sources can also be specified in `solrconfig.xml`, which is useful when you have multiple environments (for example, development, QA, and production) differing only in their data sources. - -You can create a custom data source by writing a class that extends `org.apache.solr.handler.dataimport.DataSource`. - -The mandatory attributes for a data source definition are its name and type. The name identifies the data source to an Entity element. - -The types of data sources available are described below. - -=== ContentStreamDataSource - -This takes the POST data as the data source. This can be used with any EntityProcessor that uses a `DataSource`.
- -=== FieldReaderDataSource - -This can be used where a database field contains XML which you wish to process using the XPathEntityProcessor. You would set up a configuration with both JDBC and FieldReader data sources, and two entities, as follows: - -[source,xml] ----- - - - - - - - - - - - - ... - - - ----- - -The `FieldReaderDataSource` can take an `encoding` parameter, which will default to "UTF-8" if not specified. It must be specified as language-country. For example, `en-US`. - -=== FileDataSource - -This can be used like a <>, but is used to fetch content from files on disk. The only difference from `URLDataSource`, when accessing disk files, is how a pathname is specified. - -This data source accepts these optional attributes. - -basePath:: -The base path relative to which the value is evaluated if it is not absolute. - -encoding:: -Defines the character encoding to use. If not defined, UTF-8 is used. - -=== JdbcDataSource - -This is the default datasource. It's used with the <>. See the example in the <> section for details on configuration. `JdbcDatasource` supports at least the following attributes: - -driver, url, user, password, encryptKeyFile:: -Usual JDBC connection properties. - -batchSize:: -Passed to `Statement#setFetchSize`, default value 500. -+ -For MySQL driver, which doesn't honor fetchSize and pulls whole resultSet, which often lead to OutOfMemoryError. -+ -In this case, set `batchSize=-1` that pass setFetchSize(Integer.MIN_VALUE), and switch result set to pull row by row - -All of them substitute properties via `$\{placeholders}`. - -=== URLDataSource - -This data source is often used with <> to fetch content from an underlying `file://` or `http://` location. Here's an example: - -[source,xml] ----- - ----- - -The URLDataSource type accepts these optional parameters: - -baseURL:: -Specifies a new baseURL for pathnames. You can use this to specify host/port changes between Dev/QA/Prod environments. 
Using this attribute isolates the changes to be made to the `solrconfig.xml`. - -connectionTimeout:: -Specifies the length of time in milliseconds after which the connection should time out. The default value is 5000ms. - -encoding:: -By default the encoding in the response header is used. You can use this property to override the default encoding. - -readTimeout:: -Specifies the length of time in milliseconds after which a read operation should time out. The default value is 10000ms. - - -== Entity Processors - -Entity processors extract data, transform it, and add it to a Solr index. Examples of entities include views or tables in a data store. - -Each processor has its own set of attributes, described in its own section below. In addition, there are several attributes common to all entities which may be specified: - -dataSource:: -The name of a data source. If there are multiple data sources defined, use this attribute with the name of the data source for this entity. - -name:: -Required. The unique name used to identify an entity. - -pk:: -The primary key for the entity. It is optional, and required only when using delta-imports. It has no relation to the uniqueKey defined in `schema.xml` but they can both be the same. -+ -This attribute is mandatory if you do delta-imports and then refer to the column name in `${dataimporter.delta.}` which is used as the primary key. - -processor:: -Default is <>. Required only if the datasource is not RDBMS. - -onError:: -Defines what to do if an error is encountered. -+ -Permissible values are: - -abort::: Stops the import. - -skip::: Skips the current document. - -continue::: Ignores the error and processing continues. - -preImportDeleteQuery:: -Before a `full-import` command, use this query to clean up the index instead of using `\*:*`. This is honored only on an entity that is an immediate sub-child of ``. - -postImportDeleteQuery:: -Similar to `preImportDeleteQuery`, but it executes after the import has completed.
- -rootEntity:: -By default the entities immediately under `` are root entities. If this attribute is set to false, the entity directly falling under that entity will be treated as the root entity (and so on). For every row returned by the root entity, a document is created in Solr. - -transformer:: -Optional. One or more transformers to be applied on this entity. - -cacheImpl:: -Optional. A class (which must implement `DIHCache`) to use for caching this entity when doing lookups from an entity which wraps it. Provided implementation is `SortedMapBackedCache`. - -cacheKey:: -The name of a property of this entity to use as a cache key if `cacheImpl` is specified. - -cacheLookup:: -An entity + property name that will be used to lookup cached instances of this entity if `cacheImpl` is specified. - -where:: -An alternative way to specify `cacheKey` and `cacheLookup` concatenated with '='. -+ -For example, `where="CODE=People.COUNTRY_CODE"` is equivalent to `cacheKey="CODE" cacheLookup="People.COUNTRY_CODE"` - -child="true":: -Enables indexing document blocks aka <> for searching with <>. It can be only specified on the `` element under another root entity. It switches from default behavior (merging field values) to nesting documents as children documents. -+ -Note: parent `` should add a field which is used as a parent filter in query time. - -join="zipper":: -Enables merge join, aka "zipper" algorithm, for joining parent and child entities without cache. It should be specified at child (nested) ``. It implies that parent and child queries return results ordered by keys, otherwise it throws an exception. Keys should be specified either with `where` attribute or with `cacheKey` and `cacheLookup`. - -=== Entity Caching -Caching of entities in DIH is provided to avoid repeated lookups for same entities again and again. The default `SortedMapBackedCache` is a `HashMap` where a key is a field in the row and the value is a bunch of rows for that same key. 
- -In the example below, each `manufacturer` entity is cached using the `id` property as a cache key. Cache lookups will be performed for each `product` entity based on the product's `manu` property. When the cache has no data for a particular key, the query is run and the cache is populated - -[source,xml] ----- - - - ----- - -=== The SQL Entity Processor - -The SqlEntityProcessor is the default processor. The associated <> should be a JDBC URL. - -The entity attributes specific to this processor are shown in the table below. These are in addition to the attributes common to all entity processors described above. - -query:: -Required. The SQL query used to select rows. - -deltaQuery:: -SQL query used if the operation is delta-import. This query selects the primary keys of the rows which will be parts of the delta-update. The pks will be available to the deltaImportQuery through the variable `${dataimporter.delta.}`. - -parentDeltaQuery:: -SQL query used if the operation is `delta-import`. - -deletedPkQuery:: -SQL query used if the operation is `delta-import`. - -deltaImportQuery:: -SQL query used if the operation is `delta-import`. If this is not present, DIH tries to construct the import query by (after identifying the delta) modifying the 'query' (this is error prone). -+ -There is a namespace `${dataimporter.delta.}` which can be used in this query. For example, `select * from tbl where id=${dataimporter.delta.id}`. - -=== The XPathEntityProcessor - -This processor is used when indexing XML formatted data. The data source is typically <> or <>. XPath can also be used with the <> described below, to generate a document from each file. - -The entity attributes unique to this processor are shown below. These are in addition to the attributes common to all entity processors described above. - -Processor:: -Required. Must be set to `XpathEntityProcessor`. - -url:: -Required. The HTTP URL or file location. 
- -stream:: -Optional: Set to true for a large file or download. - -forEach:: -Required unless you define `useSolrAddSchema`. The XPath expression which demarcates each record. This will be used to set up the processing loop. - -xsl:: -Optional: Its value (a URL or filesystem path) is the name of a resource used as a preprocessor for applying the XSL transformation. - -useSolrAddSchema:: -Set this to true if the content is in the form of the standard Solr update XML schema. - -Each `` element in the entity can have the following attributes as well as the default ones. - -xpath:: -Required. The XPath expression which will extract the content from the record for this field. Only a subset of XPath syntax is supported. - -commonField:: -Optional. If true, then when this field is encountered in a record it will be copied to future records when creating a Solr document. - -flatten:: -Optional. If set to true, then any children text nodes are collected to form the value of a field. -+ -[WARNING] -The default value is false, meaning that if there are any sub-elements of the node pointed to by the XPath expression, they will be quietly omitted. - -Here is an example from the `atom` collection in the `dih` example (data-config file found at `example/example-DIH/solr/atom/conf/atom-data-config.xml`): - -[source,xml] ----- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ----- - -=== The MailEntityProcessor - -The MailEntityProcessor uses the Java Mail API to index email messages using the IMAP protocol. - -The MailEntityProcessor works by connecting to a specified mailbox using a username and password, fetching the email headers for each message, and then fetching the full email contents to construct a document (one document for each mail message). - -The entity attributes unique to the MailEntityProcessor are shown below. These are in addition to the attributes common to all entity processors described above. - -processor:: -Required. 
Must be set to `MailEntityProcessor`. - -user:: -Required. Username for authenticating to the IMAP server; this is typically the email address of the mailbox owner. - -password:: -Required. Password for authenticating to the IMAP server. - -host:: -Required. The IMAP server to connect to. - -protocol:: -Required. The IMAP protocol to use, valid values are: imap, imaps, gimap, and gimaps. - -fetchMailsSince:: -Optional. Date/time used to set a filter to import messages that occur after the specified date; expected format is: `yyyy-MM-dd HH:mm:ss`. - -folders:: -Required. Comma-delimited list of folder names to pull messages from, such as "inbox". - -recurse:: -Optional. Default is true. Flag to indicate if the processor should recurse all child folders when looking for messages to import. - -include:: -Optional. Comma-delimited list of folder patterns to include when processing folders (can be a literal value or regular expression). - -exclude:: -Optional. Comma-delimited list of folder patterns to exclude when processing folders (can be a literal value or regular expression). Excluded folder patterns take precedence over include folder patterns. - -processAttachement or processAttachments:: -Optional. Default is true. Use Tika to process message attachments. - -includeContent:: -Optional. Default is true. Include the message body when constructing Solr documents for indexing. - -Here is an example from the `mail` collection of the `dih` example (data-config file found at `example/example-DIH/mail/conf/mail-data-config.xml`): - -[source,xml] ----- - - - - - ----- - -==== Importing New Emails Only - -After running a full import, the MailEntityProcessor keeps track of the timestamp of the previous import so that subsequent imports can use the fetchMailsSince filter to only pull new messages from the mail server. This occurs automatically using the DataImportHandler `dataimport.properties` file (stored in `conf`). 
- -For instance, if you set `fetchMailsSince="2014-08-22 00:00:00"` in your `mail-data-config.xml`, then all mail messages that occur after this date will be imported on the first run of the importer. Subsequent imports will use the date of the previous import as the `fetchMailsSince` filter, so that only new emails since the last import are indexed each time. - -==== GMail Extensions - -When connecting to a GMail account, you can improve the efficiency of the MailEntityProcessor by setting the protocol to *gimap* or *gimaps*. - -This allows the processor to send the `fetchMailsSince` filter to the GMail server to have the date filter applied on the server, which means the processor only receives new messages from the server. However, GMail only supports date granularity, so the server-side filter may return previously seen messages if run more than once a day. - -=== The TikaEntityProcessor - -The TikaEntityProcessor uses Apache Tika to process incoming documents. This is similar to <>, but using DataImportHandler options instead. - -The parameters for this processor are described in the table below. These are in addition to the attributes common to all entity processors described above. - -dataSource:: -This parameter defines the data source and an optional name which can be referred to in later parts of the configuration if needed. This is the same `dataSource` explained in the description of general entity processor attributes above. -+ -The available data source types for this processor are: -+ -* BinURLDataSource: used for HTTP resources, but can also be used for files. -* BinContentStreamDataSource: used for uploading content as a stream. -* BinFileDataSource: used for content on the local filesystem. - -url:: -Required. The path to the source file(s), as a file path or a traditional internet URL. - -htmlMapper:: -Optional. Allows control of how Tika parses HTML. 
If this parameter is defined, it must be either *default* or *identity*; if it is absent, "default" is assumed. -+ -The "default" mapper strips much of the HTML from documents while the "identity" mapper passes all HTML as-is with no modifications. - -format:: -The output format. The options are *text*, *xml*, *html* or *none*. The default is "text" if not defined. The format "none" can be used if metadata only should be indexed and not the body of the documents. - -parser:: -Optional. The default parser is `org.apache.tika.parser.AutoDetectParser`. If a custom or other parser should be used, it should be entered as a fully-qualified name of the class and path. - -fields:: -The list of fields from the input documents and how they should be mapped to Solr fields. If the attribute `meta` is defined as "true", the field will be obtained from the metadata of the document and not parsed from the body of the main text. - -extractEmbedded:: -Instructs the TikaEntityProcessor to extract embedded documents or attachments when *true*. If false, embedded documents and attachments will be ignored. - -onError:: -By default, the TikaEntityProcessor will stop processing documents if it finds one that generates an error. If you define `onError` to "skip", the TikaEntityProcessor will instead skip documents that fail processing and log a message that the document was skipped. - -Here is an example from the `tika` collection of the `dih` example (data-config file found in `example/example-DIH/tika/conf/tika-data-config.xml`): - -[source,xml] ----- - - - - - - - - - - - - - - - - - - - - - ----- - -=== The FileListEntityProcessor - -This processor is basically a wrapper, and is designed to generate a set of files satisfying conditions specified in the attributes which can then be passed to another processor, such as the <>. - -The entity information for this processor would be nested within the FileListEntity entry. 
It generates five implicit fields: `fileAbsolutePath`, `fileDir`, `fileSize`, `fileLastModified`, and `file`, which can be used in the nested processor. This processor does not use a data source. - -The attributes specific to this processor are described in the table below: - -fileName:: -Required. A regular expression pattern to identify files to be included. - -basedir:: -Required. The base directory (absolute path). - -recursive:: -Whether to search directories recursively. Default is 'false'. - -excludes:: -A regular expression pattern to identify files which will be excluded. - -newerThan:: -A date in the format `yyyy-MM-ddHH:mm:ss` or a date math expression (`NOW - 2YEARS`). - -olderThan:: -A date, using the same formats as newerThan. - -rootEntity:: -This should be set to false. This ensures that each row (filepath) emitted by this processor is considered to be a document. - -dataSource:: -Must be set to null. - -The example below shows the combination of the FileListEntityProcessor with another processor which will generate a set of fields from each file found. - -[source,xml] ----- - - - - - - - - - - - - - - - ----- - -=== LineEntityProcessor - -This EntityProcessor reads all content from the data source on a line by line basis and returns a field called `rawLine` for each line read. The content is not parsed in any way; however, you may add transformers to manipulate the data within the `rawLine` field, or to create other additional fields. - -The lines read can be filtered by two regular expressions specified with the `acceptLineRegex` and `omitLineRegex` attributes. - -The LineEntityProcessor has the following attributes: - -url:: -A required attribute that specifies the location of the input file in a way that is compatible with the configured data source. If this value is relative and you are using FileDataSource or URLDataSource, it is assumed to be relative to baseLoc.
- -acceptLineRegex:: -An optional attribute that, if present, discards any line which does not match the regular expression. - -omitLineRegex:: -An optional attribute that is applied after any `acceptLineRegex` and that discards any line which matches this regular expression. - -For example: - -[source,xml] ----- - - ----- - -While there are use cases where you might need to create a Solr document for each line read from a file, it is expected that in most cases the lines read by this processor will consist of a pathname, which in turn will be consumed by another entity processor, such as the XPathEntityProcessor. - -=== PlainTextEntityProcessor - -This EntityProcessor reads all content from the data source into a single implicit field called `plainText`. The content is not parsed in any way; however, you may add transformers to manipulate the data within the `plainText` as needed, or to create other additional fields. - -For example: - -[source,xml] ----- - - - - ----- - -Ensure that the dataSource is of type `DataSource` (`FileDataSource`, `URLDataSource`). - -=== SolrEntityProcessor - -This EntityProcessor imports data from different Solr instances and cores. The data is retrieved based on a specified filter query. This EntityProcessor is useful in cases where you want to copy your Solr index and want to modify the data in the target index. - -The SolrEntityProcessor can only copy fields that are stored in the source index. - -The SolrEntityProcessor supports the following parameters: - -url:: -Required. The URL of the source Solr instance and/or core. - -query:: -Required. The main query to execute on the source index. - -fq:: -Any filter queries to execute on the source index. If more than one filter query is defined, they must be separated by a comma. - -rows:: -The number of rows to return for each iteration. The default is 50 rows. - -fl:: -A comma-separated list of fields to fetch from the source index. Note, these fields must be stored in the source Solr instance.
- -qt:: -The search handler to use, if not the default. - -wt:: -The response format to use, either *javabin* or *xml*. - -timeout:: -The query timeout in seconds. The default is 5 minutes (300 seconds). - -cursorMark="true":: -Use this to enable cursor for efficient result set scrolling. - -sort="id asc":: -This should be used to specify a sort parameter referencing the uniqueKey field of the source Solr instance. See <> for details. - -Here is a simple example of a SolrEntityProcessor: - -[source,xml] - - - - - - -== Transformers - -Transformers manipulate the fields in a document returned by an entity. A transformer can create new fields or modify existing ones. You must tell the entity which transformers your import operation will be using, by adding an attribute containing a comma separated list to the `` element. - -[source,xml] ----- - ----- - -Specific transformation rules are then added to the attributes of a `` element, as shown in the examples below. The transformers are applied in the order in which they are specified in the transformer attribute. - -The DataImportHandler contains several built-in transformers. -You can also write your own custom transformers if necessary. -The ScriptTransformer described below offers an alternative method for writing your own transformers. - -=== ClobTransformer - -You can use the ClobTransformer to create a string out of a CLOB in a database. A http://en.wikipedia.org/wiki/Character_large_object[CLOB] is a character large object: a collection of character data typically stored in a separate location that is referenced in the database. - -The ClobTransformer accepts these attributes: - -clob:: -Boolean value to signal if ClobTransformer should process this field or not. If this attribute is omitted, then the corresponding field is not transformed. - -sourceColName:: -The source column to be used as input. If this is absent source and target are same - -Here's an example of invoking the ClobTransformer. 
- -[source,xml] ----- - - - ... - ----- - -=== The DateFormatTransformer - -This transformer converts dates from one format to another. This would be useful, for example, in a situation where you wanted to convert a field with a fully specified date/time into a less precise date format, for use in faceting. - -DateFormatTransformer applies only on the fields with an attribute `dateTimeFormat`. Other fields are not modified. - -This transformer recognizes the following attributes: - -dateTimeFormat:: -The format used for parsing this field. This must comply with the syntax of the {java-javadocs}java/text/SimpleDateFormat.html[Java SimpleDateFormat] class. - -sourceColName:: -The column on which the dateFormat is to be applied. If this is absent source and target are same. - -locale:: -The locale to use for date transformations. If not defined, the ROOT locale is used. It must be specified as language-country (https://tools.ietf.org/html/bcp47[BCP 47 language tag]). For example, `en-US`. - -Here is example code that returns the date rounded up to the month "2007-JUL": - -[source,xml] ----- - - ... - - ----- - -=== The HTMLStripTransformer - -You can use this transformer to strip HTML out of a field. - -There is one attribute for this transformer, `stripHTML`, which is a boolean value (true or false) to signal if the HTMLStripTransformer should process the field or not. - -For example: - -[source,xml] ----- - - - ... - ----- - -=== The LogTransformer - -You can use this transformer to log data to the console or log files. For example: - -[source,xml] ----- - - .... - ----- - -Unlike other transformers, the LogTransformer does not apply to any field, so the attributes are applied on the entity itself. - -=== The NumberFormatTransformer - -Use this transformer to parse a number from a string, converting it into the specified format, and optionally using a different locale. - -NumberFormatTransformer will be applied only to fields with an attribute `formatStyle`. 
- -This transformer recognizes the following attributes: - -formatStyle:: -The format used for parsing this field. The value of the attribute must be one of `number`, `percent`, `integer`, or `currency`. This uses the semantics of the Java NumberFormat class. - -sourceColName:: -The column on which the NumberFormat is to be applied. If this attribute is absent, the source column and the target column are the same. - -locale:: -The locale to be used for parsing the strings. If not defined, the ROOT locale is used. It must be specified as language-country (https://tools.ietf.org/html/bcp47[BCP 47 language tag]). For example, `en-US`. - -For example: - -[source,xml] ----- - ... - - - - - ----- - -=== The RegexTransformer - -The regex transformer helps in extracting or manipulating values from fields (from the source) using Regular Expressions. The actual class name is `org.apache.solr.handler.dataimport.RegexTransformer`, but as it belongs to the default package, the package name can be omitted. - -The table below describes the attributes recognized by the regex transformer. - -regex:: -The regular expression that is used to match against the column or sourceColName's value(s). If replaceWith is absent, each regex _group_ is taken as a value and a list of values is returned. - -sourceColName:: -The column on which the regex is to be applied. If not present, then the source and target are identical. - -splitBy:: -Used to split a string. It returns a list of values. Note, this is a regular expression so it may need to be escaped (e.g., via back-slashes). - -groupNames:: -A comma separated list of field column names, used where the regex contains groups and each group is to be saved to a different field. If some groups are not to be named, leave a space between commas. - -replaceWith:: -Used along with regex. It is equivalent to the method `new String().replaceAll(, )`.
- -Here is an example of configuring the regex transformer: - -[source,xml] ----- - --<1> - --<2> - - - - - - - --<3> - ----- - -<1> In this example, `regex` and `sourceColName` are custom attributes used by the transformer. -<2> The transformer reads the field `full_name` from the result set and transforms it to two new target fields, `firstName` and `lastName`. Even though the query returned only one column, `full_name`, in the result set, the Solr document gets two extra fields `firstName` and `lastName` which are "derived" fields. These new fields are only created if the regexp matches. -<3> The `emailids` field in the table can be a comma-separated value. It ends up producing one or more email IDs, and we expect the `mailId` to be a multivalued field in Solr. - -Note that this transformer can be used to either split a string into tokens based on a splitBy pattern, or to perform a string substitution as per `replaceWith`, or it can assign groups within a pattern to a list of `groupNames`. It decides what it is to do based upon the above attributes `splitBy`, `replaceWith` and `groupNames` which are looked for in order. This first one found is acted upon and other unrelated attributes are ignored. - -=== The ScriptTransformer - -The script transformer allows arbitrary transformer functions to be written in any scripting language supported by Java, such as Javascript, JRuby, Jython, Groovy, or BeanShell. Javascript is integrated into Java by default; you'll need to integrate other languages yourself. - -Each function you write must accept a row variable (which corresponds to a `Java Map`, thus permitting `get,put,remove` operations). Thus you can modify the value of an existing field or add new fields. The return value of the function is the returned object. - -The script is inserted into the DIH configuration file at the top level and is called once for each row. - -Here is a simple example. - -[source,xml] ----- - - - - - - - - - - - .... 
- - - ----- - -=== The TemplateTransformer - -You can use the template transformer to construct or modify a field value, perhaps using the value of other fields. You can insert extra text into the template. - -[source,xml] ----- - - ... - - - ----- - -== Special Commands for DIH - -You can pass special commands to the DIH by adding any of the variables listed below to any row returned by any component: - -$skipDoc:: -Skip the current document; that is, do not add it to Solr. The value can be the string `true` or `false`. - -$skipRow:: -Skip the current row. The document will be added with rows from other entities. The value can be the string `true` or `false`. - -$deleteDocById:: -Delete a document from Solr with this ID. The value has to be the `uniqueKey` value of the document. - -$deleteDocByQuery:: -Delete documents from Solr using this query. The value must be a Solr Query. diff --git a/solr/solr-ref-guide/src/using-the-solr-administration-user-interface.adoc b/solr/solr-ref-guide/src/using-the-solr-administration-user-interface.adoc index a74b3edbf00..e764ed8c656 100644 --- a/solr/solr-ref-guide/src/using-the-solr-administration-user-interface.adoc +++ b/solr/solr-ref-guide/src/using-the-solr-administration-user-interface.adoc @@ -30,7 +30,6 @@ The <>* is a section explaining additional screens available for each collection. // TODO: SOLR-10655 BEGIN: refactor this into a 'collection-screens-list.include.adoc' file for reuse ** <> - lets you analyze the data found in specific fields. -** <> - shows you information about the current status of the Data Import Handler. ** <> - provides a simple form allowing you to execute various Solr indexing commands directly from the browser. ** <> - shows the current core configuration files such as `solrconfig.xml`. ** <> - lets you submit a structured query about various elements of a core. 
diff --git a/solr/webapp/web/css/angular/dataimport.css b/solr/webapp/web/css/angular/dataimport.css deleted file mode 100644 index ad37896dec2..00000000000 --- a/solr/webapp/web/css/angular/dataimport.css +++ /dev/null @@ -1,371 +0,0 @@ -/* - -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to You under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -*/ - -#content #dataimport -{ - background-image: url( ../../img/div.gif ); - background-position: 21% 0; - background-repeat: repeat-y; -} - -#content #dataimport #frame -{ - float: right; - width: 78%; -} - -#content #dataimport #form -{ - float: left; - width: 20%; -} - -#content #dataimport #form #navigation -{ - border-right: 0; -} - -#content #dataimport #form #navigation a -{ - background-image: url( ../../img/ico/status-offline.png ); -} - -#content #dataimport #form #navigation .current a -{ - background-image: url( ../../img/ico/status.png ); -} - -#content #dataimport #form form -{ - border-top: 1px solid #f0f0f0; - margin-top: 10px; - padding-top: 5px; -} - -#content #dataimport #form label -{ - cursor: pointer; - display: block; - margin-top: 5px; -} - -#content #dataimport #form input, -#content #dataimport #form select, -#content #dataimport #form textarea -{ - margin-bottom: 2px; - width: 100%; -} - -#content #dataimport #form input -{ - width: 98%; -} - -#content #dataimport #form button -{ - margin-top: 10px; -} - -#content #dataimport #form .execute span -{ - background-image: url( ../../img/ico/document-import.png ); -} - -#content #dataimport #form .refresh-status span -{ - background-image: url( ../../img/ico/arrow-circle.png ); -} - -#content #dataimport #form .refresh-status span.success -{ - background-image: url( ../../img/ico/tick.png ); -} - -#content #dataimport #form #start -{ - float: left; - width: 47%; -} - -#content #dataimport #form #rows -{ - float: right; - width: 47%; -} - -#content #dataimport #form .checkbox input -{ - margin-bottom: 0; - width: auto; -} - -#content #dataimport #form #auto-refresh-status -{ - margin-top: 20px; -} - -#content #dataimport #form #auto-refresh-status a -{ - background-image: url( ../../img/ico/ui-check-box-uncheck.png ); - background-position: 0 50%; - color: #4D4D4D; - display: block; - padding-left: 21px; -} - -#content #dataimport #form #auto-refresh-status a.on, -#content #dataimport #form 
#auto-refresh-status a:hover -{ - color: #333; -} - -#content #dataimport #form #auto-refresh-status a.on -{ - background-image: url( ../../img/ico/ui-check-box.png ); -} - -#content #dataimport #current_state -{ - padding: 10px; - margin-bottom: 20px; -} - -#content #dataimport #current_state .last_update, -#content #dataimport #current_state .info -{ - display: block; - padding-left: 21px; -} - -#content #dataimport #current_state .last_update -{ - color: #4D4D4D; - font-size: 11px; -} - -#content #dataimport #current_state .info -{ - background-position: 0 1px; - position: relative; -} - -#content #dataimport #current_state .info .details span -{ - color: #c0c0c0; -} - -#content #dataimport #current_state .info .abort-import -{ - position: absolute; - right: 0px; - top: 0px; -} - -#content #dataimport #current_state .info .abort-import span -{ - background-image: url( ../../img/ico/cross.png ); -} - -#content #dataimport #current_state .info .abort-import.success span -{ - background-image: url( ../../img/ico/tick.png ); -} - -#content #dataimport #current_state.indexing -{ - background-color: #f9f9f9; -} - -#content #dataimport #current_state.indexing .info -{ - background-image: url( ../../img/ico/hourglass.png ); -} - -#content #dataimport #current_state.indexing .info .abort-import -{ - display: block; -} - -#content #dataimport #current_state.success -{ - background-color: #e6f3e6; -} - -#content #dataimport #current_state.success .info -{ - background-image: url( ../../img/ico/tick-circle.png ); -} - -#content #dataimport #current_state.success .info strong -{ - color: #080; -} - -#content #dataimport #current_state.aborted -{ - background-color: #f3e6e6; -} - -#content #dataimport #current_state.aborted .info -{ - background-image: url( ../../img/ico/slash.png ); -} - -#content #dataimport #current_state.aborted .info strong -{ - color: #800; -} - -#content #dataimport #current_state.failure -{ - background-color: #f3e6e6; -} - -#content #dataimport 
#current_state.failure .info -{ - background-image: url( ../../img/ico/cross-button.png ); -} - -#content #dataimport #current_state.failure .info strong -{ - color: #800; -} - -#content #dataimport #current_state.idle -{ - background-color: #e6e6ff; -} - -#content #dataimport #current_state.idle .info -{ - background-image: url( ../../img/ico/information.png ); -} - -#content #dataimport #error, -#content #dataimport #deprecation_message -{ - background-color: #f00; - background-image: url( ../../img/ico/construction.png ); - background-position: 10px 50%; - color: #fff; - font-weight: bold; - margin-bottom: 20px; - padding: 10px; - padding-left: 35px; -} - -#content #dataimport .block h2 -{ - border-color: #c0c0c0; - padding-left: 5px; - position: relative; -} - -#content #dataimport .block.hidden h2 -{ - border-color: #fafafa; -} - -#content #dataimport .block h2 a.toggle -{ - background-image: url( ../../img/ico/toggle-small.png ); - background-position: 0 50%; - padding-left: 21px; -} - -#content #dataimport .block.hidden h2 a.toggle -{ - background-image: url( ../../img/ico/toggle-small-expand.png ); -} - -#content #dataimport #config h2 a.r -{ - background-position: 3px 50%; - display: block; - float: right; - margin-left: 10px; - padding-left: 24px; - padding-right: 3px; -} - -#content #dataimport #config h2 a.reload_config -{ - background-image: url( ../../img/ico/arrow-circle.png ); -} - -#content #dataimport #config h2 a.reload_config.success -{ - background-image: url( ../../img/ico/tick.png ); -} - -#content #dataimport #config h2 a.reload_config.error -{ - background-image: url( ../../img/ico/slash.png ); -} - -#content #dataimport #config h2 a.debug_mode -{ - background-image: url( ../../img/ico/hammer.png ); - color: #4D4D4D; -} - -#content #dataimport #config.debug_mode h2 a.debug_mode -{ - background-color: #ff0; - background-image: url( ../../img/ico/hammer-screwdriver.png ); - color: #333; -} - -#content #dataimport #config .content -{ - 
padding: 5px 2px; -} - -#content #dataimport #dataimport_config .loader -{ - background-position: 0 50%; - padding-left: 21px; -} - -#content #dataimport #dataimport_config .formatted -{ - border: 1px solid #fff; - display: block; - padding: 2px; -} - -#content #dataimport .debug_mode #dataimport_config .editable -{ - display: block; -} - -#content #dataimport #dataimport_config .editable textarea -{ - font-family: monospace; - height: 120px; - min-height: 60px; - width: 100%; -} - -#content #dataimport #debug_response em -{ - color: #4D4D4D; - font-style: normal; -} diff --git a/solr/webapp/web/css/angular/menu.css b/solr/webapp/web/css/angular/menu.css index 24fd3f6ec3f..fce8eb3136c 100644 --- a/solr/webapp/web/css/angular/menu.css +++ b/solr/webapp/web/css/angular/menu.css @@ -292,7 +292,6 @@ limitations under the License. .sub-menu .ping a { background-image: url( ../../img/ico/system-monitor.png ); } .sub-menu .logging a { background-image: url( ../../img/ico/inbox-document-text.png ); } .sub-menu .plugins a { background-image: url( ../../img/ico/block.png ); } -.sub-menu .dataimport a { background-image: url( ../../img/ico/document-import.png ); } .sub-menu .segments a { background-image: url( ../../img/ico/construction.png ); } diff --git a/solr/webapp/web/index.html b/solr/webapp/web/index.html index db50db7d355..2eba8b66025 100644 --- a/solr/webapp/web/index.html +++ b/solr/webapp/web/index.html @@ -30,7 +30,6 @@ limitations under the License. - @@ -79,7 +78,6 @@ limitations under the License. - @@ -193,7 +191,6 @@ limitations under the License.
  • Overview
  • Overview
  • Analysis
  • -
  • Dataimport
  • Documents
  • Files
  • Query
  • @@ -218,7 +215,6 @@ limitations under the License.
    • Overview
    • Analysis
    • -
    • Dataimport
    • Documents
    • Files
    • Ping ({{pingMS}}ms)
    • diff --git a/solr/webapp/web/js/angular/app.js b/solr/webapp/web/js/angular/app.js index d7d0cef2ca8..70a2a26031b 100644 --- a/solr/webapp/web/js/angular/app.js +++ b/solr/webapp/web/js/angular/app.js @@ -130,14 +130,6 @@ solrAdminApp.config([ templateUrl: 'partials/analysis.html', controller: 'AnalysisController' }). - when('/:core/dataimport', { - templateUrl: 'partials/dataimport.html', - controller: 'DataImportController' - }). - when('/:core/dataimport/:handler*', { - templateUrl: 'partials/dataimport.html', - controller: 'DataImportController' - }). when('/:core/documents', { templateUrl: 'partials/documents.html', controller: 'DocumentsController' @@ -168,14 +160,6 @@ solrAdminApp.config([ templateUrl: 'partials/replication.html', controller: 'ReplicationController' }). - when('/:core/dataimport', { - templateUrl: 'partials/dataimport.html', - controller: 'DataImportController' - }). - when('/:core/dataimport/:handler*', { - templateUrl: 'partials/dataimport.html', - controller: 'DataImportController' - }). when('/:core/schema', { templateUrl: 'partials/schema.html', controller: 'SchemaController' diff --git a/solr/webapp/web/js/angular/controllers/dataimport.js b/solr/webapp/web/js/angular/controllers/dataimport.js deleted file mode 100644 index c31b6f0fbe7..00000000000 --- a/solr/webapp/web/js/angular/controllers/dataimport.js +++ /dev/null @@ -1,302 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -var dataimport_timeout = 2000; - -solrAdminApp.controller('DataImportController', - function($scope, $rootScope, $routeParams, $location, $timeout, $interval, $cookies, Mbeans, DataImport, Constants) { - $scope.resetMenu("dataimport", Constants.IS_COLLECTION_PAGE); - - $scope.refresh = function () { - Mbeans.info({core: $routeParams.core, cat: 'QUERY'}, function (data) { - var mbeans = data['solr-mbeans'][1]; - $scope.handlers = []; - for (var key in mbeans) { - if (mbeans[key]['class'] !== key && mbeans[key]['class'] === 'org.apache.solr.handler.dataimport.DataImportHandler') { - $scope.handlers.push(key); - } - } - $scope.hasHandlers = $scope.handlers.length > 0; - - if (!$routeParams.handler) { - $location.path("/" + $routeParams.core + "/dataimport/" + $scope.handlers[0]); - } else { - $scope.currentHandler = $routeParams.handler; - } - }); - - $scope.handler = $routeParams.handler; - if ($scope.handler && $scope.handler[0]=="/") { - $scope.handler = $scope.handler.substr(1); - } - if ($scope.handler) { - DataImport.config({core: $routeParams.core, name: $scope.handler}, function (data) { - try { - $scope.config = data.config; - var xml = $.parseXML(data.config); - $scope.entities = []; - $('document > entity', xml).each(function (i, element) { - $scope.entities.push($(element).attr('name')); - }); - $scope.refreshStatus(); - } catch (err) { - console.log(err); - } - }); - } - $scope.lastUpdate = "unknown"; - $scope.lastUpdateUTC = ""; - }; - - $scope.toggleDebug = function () { - $scope.isDebugMode = !$scope.isDebugMode; - if ($scope.isDebugMode) { - // 
also enable Debug checkbox - $scope.form.showDebug = true; - } - $scope.showConfiguration = true; - } - - $scope.toggleConfiguration = function () { - $scope.showConfiguration = !$scope.showConfiguration; - } - - $scope.toggleRawStatus = function () { - $scope.showRawStatus = !$scope.showRawStatus; - } - - $scope.toggleRawDebug = function () { - $scope.showRawDebug = !$scope.showRawDebug; - } - - $scope.reload = function () { - DataImport.reload({core: $routeParams.core, name: $scope.handler}, function () { - $scope.reloaded = true; - $timeout(function () { - $scope.reloaded = false; - }, 5000); - $scope.refresh(); - }); - } - - $scope.form = { - command: "full-import", - verbose: false, - clean: false, - commit: true, - showDebug: false, - custom: "", - core: $routeParams.core - }; - - $scope.submit = function () { - var params = {}; - for (var key in $scope.form) { - if (key == "showDebug") { - if ($scope.form.showDebug) { - params["debug"] = true; - } - } else { - params[key] = $scope.form[key]; - } - } - if (params.custom.length) { - var customParams = $scope.form.custom.split("&"); - for (var i in customParams) { - var parts = customParams[i].split("="); - params[parts[0]] = parts[1]; - } - } - delete params.custom; - - if ($scope.isDebugMode) { - params.dataConfig = $scope.config; - } - - params.core = $routeParams.core; - params.name = $scope.handler; - - DataImport.post(params, function (data) { - $scope.rawResponse = JSON.stringify(data, null, 2); - $scope.refreshStatus(); - }); - }; - - $scope.abort = function () { - $scope.isAborting = true; - DataImport.abort({core: $routeParams.core, name: $scope.handler}, function () { - $timeout(function () { - $scope.isAborting = false; - $scope.refreshStatus(); - }, 4000); - }); - } - - $scope.refreshStatus = function () { - - console.log("Refresh Status"); - - $scope.isStatusLoading = true; - DataImport.status({core: $routeParams.core, name: $scope.handler}, function (data) { - if (data[0] == "<") { - 
$scope.hasHandlers = false; - return; - } - - var now = new Date(); - $scope.lastUpdate = now.toTimeString().split(' ').shift(); - $scope.lastUpdateUTC = now.toUTCString(); - var messages = data.statusMessages; - var messagesCount = 0; - for( var key in messages ) { messagesCount++; } - - if (data.status == 'busy') { - $scope.status = "indexing"; - - $scope.timeElapsed = data.statusMessages['Time Elapsed']; - $scope.elapsedSeconds = parseSeconds($scope.timeElapsed); - - var info = $scope.timeElapsed ? 'Indexing since ' + $scope.timeElapsed : 'Indexing ...'; - $scope.info = showInfo(messages, true, info, $scope.elapsedSeconds); - - } else if (messages.RolledBack) { - $scope.status = "failure"; - $scope.info = showInfo(messages, true); - } else if (messages.Aborted) { - $scope.status = "aborted"; - $scope.info = showInfo(messages, true, 'Aborting current Import ...'); - } else if (data.status == "idle" && messagesCount != 0) { - $scope.status = "success"; - $scope.info = showInfo(messages, true); - } else { - $scope.status = "idle"; - $scope.info = showInfo(messages, false, 'No information available (idle)'); - } - - delete data.$promise; - delete data.$resolved; - - $scope.rawStatus = JSON.stringify(data, null, 2); - - $scope.isStatusLoading = false; - $scope.statusUpdated = true; - $timeout(function () { - $scope.statusUpdated = false; - }, dataimport_timeout / 2); - }); - }; - - $scope.updateAutoRefresh = function () { - $scope.autorefresh = !$scope.autorefresh; - $cookies.dataimport_autorefresh = $scope.autorefresh ? 
true : null; - if ($scope.autorefresh) { - $scope.refreshTimeout = $interval($scope.refreshStatus, dataimport_timeout); - var onRouteChangeOff = $scope.$on('$routeChangeStart', function() { - $interval.cancel($scope.refreshTimeout); - onRouteChangeOff(); - }); - - } else if ($scope.refreshTimeout) { - $interval.cancel($scope.refreshTimeout); - } - $scope.refreshStatus(); - }; - - $scope.refresh(); - -}); - -var showInfo = function (messages, showFull, info_text, elapsed_seconds) { - - var info = {}; - if (info_text) { - info.text = info_text; - } else { - info.text = messages[''] || ''; - // format numbers included in status nicely - /* @todo this pretty printing is hard to work out how to do in an Angularesque way: - info.text = info.text.replace(/\d{4,}/g, - function (match, position, string) { - return app.format_number(parseInt(match, 10)); - } - ); - */ - - var time_taken_text = messages['Time taken']; - info.timeTaken = parseSeconds(time_taken_text); - } - info.showDetails = false; - - if (showFull) { - if (!elapsed_seconds) { - var time_taken_text = messages['Time taken']; - elapsed_seconds = parseSeconds(time_taken_text); - } - - info.showDetails = true; - - var document_config = { - 'Requests': 'Total Requests made to DataSource', - 'Fetched': 'Total Rows Fetched', - 'Skipped': 'Total Documents Skipped', - 'Processed': 'Total Documents Processed' - }; - - info.docs = []; - for (var key in document_config) { - var value = parseInt(messages[document_config[key]], 10); - var doc = {desc: document_config[key], name: key, value: value}; - if (elapsed_seconds && key != 'Skipped') { - doc.speed = Math.round(value / elapsed_seconds); - } - info.docs.push(doc); - } - - var dates_config = { - 'Started': 'Full Dump Started', - 'Aborted': 'Aborted', - 'Rolledback': 'Rolledback' - }; - - info.dates = []; - for (var key in dates_config) { - var value = messages[dates_config[key]]; - if (value) { - value = value.replace(" ", "T")+".000Z"; - console.log(value); - var date 
= {desc: dates_config[key], name: key, value: value}; - info.dates.push(date); - } - } - } - return info; -} - -var parseSeconds = function(time) { - var seconds = 0; - var arr = new String(time || '').split('.'); - var parts = arr[0].split(':').reverse(); - - for (var i = 0; i < parts.length; i++) { - seconds += ( parseInt(parts[i], 10) || 0 ) * Math.pow(60, i); - } - - if (arr[1] && 5 <= parseInt(arr[1][0], 10)) { - seconds++; // treat more or equal than .5 as additional second - } - return seconds; -} diff --git a/solr/webapp/web/js/angular/services.js b/solr/webapp/web/js/angular/services.js index 8b371b6fff3..51dde424825 100644 --- a/solr/webapp/web/js/angular/services.js +++ b/solr/webapp/web/js/angular/services.js @@ -173,21 +173,6 @@ solrAdminServices.factory('System', "field": {params: {"analysis.showmatch": true}} }); }]) -.factory('DataImport', - ['$resource', function($resource) { - return $resource(':core/:name', {core: '@core', name: '@name', indent:'on', wt:'json', _:Date.now()}, { - "config": {params: {command: "show-config"}, headers: {doNotIntercept: "true"}, - transformResponse: function(data) { - return {config: data}; - } - }, - "status": {params: {command: "status"}, headers: {doNotIntercept: "true"}}, - "reload": {params: {command: "reload-config"}}, - "post": {method: "POST", - headers: {'Content-type': 'application/x-www-form-urlencoded'}, - transformRequest: function(data) { return $.param(data) }} - }); - }]) .factory('Ping', ['$resource', function($resource) { return $resource(':core/admin/ping', {wt:'json', core: '@core', ts:Date.now(), _:Date.now()}, { diff --git a/solr/webapp/web/partials/dataimport.html b/solr/webapp/web/partials/dataimport.html deleted file mode 100644 index a27be07a5a8..00000000000 --- a/solr/webapp/web/partials/dataimport.html +++ /dev/null @@ -1,210 +0,0 @@ - -
      - -
      The solrconfig.xml file for this index does not have an operational DataImportHandler defined!
      -
      -
      The Data Import Handler is deprecated as of Solr 8.6 and may be removed in a future release. A community supported package for may be used instead (See SOLR-14066 for details).
      - -
      - -
      - -

      Last Update: {{lastUpdate}}

      -
      - - {{info.text}} (Duration: {{info.timeTaken | readableSeconds }}) - -
      -
      - - {{ doc.name }}: {{doc.value | number}} {{ doc.speed | number}}/s, - -
      -
      - - {{ date.name }}: - {{ date.value | timeago }} - -
      -
      - - - -
      - -
      - -
      - -

      - Raw Status-Output -

      - -
      -
      -
      - -
      - -
      - -
      - -
      - -
      - -

      - Configuration - Reload - Debug-Mode -

      - -
      -
      -
      - -
      -
      - -
      - -
      - -
      - -
      - - - -
      - -
      - -
      - -
      - -
      - -

      - Raw Debug-Response -

      - -
      -
      -
      - -
      - - No Request executed - - -
      -
      -
      - -
      - -
      - -
      - - - -
      - - - - - - - - - - - - - - - - - - -
      - - -
      - - - -
      - - - -

      Auto-Refresh Status

      - -
      - -
      diff --git a/versions.lock b/versions.lock index c6b0adbac3a..31b2ae55168 100644 --- a/versions.lock +++ b/versions.lock @@ -43,8 +43,6 @@ com.martiansoftware:nailgun-server:0.9.1 (1 constraints: 800960a1) com.pff:java-libpst:0.8.1 (1 constraints: 0b050436) com.rometools:rome:1.12.2 (1 constraints: 3805313b) com.rometools:rome-utils:1.12.2 (1 constraints: 3805313b) -com.sun.mail:gimap:1.5.1 (1 constraints: 09050036) -com.sun.mail:javax.mail:1.5.1 (2 constraints: 830d2844) com.tdunning:t-digest:3.1 (1 constraints: a804212c) com.vaadin.external.google:android-json:0.0.20131108.vaadin1 (1 constraints: 34092a9e) commons-cli:commons-cli:1.4 (1 constraints: a9041e2c) @@ -77,7 +75,6 @@ io.prometheus:simpleclient:0.2.0 (3 constraints: fe242db8) io.prometheus:simpleclient_common:0.2.0 (2 constraints: e8159ecb) io.prometheus:simpleclient_httpserver:0.2.0 (1 constraints: 0405f135) io.sgr:s2-geometry-library-java:1.0.0 (1 constraints: 0305f035) -javax.activation:activation:1.1.1 (3 constraints: 1017445c) javax.servlet:javax.servlet-api:3.1.0 (3 constraints: 75209943) joda-time:joda-time:2.9.9 (1 constraints: 8a0972a1) junit:junit:4.12 (1 constraints: db04ff30) @@ -216,7 +213,6 @@ xerces:xercesImpl:2.12.0 (2 constraints: 1f14b675) [Test dependencies] com.sun.jersey:jersey-servlet:1.19 (1 constraints: df04fa30) net.bytebuddy:byte-buddy:1.9.3 (2 constraints: 2510faaf) -org.apache.derby:derby:10.9.1.0 (1 constraints: 9b054946) org.apache.hadoop:hadoop-hdfs:3.2.0 (1 constraints: 07050036) org.apache.hadoop:hadoop-minikdc:3.2.0 (1 constraints: 07050036) org.apache.kerby:kerb-admin:1.0.1 (1 constraints: 0405f135) diff --git a/versions.props b/versions.props index d76f0bf4ff3..8c25e307d82 100644 --- a/versions.props +++ b/versions.props @@ -21,7 +21,6 @@ com.lmax:disruptor=3.4.2 com.pff:java-libpst=0.8.1 com.rometools:*=1.12.2 com.sun.jersey:*=1.19 -com.sun.mail:*=1.5.1 com.tdunning:t-digest=3.1 com.vaadin.external.google:android-json=0.0.20131108.vaadin1 
commons-beanutils:commons-beanutils=1.9.3 @@ -37,7 +36,6 @@ io.netty:*=4.1.50.Final io.opentracing:*=0.33.0 io.prometheus:*=0.2.0 io.sgr:s2-geometry-library-java=1.0.0 -javax.activation:activation=1.1.1 javax.servlet:javax.servlet-api=3.1.0 junit:junit=4.12 net.arnx:jsonic=1.2.7 @@ -58,7 +56,6 @@ org.apache.commons:commons-lang3=3.9 org.apache.commons:commons-math3=3.6.1 org.apache.commons:commons-text=1.6 org.apache.curator:*=2.13.0 -org.apache.derby:derby=10.9.1.0 org.apache.hadoop:*=3.2.0 org.apache.htrace:htrace-core4=4.1.0-incubating org.apache.httpcomponents:httpclient=4.5.10