diff --git a/dev-tools/idea/.idea/libraries/ICU_library.xml b/dev-tools/idea/.idea/libraries/ICU_library.xml index 232c6c46dde..83735235ab1 100644 --- a/dev-tools/idea/.idea/libraries/ICU_library.xml +++ b/dev-tools/idea/.idea/libraries/ICU_library.xml @@ -1,10 +1,10 @@ - - - - - - - - - + + + + + + + + + \ No newline at end of file diff --git a/lucene/tools/custom-tasks.xml b/lucene/tools/custom-tasks.xml index a1239b0fef5..e48501fe685 100644 --- a/lucene/tools/custom-tasks.xml +++ b/lucene/tools/custom-tasks.xml @@ -1,56 +1,56 @@ - - - - This file is designed for importing into a main build file, and not intended - for standalone use. - - - - - - - - - - - - - - - - License check under: @{dir} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + This file is designed for importing into a main build file, and not intended + for standalone use. + + + + + + + + + + + + + + + + License check under: @{dir} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java b/modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java index ec63abf6ae6..c522febd8f7 100644 --- a/modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java +++ b/modules/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java @@ -1,110 +1,110 @@ -package org.apache.lucene.analysis.icu; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.text.DateFormat; -import java.util.*; - -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSetIterator; -import com.ibm.icu.util.VersionInfo; - -/** creates a macro to augment jflex's unicode support for > BMP */ -public class GenerateHTMLStripCharFilterSupplementaryMacros { - private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]"); - private static final String NL = System.getProperty("line.separator"); - private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance - (DateFormat.FULL, DateFormat.FULL, Locale.US); - static { - DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - private static final String APACHE_LICENSE - = "/*" + NL - + " * Copyright 2010 The Apache Software Foundation." + NL - + " *" + NL - + " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL - + " * you may not use this file except in compliance with the License." + NL - + " * You may obtain a copy of the License at" + NL - + " *" + NL - + " * http://www.apache.org/licenses/LICENSE-2.0" + NL - + " *" + NL - + " * Unless required by applicable law or agreed to in writing, software" + NL - + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL - + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL - + " * See the License for the specific language governing permissions and" + NL - + " * limitations under the License." + NL - + " */" + NL + NL; - - - public static void main(String args[]) throws Exception { - outputHeader(); - outputMacro("ID_Start_Supp", "[:ID_Start:]"); - outputMacro("ID_Continue_Supp", "[:ID_Continue:]"); - } - - static void outputHeader() { - System.out.print(APACHE_LICENSE); - System.out.print("// Generated using ICU4J " + VersionInfo.ICU_VERSION.toString() + " on "); - System.out.println(DATE_FORMAT.format(new Date())); - System.out.println("// by " + GenerateHTMLStripCharFilterSupplementaryMacros.class.getName()); - System.out.print(NL + NL); - } - - // we have to carefully output the possibilities as compact utf-16 - // range expressions, or jflex will OOM! - static void outputMacro(String name, String pattern) { - UnicodeSet set = new UnicodeSet(pattern); - set.removeAll(BMP); - System.out.println(name + " = ("); - // if the set is empty, we have to do this or jflex will barf - if (set.isEmpty()) { - System.out.println("\t []"); - } - - HashMap utf16ByLead = new HashMap(); - for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { - char utf16[] = Character.toChars(it.codepoint); - UnicodeSet trails = utf16ByLead.get(utf16[0]); - if (trails == null) { - trails = new UnicodeSet(); - utf16ByLead.put(utf16[0], trails); - } - trails.add(utf16[1]); - } - - Map utf16ByTrail = new HashMap(); - for (Map.Entry entry : utf16ByLead.entrySet()) { - String trail = entry.getValue().getRegexEquivalent(); - UnicodeSet leads = utf16ByTrail.get(trail); - if (leads == null) { - leads = new UnicodeSet(); - utf16ByTrail.put(trail, leads); - } - leads.add(entry.getKey()); - } - - boolean isFirst = true; - for (Map.Entry entry : utf16ByTrail.entrySet()) { - System.out.print( isFirst ? "\t " : "\t| "); - isFirst = false; - System.out.println(entry.getValue().getRegexEquivalent() + entry.getKey()); - } - System.out.println(")"); - } -} +package org.apache.lucene.analysis.icu; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.DateFormat; +import java.util.*; + +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; +import com.ibm.icu.util.VersionInfo; + +/** creates a macro to augment jflex's unicode support for > BMP */ +public class GenerateHTMLStripCharFilterSupplementaryMacros { + private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]"); + private static final String NL = System.getProperty("line.separator"); + private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance + (DateFormat.FULL, DateFormat.FULL, Locale.US); + static { + DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private static final String APACHE_LICENSE + = "/*" + NL + + " * Copyright 2010 The Apache Software Foundation." + NL + + " *" + NL + + " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL + + " * you may not use this file except in compliance with the License." + NL + + " * You may obtain a copy of the License at" + NL + + " *" + NL + + " * http://www.apache.org/licenses/LICENSE-2.0" + NL + + " *" + NL + + " * Unless required by applicable law or agreed to in writing, software" + NL + + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL + + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL + + " * See the License for the specific language governing permissions and" + NL + + " * limitations under the License." + NL + + " */" + NL + NL; + + + public static void main(String args[]) throws Exception { + outputHeader(); + outputMacro("ID_Start_Supp", "[:ID_Start:]"); + outputMacro("ID_Continue_Supp", "[:ID_Continue:]"); + } + + static void outputHeader() { + System.out.print(APACHE_LICENSE); + System.out.print("// Generated using ICU4J " + VersionInfo.ICU_VERSION.toString() + " on "); + System.out.println(DATE_FORMAT.format(new Date())); + System.out.println("// by " + GenerateHTMLStripCharFilterSupplementaryMacros.class.getName()); + System.out.print(NL + NL); + } + + // we have to carefully output the possibilities as compact utf-16 + // range expressions, or jflex will OOM! + static void outputMacro(String name, String pattern) { + UnicodeSet set = new UnicodeSet(pattern); + set.removeAll(BMP); + System.out.println(name + " = ("); + // if the set is empty, we have to do this or jflex will barf + if (set.isEmpty()) { + System.out.println("\t []"); + } + + HashMap utf16ByLead = new HashMap(); + for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { + char utf16[] = Character.toChars(it.codepoint); + UnicodeSet trails = utf16ByLead.get(utf16[0]); + if (trails == null) { + trails = new UnicodeSet(); + utf16ByLead.put(utf16[0], trails); + } + trails.add(utf16[1]); + } + + Map utf16ByTrail = new HashMap(); + for (Map.Entry entry : utf16ByLead.entrySet()) { + String trail = entry.getValue().getRegexEquivalent(); + UnicodeSet leads = utf16ByTrail.get(trail); + if (leads == null) { + leads = new UnicodeSet(); + utf16ByTrail.put(trail, leads); + } + leads.add(entry.getKey()); + } + + boolean isFirst = true; + for (Map.Entry entry : utf16ByTrail.entrySet()) { + System.out.print( isFirst ? "\t " : "\t| "); + isFirst = false; + System.out.println(entry.getValue().getRegexEquivalent() + entry.getKey()); + } + System.out.println(")"); + } +} diff --git a/modules/spatial/build.xml b/modules/spatial/build.xml index 350179a0973..dbd06e8b985 100644 --- a/modules/spatial/build.xml +++ b/modules/spatial/build.xml @@ -1,33 +1,33 @@ - - - - Lucene Spatial - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + Lucene Spatial + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/spatial/src/java/org/apache/lucene/spatial/prefix/package-info.java b/modules/spatial/src/java/org/apache/lucene/spatial/prefix/package-info.java index 67c4e7f2ebc..e4ea3bc7fc5 100644 --- a/modules/spatial/src/java/org/apache/lucene/spatial/prefix/package-info.java +++ b/modules/spatial/src/java/org/apache/lucene/spatial/prefix/package-info.java @@ -1,22 +1,22 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Prefix Tree Strategy - */ -package org.apache.lucene.spatial.prefix; - +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Prefix Tree Strategy + */ +package org.apache.lucene.spatial.prefix; + diff --git a/modules/spatial/src/java/overview.html b/modules/spatial/src/java/overview.html index d246ef0b9b2..9ba5f0b63fe 100644 --- a/modules/spatial/src/java/overview.html +++ b/modules/spatial/src/java/overview.html @@ -1,23 +1,23 @@ - - - - Apache Lucene Spatial Strategies - - - + + + + Apache Lucene Spatial Strategies + + + \ No newline at end of file diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java index 182f67ec98c..e890e86d971 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java @@ -1,241 +1,241 @@ -package org.apache.solr.handler.clustering.carrot2; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.nio.CharBuffer; -import java.util.HashMap; - -import org.apache.lucene.analysis.ar.ArabicNormalizer; -import org.apache.lucene.analysis.ar.ArabicStemmer; -import org.carrot2.core.LanguageCode; -import org.carrot2.text.linguistic.IStemmer; -import org.carrot2.text.linguistic.IStemmerFactory; -import org.carrot2.util.ReflectionUtils; -import org.slf4j.Logger; -import org.tartarus.snowball.SnowballProgram; -import org.tartarus.snowball.ext.DanishStemmer; -import org.tartarus.snowball.ext.DutchStemmer; -import org.tartarus.snowball.ext.EnglishStemmer; -import org.tartarus.snowball.ext.FinnishStemmer; -import org.tartarus.snowball.ext.FrenchStemmer; -import org.tartarus.snowball.ext.GermanStemmer; -import org.tartarus.snowball.ext.HungarianStemmer; -import org.tartarus.snowball.ext.ItalianStemmer; -import org.tartarus.snowball.ext.NorwegianStemmer; -import org.tartarus.snowball.ext.PortugueseStemmer; -import org.tartarus.snowball.ext.RomanianStemmer; -import org.tartarus.snowball.ext.RussianStemmer; -import org.tartarus.snowball.ext.SpanishStemmer; -import org.tartarus.snowball.ext.SwedishStemmer; -import org.tartarus.snowball.ext.TurkishStemmer; - -/** - * An implementation of Carrot2's {@link IStemmerFactory} based on Lucene's - * APIs. Should the relevant Lucene APIs need to change, the changes can be made - * in this class. - */ -public class LuceneCarrot2StemmerFactory implements IStemmerFactory { - final static Logger logger = org.slf4j.LoggerFactory - .getLogger(LuceneCarrot2StemmerFactory.class); - - @Override - public IStemmer getStemmer(LanguageCode language) { - switch (language) { - case ARABIC: - return ArabicStemmerFactory.createStemmer(); - - case CHINESE_SIMPLIFIED: - return IdentityStemmer.INSTANCE; - - default: - /* - * For other languages, try to use snowball's stemming. - */ - return SnowballStemmerFactory.createStemmer(language); - } - } - - /** - * Factory of {@link IStemmer} implementations from the snowball - * project. - */ - private final static class SnowballStemmerFactory { - /** - * Static hard mapping from language codes to stemmer classes in Snowball. - * This mapping is not dynamic because we want to keep the possibility to - * obfuscate these classes. - */ - private static HashMap> snowballStemmerClasses; - static { - snowballStemmerClasses = new HashMap>(); - snowballStemmerClasses.put(LanguageCode.DANISH, DanishStemmer.class); - snowballStemmerClasses.put(LanguageCode.DUTCH, DutchStemmer.class); - snowballStemmerClasses.put(LanguageCode.ENGLISH, EnglishStemmer.class); - snowballStemmerClasses.put(LanguageCode.FINNISH, FinnishStemmer.class); - snowballStemmerClasses.put(LanguageCode.FRENCH, FrenchStemmer.class); - snowballStemmerClasses.put(LanguageCode.GERMAN, GermanStemmer.class); - snowballStemmerClasses - .put(LanguageCode.HUNGARIAN, HungarianStemmer.class); - snowballStemmerClasses.put(LanguageCode.ITALIAN, ItalianStemmer.class); - snowballStemmerClasses - .put(LanguageCode.NORWEGIAN, NorwegianStemmer.class); - snowballStemmerClasses.put(LanguageCode.PORTUGUESE, - PortugueseStemmer.class); - snowballStemmerClasses.put(LanguageCode.ROMANIAN, RomanianStemmer.class); - snowballStemmerClasses.put(LanguageCode.RUSSIAN, RussianStemmer.class); - snowballStemmerClasses.put(LanguageCode.SPANISH, SpanishStemmer.class); - snowballStemmerClasses.put(LanguageCode.SWEDISH, SwedishStemmer.class); - snowballStemmerClasses.put(LanguageCode.TURKISH, TurkishStemmer.class); - } - - /** - * An adapter converting Snowball programs into {@link IStemmer} interface. - */ - private static class SnowballStemmerAdapter implements IStemmer { - private final SnowballProgram snowballStemmer; - - public SnowballStemmerAdapter(SnowballProgram snowballStemmer) { - this.snowballStemmer = snowballStemmer; - } - - public CharSequence stem(CharSequence word) { - snowballStemmer.setCurrent(word.toString()); - if (snowballStemmer.stem()) { - return snowballStemmer.getCurrent(); - } else { - return null; - } - } - } - - /** - * Create and return an {@link IStemmer} adapter for a - * {@link SnowballProgram} for a given language code. An identity stemmer is - * returned for unknown languages. - */ - public static IStemmer createStemmer(LanguageCode language) { - final Class stemmerClazz = snowballStemmerClasses - .get(language); - - if (stemmerClazz == null) { - logger.warn("No Snowball stemmer class for: " + language.name() - + ". Quality of clustering may be degraded."); - return IdentityStemmer.INSTANCE; - } - - try { - return new SnowballStemmerAdapter(stemmerClazz.newInstance()); - } catch (Exception e) { - logger.warn("Could not instantiate snowball stemmer" - + " for language: " + language.name() - + ". Quality of clustering may be degraded.", e); - - return IdentityStemmer.INSTANCE; - } - } - } - - /** - * Factory of {@link IStemmer} implementations for the - * {@link LanguageCode#ARABIC} language. Requires lucene-contrib - * to be present in classpath, otherwise an empty (identity) stemmer is - * returned. - */ - private static class ArabicStemmerFactory { - static { - try { - ReflectionUtils.classForName(ArabicStemmer.class.getName(), false); - ReflectionUtils.classForName(ArabicNormalizer.class.getName(), false); - } catch (ClassNotFoundException e) { - logger - .warn( - "Could not instantiate Lucene stemmer for Arabic, clustering quality " - + "of Arabic content may be degraded. For best quality clusters, " - + "make sure Lucene's Arabic analyzer JAR is in the classpath", - e); - } - } - - /** - * Adapter to lucene-contrib Arabic analyzers. - */ - private static class LuceneStemmerAdapter implements IStemmer { - private final org.apache.lucene.analysis.ar.ArabicStemmer delegate; - private final org.apache.lucene.analysis.ar.ArabicNormalizer normalizer; - - private char[] buffer = new char[0]; - - private LuceneStemmerAdapter() throws Exception { - delegate = new org.apache.lucene.analysis.ar.ArabicStemmer(); - normalizer = new org.apache.lucene.analysis.ar.ArabicNormalizer(); - } - - public CharSequence stem(CharSequence word) { - if (word.length() > buffer.length) { - buffer = new char[word.length()]; - } - - for (int i = 0; i < word.length(); i++) { - buffer[i] = word.charAt(i); - } - - int newLen = normalizer.normalize(buffer, word.length()); - newLen = delegate.stem(buffer, newLen); - - if (newLen != word.length() || !equals(buffer, newLen, word)) { - return CharBuffer.wrap(buffer, 0, newLen); - } - - // Same-same. - return null; - } - - private boolean equals(char[] buffer, int len, CharSequence word) { - assert len == word.length(); - - for (int i = 0; i < len; i++) { - if (buffer[i] != word.charAt(i)) - return false; - } - - return true; - } - } - - public static IStemmer createStemmer() { - try { - return new LuceneStemmerAdapter(); - } catch (Throwable e) { - return IdentityStemmer.INSTANCE; - } - } - } - - /** - * An implementation of {@link IStemmer} that always returns null - * which means no stemming. - */ - private static class IdentityStemmer implements IStemmer { - private final static IdentityStemmer INSTANCE = new IdentityStemmer(); - - @Override - public CharSequence stem(CharSequence word) { - return null; - } - } -} +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.CharBuffer; +import java.util.HashMap; + +import org.apache.lucene.analysis.ar.ArabicNormalizer; +import org.apache.lucene.analysis.ar.ArabicStemmer; +import org.carrot2.core.LanguageCode; +import org.carrot2.text.linguistic.IStemmer; +import org.carrot2.text.linguistic.IStemmerFactory; +import org.carrot2.util.ReflectionUtils; +import org.slf4j.Logger; +import org.tartarus.snowball.SnowballProgram; +import org.tartarus.snowball.ext.DanishStemmer; +import org.tartarus.snowball.ext.DutchStemmer; +import org.tartarus.snowball.ext.EnglishStemmer; +import org.tartarus.snowball.ext.FinnishStemmer; +import org.tartarus.snowball.ext.FrenchStemmer; +import org.tartarus.snowball.ext.GermanStemmer; +import org.tartarus.snowball.ext.HungarianStemmer; +import org.tartarus.snowball.ext.ItalianStemmer; +import org.tartarus.snowball.ext.NorwegianStemmer; +import org.tartarus.snowball.ext.PortugueseStemmer; +import org.tartarus.snowball.ext.RomanianStemmer; +import org.tartarus.snowball.ext.RussianStemmer; +import org.tartarus.snowball.ext.SpanishStemmer; +import org.tartarus.snowball.ext.SwedishStemmer; +import org.tartarus.snowball.ext.TurkishStemmer; + +/** + * An implementation of Carrot2's {@link IStemmerFactory} based on Lucene's + * APIs. Should the relevant Lucene APIs need to change, the changes can be made + * in this class. + */ +public class LuceneCarrot2StemmerFactory implements IStemmerFactory { + final static Logger logger = org.slf4j.LoggerFactory + .getLogger(LuceneCarrot2StemmerFactory.class); + + @Override + public IStemmer getStemmer(LanguageCode language) { + switch (language) { + case ARABIC: + return ArabicStemmerFactory.createStemmer(); + + case CHINESE_SIMPLIFIED: + return IdentityStemmer.INSTANCE; + + default: + /* + * For other languages, try to use snowball's stemming. + */ + return SnowballStemmerFactory.createStemmer(language); + } + } + + /** + * Factory of {@link IStemmer} implementations from the snowball + * project. + */ + private final static class SnowballStemmerFactory { + /** + * Static hard mapping from language codes to stemmer classes in Snowball. + * This mapping is not dynamic because we want to keep the possibility to + * obfuscate these classes. + */ + private static HashMap> snowballStemmerClasses; + static { + snowballStemmerClasses = new HashMap>(); + snowballStemmerClasses.put(LanguageCode.DANISH, DanishStemmer.class); + snowballStemmerClasses.put(LanguageCode.DUTCH, DutchStemmer.class); + snowballStemmerClasses.put(LanguageCode.ENGLISH, EnglishStemmer.class); + snowballStemmerClasses.put(LanguageCode.FINNISH, FinnishStemmer.class); + snowballStemmerClasses.put(LanguageCode.FRENCH, FrenchStemmer.class); + snowballStemmerClasses.put(LanguageCode.GERMAN, GermanStemmer.class); + snowballStemmerClasses + .put(LanguageCode.HUNGARIAN, HungarianStemmer.class); + snowballStemmerClasses.put(LanguageCode.ITALIAN, ItalianStemmer.class); + snowballStemmerClasses + .put(LanguageCode.NORWEGIAN, NorwegianStemmer.class); + snowballStemmerClasses.put(LanguageCode.PORTUGUESE, + PortugueseStemmer.class); + snowballStemmerClasses.put(LanguageCode.ROMANIAN, RomanianStemmer.class); + snowballStemmerClasses.put(LanguageCode.RUSSIAN, RussianStemmer.class); + snowballStemmerClasses.put(LanguageCode.SPANISH, SpanishStemmer.class); + snowballStemmerClasses.put(LanguageCode.SWEDISH, SwedishStemmer.class); + snowballStemmerClasses.put(LanguageCode.TURKISH, TurkishStemmer.class); + } + + /** + * An adapter converting Snowball programs into {@link IStemmer} interface. + */ + private static class SnowballStemmerAdapter implements IStemmer { + private final SnowballProgram snowballStemmer; + + public SnowballStemmerAdapter(SnowballProgram snowballStemmer) { + this.snowballStemmer = snowballStemmer; + } + + public CharSequence stem(CharSequence word) { + snowballStemmer.setCurrent(word.toString()); + if (snowballStemmer.stem()) { + return snowballStemmer.getCurrent(); + } else { + return null; + } + } + } + + /** + * Create and return an {@link IStemmer} adapter for a + * {@link SnowballProgram} for a given language code. An identity stemmer is + * returned for unknown languages. + */ + public static IStemmer createStemmer(LanguageCode language) { + final Class stemmerClazz = snowballStemmerClasses + .get(language); + + if (stemmerClazz == null) { + logger.warn("No Snowball stemmer class for: " + language.name() + + ". Quality of clustering may be degraded."); + return IdentityStemmer.INSTANCE; + } + + try { + return new SnowballStemmerAdapter(stemmerClazz.newInstance()); + } catch (Exception e) { + logger.warn("Could not instantiate snowball stemmer" + + " for language: " + language.name() + + ". Quality of clustering may be degraded.", e); + + return IdentityStemmer.INSTANCE; + } + } + } + + /** + * Factory of {@link IStemmer} implementations for the + * {@link LanguageCode#ARABIC} language. Requires lucene-contrib + * to be present in classpath, otherwise an empty (identity) stemmer is + * returned. + */ + private static class ArabicStemmerFactory { + static { + try { + ReflectionUtils.classForName(ArabicStemmer.class.getName(), false); + ReflectionUtils.classForName(ArabicNormalizer.class.getName(), false); + } catch (ClassNotFoundException e) { + logger + .warn( + "Could not instantiate Lucene stemmer for Arabic, clustering quality " + + "of Arabic content may be degraded. For best quality clusters, " + + "make sure Lucene's Arabic analyzer JAR is in the classpath", + e); + } + } + + /** + * Adapter to lucene-contrib Arabic analyzers. + */ + private static class LuceneStemmerAdapter implements IStemmer { + private final org.apache.lucene.analysis.ar.ArabicStemmer delegate; + private final org.apache.lucene.analysis.ar.ArabicNormalizer normalizer; + + private char[] buffer = new char[0]; + + private LuceneStemmerAdapter() throws Exception { + delegate = new org.apache.lucene.analysis.ar.ArabicStemmer(); + normalizer = new org.apache.lucene.analysis.ar.ArabicNormalizer(); + } + + public CharSequence stem(CharSequence word) { + if (word.length() > buffer.length) { + buffer = new char[word.length()]; + } + + for (int i = 0; i < word.length(); i++) { + buffer[i] = word.charAt(i); + } + + int newLen = normalizer.normalize(buffer, word.length()); + newLen = delegate.stem(buffer, newLen); + + if (newLen != word.length() || !equals(buffer, newLen, word)) { + return CharBuffer.wrap(buffer, 0, newLen); + } + + // Same-same. + return null; + } + + private boolean equals(char[] buffer, int len, CharSequence word) { + assert len == word.length(); + + for (int i = 0; i < len; i++) { + if (buffer[i] != word.charAt(i)) + return false; + } + + return true; + } + } + + public static IStemmer createStemmer() { + try { + return new LuceneStemmerAdapter(); + } catch (Throwable e) { + return IdentityStemmer.INSTANCE; + } + } + } + + /** + * An implementation of {@link IStemmer} that always returns null + * which means no stemming. + */ + private static class IdentityStemmer implements IStemmer { + private final static IdentityStemmer INSTANCE = new IdentityStemmer(); + + @Override + public CharSequence stem(CharSequence word) { + return null; + } + } +} diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java index 7511baed56c..faaf85a48aa 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java @@ -1,156 +1,156 @@ -package org.apache.solr.handler.clustering.carrot2; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.Reader; -import java.util.regex.Pattern; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.carrot2.core.LanguageCode; -import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer; -import org.carrot2.text.analysis.ITokenizer; -import org.carrot2.text.linguistic.ITokenizerFactory; -import org.carrot2.text.util.MutableCharArray; -import org.carrot2.util.ExceptionUtils; -import org.carrot2.util.ReflectionUtils; -import org.slf4j.Logger; - -/** - * An implementation of Carrot2's {@link ITokenizerFactory} based on Lucene's - * Smart Chinese tokenizer. If Smart Chinese tokenizer is not available in - * classpath at runtime, the default Carrot2's tokenizer is used. Should the - * Lucene APIs need to change, the changes can be made in this class. - */ -public class LuceneCarrot2TokenizerFactory implements ITokenizerFactory { - final static Logger logger = org.slf4j.LoggerFactory - .getLogger(LuceneCarrot2TokenizerFactory.class); - - @Override - public ITokenizer getTokenizer(LanguageCode language) { - switch (language) { - case CHINESE_SIMPLIFIED: - return ChineseTokenizerFactory.createTokenizer(); - - /* - * We use our own analyzer for Arabic. Lucene's version has special - * support for Nonspacing-Mark characters (see - * http://www.fileformat.info/info/unicode/category/Mn/index.htm), but we - * have them included as letters in the parser. - */ - case ARABIC: - // Intentional fall-through. - - default: - return new ExtendedWhitespaceTokenizer(); - } - } - - /** - * Creates tokenizers that adapt Lucene's Smart Chinese Tokenizer to Carrot2's - * {@link ITokenizer}. If Smart Chinese is not available in the classpath, the - * factory will fall back to the default white space tokenizer. - */ - private static final class ChineseTokenizerFactory { - static { - try { - ReflectionUtils.classForName( - "org.apache.lucene.analysis.cn.smart.WordTokenFilter", false); - ReflectionUtils.classForName( - "org.apache.lucene.analysis.cn.smart.SentenceTokenizer", false); - } catch (Throwable e) { - logger - .warn("Could not instantiate Smart Chinese Analyzer, clustering quality " - + "of Chinese content may be degraded. For best quality clusters, " - + "make sure Lucene's Smart Chinese Analyzer JAR is in the classpath"); - } - } - - static ITokenizer createTokenizer() { - try { - return new ChineseTokenizer(); - } catch (Throwable e) { - return new ExtendedWhitespaceTokenizer(); - } - } - - private final static class ChineseTokenizer implements ITokenizer { - private final static Pattern numeric = Pattern - .compile("[\\-+'$]?\\d+([:\\-/,.]?\\d+)*[%$]?"); - - private Tokenizer sentenceTokenizer; - private TokenStream wordTokenFilter; - private CharTermAttribute term = null; - - private final MutableCharArray tempCharSequence; - private final Class tokenFilterClass; - - private ChineseTokenizer() throws Exception { - this.tempCharSequence = new MutableCharArray(new char[0]); - - // As Smart Chinese is not available during compile time, - // we need to resort to reflection. - final Class tokenizerClass = ReflectionUtils.classForName( - "org.apache.lucene.analysis.cn.smart.SentenceTokenizer", false); - this.sentenceTokenizer = (Tokenizer) tokenizerClass.getConstructor( - Reader.class).newInstance((Reader) null); - this.tokenFilterClass = ReflectionUtils.classForName( - "org.apache.lucene.analysis.cn.smart.WordTokenFilter", false); - } - - public short nextToken() throws IOException { - final boolean hasNextToken = wordTokenFilter.incrementToken(); - if (hasNextToken) { - short flags = 0; - final char[] image = term.buffer(); - final int length = term.length(); - tempCharSequence.reset(image, 0, length); - if (length == 1 && image[0] == ',') { - // ChineseTokenizer seems to convert all punctuation to ',' - // characters - flags = ITokenizer.TT_PUNCTUATION; - } else if (numeric.matcher(tempCharSequence).matches()) { - flags = ITokenizer.TT_NUMERIC; - } else { - flags = ITokenizer.TT_TERM; - } - return flags; - } - - return ITokenizer.TT_EOF; - } - - public void setTermBuffer(MutableCharArray array) { - array.reset(term.buffer(), 0, term.length()); - } - - public void reset(Reader input) throws IOException { - try { - sentenceTokenizer.reset(input); - wordTokenFilter = (TokenStream) tokenFilterClass.getConstructor( - TokenStream.class).newInstance(sentenceTokenizer); - term = wordTokenFilter.addAttribute(CharTermAttribute.class); - } catch (Exception e) { - throw ExceptionUtils.wrapAsRuntimeException(e); - } - } - } - } -} +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; +import java.util.regex.Pattern; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.carrot2.core.LanguageCode; +import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer; +import org.carrot2.text.analysis.ITokenizer; +import org.carrot2.text.linguistic.ITokenizerFactory; +import org.carrot2.text.util.MutableCharArray; +import org.carrot2.util.ExceptionUtils; +import org.carrot2.util.ReflectionUtils; +import org.slf4j.Logger; + +/** + * An implementation of Carrot2's {@link ITokenizerFactory} based on Lucene's + * Smart Chinese tokenizer. If Smart Chinese tokenizer is not available in + * classpath at runtime, the default Carrot2's tokenizer is used. Should the + * Lucene APIs need to change, the changes can be made in this class. + */ +public class LuceneCarrot2TokenizerFactory implements ITokenizerFactory { + final static Logger logger = org.slf4j.LoggerFactory + .getLogger(LuceneCarrot2TokenizerFactory.class); + + @Override + public ITokenizer getTokenizer(LanguageCode language) { + switch (language) { + case CHINESE_SIMPLIFIED: + return ChineseTokenizerFactory.createTokenizer(); + + /* + * We use our own analyzer for Arabic. Lucene's version has special + * support for Nonspacing-Mark characters (see + * http://www.fileformat.info/info/unicode/category/Mn/index.htm), but we + * have them included as letters in the parser. + */ + case ARABIC: + // Intentional fall-through. + + default: + return new ExtendedWhitespaceTokenizer(); + } + } + + /** + * Creates tokenizers that adapt Lucene's Smart Chinese Tokenizer to Carrot2's + * {@link ITokenizer}. If Smart Chinese is not available in the classpath, the + * factory will fall back to the default white space tokenizer. + */ + private static final class ChineseTokenizerFactory { + static { + try { + ReflectionUtils.classForName( + "org.apache.lucene.analysis.cn.smart.WordTokenFilter", false); + ReflectionUtils.classForName( + "org.apache.lucene.analysis.cn.smart.SentenceTokenizer", false); + } catch (Throwable e) { + logger + .warn("Could not instantiate Smart Chinese Analyzer, clustering quality " + + "of Chinese content may be degraded. For best quality clusters, " + + "make sure Lucene's Smart Chinese Analyzer JAR is in the classpath"); + } + } + + static ITokenizer createTokenizer() { + try { + return new ChineseTokenizer(); + } catch (Throwable e) { + return new ExtendedWhitespaceTokenizer(); + } + } + + private final static class ChineseTokenizer implements ITokenizer { + private final static Pattern numeric = Pattern + .compile("[\\-+'$]?\\d+([:\\-/,.]?\\d+)*[%$]?"); + + private Tokenizer sentenceTokenizer; + private TokenStream wordTokenFilter; + private CharTermAttribute term = null; + + private final MutableCharArray tempCharSequence; + private final Class tokenFilterClass; + + private ChineseTokenizer() throws Exception { + this.tempCharSequence = new MutableCharArray(new char[0]); + + // As Smart Chinese is not available during compile time, + // we need to resort to reflection. + final Class tokenizerClass = ReflectionUtils.classForName( + "org.apache.lucene.analysis.cn.smart.SentenceTokenizer", false); + this.sentenceTokenizer = (Tokenizer) tokenizerClass.getConstructor( + Reader.class).newInstance((Reader) null); + this.tokenFilterClass = ReflectionUtils.classForName( + "org.apache.lucene.analysis.cn.smart.WordTokenFilter", false); + } + + public short nextToken() throws IOException { + final boolean hasNextToken = wordTokenFilter.incrementToken(); + if (hasNextToken) { + short flags = 0; + final char[] image = term.buffer(); + final int length = term.length(); + tempCharSequence.reset(image, 0, length); + if (length == 1 && image[0] == ',') { + // ChineseTokenizer seems to convert all punctuation to ',' + // characters + flags = ITokenizer.TT_PUNCTUATION; + } else if (numeric.matcher(tempCharSequence).matches()) { + flags = ITokenizer.TT_NUMERIC; + } else { + flags = ITokenizer.TT_TERM; + } + return flags; + } + + return ITokenizer.TT_EOF; + } + + public void setTermBuffer(MutableCharArray array) { + array.reset(term.buffer(), 0, term.length()); + } + + public void reset(Reader input) throws IOException { + try { + sentenceTokenizer.reset(input); + wordTokenFilter = (TokenStream) tokenFilterClass.getConstructor( + TokenStream.class).newInstance(sentenceTokenizer); + term = wordTokenFilter.addAttribute(CharTermAttribute.class); + } catch (Exception e) { + throw ExceptionUtils.wrapAsRuntimeException(e); + } + } + } + } +} diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java index 2159d4265c3..5ddfa1a9995 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java @@ -1,140 +1,140 @@ -package org.apache.solr.handler.clustering.carrot2; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Collection; -import java.util.Set; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.solr.analysis.CommonGramsFilterFactory; -import org.apache.solr.analysis.StopFilterFactory; -import org.apache.solr.analysis.TokenFilterFactory; -import org.apache.solr.analysis.TokenizerChain; -import org.apache.solr.schema.IndexSchema; -import org.carrot2.core.LanguageCode; -import org.carrot2.core.attribute.Init; -import org.carrot2.core.attribute.Processing; -import org.carrot2.text.linguistic.DefaultLexicalDataFactory; -import org.carrot2.text.linguistic.ILexicalData; -import org.carrot2.text.linguistic.ILexicalDataFactory; -import org.carrot2.text.util.MutableCharArray; -import org.carrot2.util.attribute.Attribute; -import org.carrot2.util.attribute.Bindable; -import org.carrot2.util.attribute.Input; -import org.slf4j.Logger; - -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Multimap; - -/** - * An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop - * words from a field's StopFilter to the default stop words used in Carrot2, - * for all languages Carrot2 supports. Completely replacing Carrot2 stop words - * with Solr's wouldn't make much sense because clustering needs more aggressive - * stop words removal. In other words, if something is a stop word during - * indexing, then it should also be a stop word during clustering, but not the - * other way round. - */ -@Bindable -public class SolrStopwordsCarrot2LexicalDataFactory implements - ILexicalDataFactory { - final static Logger logger = org.slf4j.LoggerFactory - .getLogger(SolrStopwordsCarrot2LexicalDataFactory.class); - - @Init - @Input - @Attribute(key = "solrIndexSchema") - private IndexSchema schema; - - @Processing - @Input - @Attribute(key = "solrFieldNames") - private Set fieldNames; - - /** - * A lazily-built cache of stop words per field. - */ - private Multimap solrStopWords = HashMultimap.create(); - - /** - * Carrot2's default lexical resources to use in addition to Solr's stop - * words. - */ - private DefaultLexicalDataFactory carrot2LexicalDataFactory = new DefaultLexicalDataFactory(); - - /** - * Obtains stop words for a field from the associated - * {@link StopFilterFactory}, if any. - */ - private Collection getSolrStopWordsForField(String fieldName) { - // No need to synchronize here, Carrot2 ensures that instances - // of this class are not used by multiple threads at a time. - if (!solrStopWords.containsKey(fieldName)) { - final Analyzer fieldAnalyzer = schema.getFieldType(fieldName) - .getAnalyzer(); - if (fieldAnalyzer instanceof TokenizerChain) { - final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer) - .getTokenFilterFactories(); - for (TokenFilterFactory factory : filterFactories) { - if (factory instanceof StopFilterFactory) { - // StopFilterFactory holds the stop words in a CharArraySet - solrStopWords.put(fieldName, - ((StopFilterFactory) factory).getStopWords()); - } - - if (factory instanceof CommonGramsFilterFactory) { - solrStopWords.put(fieldName, - ((CommonGramsFilterFactory) factory) - .getCommonWords()); - } - } - } - } - return solrStopWords.get(fieldName); - } - - @Override - public ILexicalData getLexicalData(LanguageCode languageCode) { - final ILexicalData carrot2LexicalData = carrot2LexicalDataFactory - .getLexicalData(languageCode); - - return new ILexicalData() { - @Override - public boolean isStopLabel(CharSequence word) { - // Nothing in Solr maps to the concept of a stop label, - // so return Carrot2's default here. - return carrot2LexicalData.isStopLabel(word); - } - - @Override - public boolean isCommonWord(MutableCharArray word) { - // Loop over the fields involved in clustering first - for (String fieldName : fieldNames) { - for (CharArraySet stopWords : getSolrStopWordsForField(fieldName)) { - if (stopWords.contains(word)) { - return true; - } - } - } - // Check default Carrot2 stop words too - return carrot2LexicalData.isCommonWord(word); - } - }; - } -} +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Collection; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.solr.analysis.CommonGramsFilterFactory; +import org.apache.solr.analysis.StopFilterFactory; +import org.apache.solr.analysis.TokenFilterFactory; +import org.apache.solr.analysis.TokenizerChain; +import org.apache.solr.schema.IndexSchema; +import org.carrot2.core.LanguageCode; +import org.carrot2.core.attribute.Init; +import org.carrot2.core.attribute.Processing; +import org.carrot2.text.linguistic.DefaultLexicalDataFactory; +import org.carrot2.text.linguistic.ILexicalData; +import org.carrot2.text.linguistic.ILexicalDataFactory; +import org.carrot2.text.util.MutableCharArray; +import org.carrot2.util.attribute.Attribute; +import org.carrot2.util.attribute.Bindable; +import org.carrot2.util.attribute.Input; +import org.slf4j.Logger; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +/** + * An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop + * words from a field's StopFilter to the default stop words used in Carrot2, + * for all languages Carrot2 supports. Completely replacing Carrot2 stop words + * with Solr's wouldn't make much sense because clustering needs more aggressive + * stop words removal. In other words, if something is a stop word during + * indexing, then it should also be a stop word during clustering, but not the + * other way round. + */ +@Bindable +public class SolrStopwordsCarrot2LexicalDataFactory implements + ILexicalDataFactory { + final static Logger logger = org.slf4j.LoggerFactory + .getLogger(SolrStopwordsCarrot2LexicalDataFactory.class); + + @Init + @Input + @Attribute(key = "solrIndexSchema") + private IndexSchema schema; + + @Processing + @Input + @Attribute(key = "solrFieldNames") + private Set fieldNames; + + /** + * A lazily-built cache of stop words per field. + */ + private Multimap solrStopWords = HashMultimap.create(); + + /** + * Carrot2's default lexical resources to use in addition to Solr's stop + * words. + */ + private DefaultLexicalDataFactory carrot2LexicalDataFactory = new DefaultLexicalDataFactory(); + + /** + * Obtains stop words for a field from the associated + * {@link StopFilterFactory}, if any. + */ + private Collection getSolrStopWordsForField(String fieldName) { + // No need to synchronize here, Carrot2 ensures that instances + // of this class are not used by multiple threads at a time. + if (!solrStopWords.containsKey(fieldName)) { + final Analyzer fieldAnalyzer = schema.getFieldType(fieldName) + .getAnalyzer(); + if (fieldAnalyzer instanceof TokenizerChain) { + final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer) + .getTokenFilterFactories(); + for (TokenFilterFactory factory : filterFactories) { + if (factory instanceof StopFilterFactory) { + // StopFilterFactory holds the stop words in a CharArraySet + solrStopWords.put(fieldName, + ((StopFilterFactory) factory).getStopWords()); + } + + if (factory instanceof CommonGramsFilterFactory) { + solrStopWords.put(fieldName, + ((CommonGramsFilterFactory) factory) + .getCommonWords()); + } + } + } + } + return solrStopWords.get(fieldName); + } + + @Override + public ILexicalData getLexicalData(LanguageCode languageCode) { + final ILexicalData carrot2LexicalData = carrot2LexicalDataFactory + .getLexicalData(languageCode); + + return new ILexicalData() { + @Override + public boolean isStopLabel(CharSequence word) { + // Nothing in Solr maps to the concept of a stop label, + // so return Carrot2's default here. + return carrot2LexicalData.isStopLabel(word); + } + + @Override + public boolean isCommonWord(MutableCharArray word) { + // Loop over the fields involved in clustering first + for (String fieldName : fieldNames) { + for (CharArraySet stopWords : getSolrStopWordsForField(fieldName)) { + if (stopWords.contains(word)) { + return true; + } + } + } + // Check default Carrot2 stop words too + return carrot2LexicalData.isCommonWord(word); + } + }; + } +} diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java index a47878f12ae..d4b386c70dc 100644 --- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java @@ -1,78 +1,78 @@ -package org.apache.solr.handler.clustering.carrot2; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import java.util.Collections; -import java.util.List; - -import org.carrot2.core.Cluster; -import org.carrot2.core.Document; -import org.carrot2.core.IClusteringAlgorithm; -import org.carrot2.core.ProcessingComponentBase; -import org.carrot2.core.ProcessingException; -import org.carrot2.core.attribute.AttributeNames; -import org.carrot2.core.attribute.Processing; -import org.carrot2.util.attribute.Attribute; -import org.carrot2.util.attribute.Bindable; -import org.carrot2.util.attribute.Input; -import org.carrot2.util.attribute.Output; - -import com.google.common.collect.Lists; - -/** - * A mock Carrot2 clustering algorithm that outputs input documents as clusters. - * Useful only in tests. - */ -@Bindable(prefix = "EchoClusteringAlgorithm") -public class EchoClusteringAlgorithm extends ProcessingComponentBase implements - IClusteringAlgorithm { - @Input - @Processing - @Attribute(key = AttributeNames.DOCUMENTS) - private List documents; - - @Output - @Processing - @Attribute(key = AttributeNames.CLUSTERS) - private List clusters; - - @Input - @Processing - @Attribute(key = "custom-fields") - private String customFields = ""; - - - @Override - public void process() throws ProcessingException { - clusters = Lists.newArrayListWithCapacity(documents.size()); - - for (Document document : documents) { - final Cluster cluster = new Cluster(); - cluster.addPhrases(document.getTitle(), document.getSummary()); - if (document.getLanguage() != null) { - cluster.addPhrases(document.getLanguage().name()); - } - for (String field : customFields.split(",")) { - Object value = document.getField(field); - if (value != null) { - cluster.addPhrases(value.toString()); - } - } - cluster.addDocuments(document); - clusters.add(cluster); - } - } -} +package org.apache.solr.handler.clustering.carrot2; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.Collections; +import java.util.List; + +import org.carrot2.core.Cluster; +import org.carrot2.core.Document; +import org.carrot2.core.IClusteringAlgorithm; +import org.carrot2.core.ProcessingComponentBase; +import org.carrot2.core.ProcessingException; +import org.carrot2.core.attribute.AttributeNames; +import org.carrot2.core.attribute.Processing; +import org.carrot2.util.attribute.Attribute; +import org.carrot2.util.attribute.Bindable; +import org.carrot2.util.attribute.Input; +import org.carrot2.util.attribute.Output; + +import com.google.common.collect.Lists; + +/** + * A mock Carrot2 clustering algorithm that outputs input documents as clusters. + * Useful only in tests. + */ +@Bindable(prefix = "EchoClusteringAlgorithm") +public class EchoClusteringAlgorithm extends ProcessingComponentBase implements + IClusteringAlgorithm { + @Input + @Processing + @Attribute(key = AttributeNames.DOCUMENTS) + private List documents; + + @Output + @Processing + @Attribute(key = AttributeNames.CLUSTERS) + private List clusters; + + @Input + @Processing + @Attribute(key = "custom-fields") + private String customFields = ""; + + + @Override + public void process() throws ProcessingException { + clusters = Lists.newArrayListWithCapacity(documents.size()); + + for (Document document : documents) { + final Cluster cluster = new Cluster(); + cluster.addPhrases(document.getTitle(), document.getSummary()); + if (document.getLanguage() != null) { + cluster.addPhrases(document.getLanguage().name()); + } + for (String field : customFields.split(",")) { + Object value = document.getField(field); + if (value != null) { + cluster.addPhrases(value.toString()); + } + } + cluster.addDocuments(document); + clusters.add(cluster); + } + } +} diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/LexicalResourcesCheckClusteringAlgorithm.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/LexicalResourcesCheckClusteringAlgorithm.java index ed02fba80aa..4810f7a845d 100644 --- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/LexicalResourcesCheckClusteringAlgorithm.java +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/LexicalResourcesCheckClusteringAlgorithm.java @@ -1,80 +1,80 @@ -package org.apache.solr.handler.clustering.carrot2; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import java.util.List; - -import org.carrot2.core.Cluster; -import org.carrot2.core.IClusteringAlgorithm; -import org.carrot2.core.LanguageCode; -import org.carrot2.core.ProcessingComponentBase; -import org.carrot2.core.ProcessingException; -import org.carrot2.core.attribute.AttributeNames; -import org.carrot2.core.attribute.Processing; -import org.carrot2.text.linguistic.ILexicalData; -import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline; -import org.carrot2.text.util.MutableCharArray; -import org.carrot2.util.attribute.Attribute; -import org.carrot2.util.attribute.Bindable; -import org.carrot2.util.attribute.Input; -import org.carrot2.util.attribute.Output; - -import com.google.common.collect.Lists; - -/** - * A mock implementation of Carrot2 clustering algorithm for testing whether the - * customized lexical resource lookup works correctly. This algorithm ignores - * the input documents and instead for each word from {@link #wordsToCheck}, it - * outputs a cluster labeled with the word only if the word is neither a stop - * word nor a stop label. - */ -@Bindable(prefix = "LexicalResourcesCheckClusteringAlgorithm") -public class LexicalResourcesCheckClusteringAlgorithm extends - ProcessingComponentBase implements IClusteringAlgorithm { - - @Output - @Processing - @Attribute(key = AttributeNames.CLUSTERS) - private List clusters; - - @Input - @Processing - @Attribute - private String wordsToCheck; - - private BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline(); - - @Override - public void process() throws ProcessingException { - clusters = Lists.newArrayList(); - if (wordsToCheck == null) { - return; - } - - // Test with Maltese so that the English clustering performed in other tests - // is not affected by the test stopwords and stoplabels. - ILexicalData lexicalData = preprocessing.lexicalDataFactory - .getLexicalData(LanguageCode.MALTESE); - - for (String word : wordsToCheck.split(",")) { - if (!lexicalData.isCommonWord(new MutableCharArray(word)) - && !lexicalData.isStopLabel(word)) { - clusters.add(new Cluster(word)); - } - } - } -} +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.List; + +import org.carrot2.core.Cluster; +import org.carrot2.core.IClusteringAlgorithm; +import org.carrot2.core.LanguageCode; +import org.carrot2.core.ProcessingComponentBase; +import org.carrot2.core.ProcessingException; +import org.carrot2.core.attribute.AttributeNames; +import org.carrot2.core.attribute.Processing; +import org.carrot2.text.linguistic.ILexicalData; +import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline; +import org.carrot2.text.util.MutableCharArray; +import org.carrot2.util.attribute.Attribute; +import org.carrot2.util.attribute.Bindable; +import org.carrot2.util.attribute.Input; +import org.carrot2.util.attribute.Output; + +import com.google.common.collect.Lists; + +/** + * A mock implementation of Carrot2 clustering algorithm for testing whether the + * customized lexical resource lookup works correctly. This algorithm ignores + * the input documents and instead for each word from {@link #wordsToCheck}, it + * outputs a cluster labeled with the word only if the word is neither a stop + * word nor a stop label. + */ +@Bindable(prefix = "LexicalResourcesCheckClusteringAlgorithm") +public class LexicalResourcesCheckClusteringAlgorithm extends + ProcessingComponentBase implements IClusteringAlgorithm { + + @Output + @Processing + @Attribute(key = AttributeNames.CLUSTERS) + private List clusters; + + @Input + @Processing + @Attribute + private String wordsToCheck; + + private BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline(); + + @Override + public void process() throws ProcessingException { + clusters = Lists.newArrayList(); + if (wordsToCheck == null) { + return; + } + + // Test with Maltese so that the English clustering performed in other tests + // is not affected by the test stopwords and stoplabels. + ILexicalData lexicalData = preprocessing.lexicalDataFactory + .getLexicalData(LanguageCode.MALTESE); + + for (String word : wordsToCheck.split(",")) { + if (!lexicalData.isCommonWord(new MutableCharArray(word)) + && !lexicalData.isStopLabel(word)) { + clusters.add(new Cluster(word)); + } + } + } +} diff --git a/solr/contrib/langid/src/java/overview.html b/solr/contrib/langid/src/java/overview.html index 5f19c0c86b0..d1b8f2cf8ec 100644 --- a/solr/contrib/langid/src/java/overview.html +++ b/solr/contrib/langid/src/java/overview.html @@ -1,5 +1,5 @@ - - -Apache Solr Search Server: Solr Language Identifier contrib - - + + +Apache Solr Search Server: Solr Language Identifier contrib + + diff --git a/solr/contrib/velocity/src/java/overview.html b/solr/contrib/velocity/src/java/overview.html index 03b660a983d..b8cff377f40 100644 --- a/solr/contrib/velocity/src/java/overview.html +++ b/solr/contrib/velocity/src/java/overview.html @@ -1,5 +1,5 @@ - - -Apache Solr Search Server: Velocity Response Writer contrib - - + + +Apache Solr Search Server: Velocity Response Writer contrib + + diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java index 965917fb127..3b5ad013355 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java @@ -1,184 +1,184 @@ -package org.apache.solr.handler.component; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; -import org.apache.commons.httpclient.params.HttpMethodParams; -import org.apache.solr.client.solrj.impl.LBHttpSolrServer; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.PluginInfo; -import org.apache.solr.util.DefaultSolrThreadFactory; -import org.apache.solr.util.plugin.PluginInfoInitialized; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.net.MalformedURLException; -import java.util.Random; -import java.util.concurrent.*; - -public class HttpShardHandlerFactory extends ShardHandlerFactory implements PluginInfoInitialized { - protected static Logger log = LoggerFactory.getLogger(HttpShardHandlerFactory.class); - - // We want an executor that doesn't take up any resources if - // it's not used, so it could be created statically for - // the distributed search component if desired. - // - // Consider CallerRuns policy and a lower max threads to throttle - // requests at some point (or should we simply return failure?) - ThreadPoolExecutor commExecutor = new ThreadPoolExecutor( - 0, - Integer.MAX_VALUE, - 5, TimeUnit.SECONDS, // terminate idle threads after 5 sec - new SynchronousQueue(), // directly hand off tasks - new DefaultSolrThreadFactory("httpShardExecutor") - ); - - HttpClient client; - Random r = new Random(); - LBHttpSolrServer loadbalancer; - int soTimeout = 0; //current default values - int connectionTimeout = 0; //current default values - int maxConnectionsPerHost = 20; - int corePoolSize = 0; - int maximumPoolSize = 10; - int keepAliveTime = 5; - int queueSize = 1; - boolean accessPolicy = true; - - public String scheme = "http://"; //current default values - - private MultiThreadedHttpConnectionManager mgr; - // socket timeout measured in ms, closes a socket if read - // takes longer than x ms to complete. throws - // java.net.SocketTimeoutException: Read timed out exception - static final String INIT_SO_TIMEOUT = "socketTimeout"; - - // connection timeout measures in ms, closes a socket if connection - // cannot be established within x ms. with a - // java.net.SocketTimeoutException: Connection timed out - static final String INIT_CONNECTION_TIMEOUT = "connTimeout"; - - // URL scheme to be used in distributed search. - static final String INIT_URL_SCHEME = "urlScheme"; - - // Maximum connections allowed per host - static final String INIT_MAX_CONNECTION_PER_HOST = "maxConnectionsPerHost"; - - // The core size of the threadpool servicing requests - static final String INIT_CORE_POOL_SIZE = "corePoolSize"; - - // The maximum size of the threadpool servicing requests - static final String INIT_MAX_POOL_SIZE = "maximumPoolSize"; - - // The amount of time idle threads persist for in the queue, before being killed - static final String MAX_THREAD_IDLE_TIME = "maxThreadIdleTime"; - - // If the threadpool uses a backing queue, what is its maximum size (-1) to use direct handoff - static final String INIT_SIZE_OF_QUEUE = "sizeOfQueue"; - - // Configure if the threadpool favours fairness over throughput - static final String INIT_FAIRNESS_POLICY = "fairnessPolicy"; - - public ShardHandler getShardHandler() { - return getShardHandler(null); - } - - public ShardHandler getShardHandler(HttpClient httpClient) { - return new HttpShardHandler(this, httpClient); - } - - public void init(PluginInfo info) { - NamedList args = info.initArgs; - this.soTimeout = getParameter(args, INIT_SO_TIMEOUT, 0); - - this.scheme = getParameter(args, INIT_URL_SCHEME, "http://"); - this.scheme = (this.scheme.endsWith("://")) ? this.scheme : this.scheme + "://"; - this.connectionTimeout = getParameter(args, INIT_CONNECTION_TIMEOUT, 0); - this.maxConnectionsPerHost = getParameter(args, INIT_MAX_CONNECTION_PER_HOST, 20); - this.corePoolSize = getParameter(args, INIT_CORE_POOL_SIZE, 0); - this.maximumPoolSize = getParameter(args, INIT_MAX_POOL_SIZE, Integer.MAX_VALUE); - this.keepAliveTime = getParameter(args, MAX_THREAD_IDLE_TIME, 5); - this.queueSize = getParameter(args, INIT_SIZE_OF_QUEUE, -1); - this.accessPolicy = getParameter(args, INIT_FAIRNESS_POLICY, false); - - BlockingQueue blockingQueue = (this.queueSize == -1) ? - new SynchronousQueue(this.accessPolicy) : - new ArrayBlockingQueue(this.queueSize, this.accessPolicy); - - this.commExecutor = new ThreadPoolExecutor( - this.corePoolSize, - this.maximumPoolSize, - this.keepAliveTime, TimeUnit.SECONDS, - blockingQueue, - new DefaultSolrThreadFactory("httpShardExecutor") - ); - - mgr = new MultiThreadedHttpConnectionManager(); - mgr.getParams().setDefaultMaxConnectionsPerHost(this.maxConnectionsPerHost); - mgr.getParams().setMaxTotalConnections(10000); - mgr.getParams().setConnectionTimeout(this.connectionTimeout); - mgr.getParams().setSoTimeout(this.soTimeout); - // mgr.getParams().setStaleCheckingEnabled(false); - - client = new HttpClient(mgr); - - // prevent retries (note: this didn't work when set on mgr.. needed to be set on client) - DefaultHttpMethodRetryHandler retryhandler = new DefaultHttpMethodRetryHandler(0, false); - client.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, retryhandler); - - try { - loadbalancer = new LBHttpSolrServer(client); - } catch (MalformedURLException e) { - // should be impossible since we're not passing any URLs here - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); - } - - } - - private T getParameter(NamedList initArgs, String configKey, T defaultValue) { - T toReturn = defaultValue; - if (initArgs != null) { - T temp = (T) initArgs.get(configKey); - toReturn = (temp != null) ? temp : defaultValue; - } - log.info("Setting {} to: {}", configKey, soTimeout); - return toReturn; - } - - - @Override - public void close() { - try { - mgr.shutdown(); - } catch (Throwable e) { - SolrException.log(log, e); - } - try { - loadbalancer.shutdown(); - } catch (Throwable e) { - SolrException.log(log, e); - } - try { - commExecutor.shutdownNow(); - } catch (Throwable e) { - SolrException.log(log, e); - } - } -} +package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; +import org.apache.commons.httpclient.params.HttpMethodParams; +import org.apache.solr.client.solrj.impl.LBHttpSolrServer; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.PluginInfo; +import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.util.plugin.PluginInfoInitialized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.MalformedURLException; +import java.util.Random; +import java.util.concurrent.*; + +public class HttpShardHandlerFactory extends ShardHandlerFactory implements PluginInfoInitialized { + protected static Logger log = LoggerFactory.getLogger(HttpShardHandlerFactory.class); + + // We want an executor that doesn't take up any resources if + // it's not used, so it could be created statically for + // the distributed search component if desired. + // + // Consider CallerRuns policy and a lower max threads to throttle + // requests at some point (or should we simply return failure?) + ThreadPoolExecutor commExecutor = new ThreadPoolExecutor( + 0, + Integer.MAX_VALUE, + 5, TimeUnit.SECONDS, // terminate idle threads after 5 sec + new SynchronousQueue(), // directly hand off tasks + new DefaultSolrThreadFactory("httpShardExecutor") + ); + + HttpClient client; + Random r = new Random(); + LBHttpSolrServer loadbalancer; + int soTimeout = 0; //current default values + int connectionTimeout = 0; //current default values + int maxConnectionsPerHost = 20; + int corePoolSize = 0; + int maximumPoolSize = 10; + int keepAliveTime = 5; + int queueSize = 1; + boolean accessPolicy = true; + + public String scheme = "http://"; //current default values + + private MultiThreadedHttpConnectionManager mgr; + // socket timeout measured in ms, closes a socket if read + // takes longer than x ms to complete. throws + // java.net.SocketTimeoutException: Read timed out exception + static final String INIT_SO_TIMEOUT = "socketTimeout"; + + // connection timeout measures in ms, closes a socket if connection + // cannot be established within x ms. with a + // java.net.SocketTimeoutException: Connection timed out + static final String INIT_CONNECTION_TIMEOUT = "connTimeout"; + + // URL scheme to be used in distributed search. + static final String INIT_URL_SCHEME = "urlScheme"; + + // Maximum connections allowed per host + static final String INIT_MAX_CONNECTION_PER_HOST = "maxConnectionsPerHost"; + + // The core size of the threadpool servicing requests + static final String INIT_CORE_POOL_SIZE = "corePoolSize"; + + // The maximum size of the threadpool servicing requests + static final String INIT_MAX_POOL_SIZE = "maximumPoolSize"; + + // The amount of time idle threads persist for in the queue, before being killed + static final String MAX_THREAD_IDLE_TIME = "maxThreadIdleTime"; + + // If the threadpool uses a backing queue, what is its maximum size (-1) to use direct handoff + static final String INIT_SIZE_OF_QUEUE = "sizeOfQueue"; + + // Configure if the threadpool favours fairness over throughput + static final String INIT_FAIRNESS_POLICY = "fairnessPolicy"; + + public ShardHandler getShardHandler() { + return getShardHandler(null); + } + + public ShardHandler getShardHandler(HttpClient httpClient) { + return new HttpShardHandler(this, httpClient); + } + + public void init(PluginInfo info) { + NamedList args = info.initArgs; + this.soTimeout = getParameter(args, INIT_SO_TIMEOUT, 0); + + this.scheme = getParameter(args, INIT_URL_SCHEME, "http://"); + this.scheme = (this.scheme.endsWith("://")) ? this.scheme : this.scheme + "://"; + this.connectionTimeout = getParameter(args, INIT_CONNECTION_TIMEOUT, 0); + this.maxConnectionsPerHost = getParameter(args, INIT_MAX_CONNECTION_PER_HOST, 20); + this.corePoolSize = getParameter(args, INIT_CORE_POOL_SIZE, 0); + this.maximumPoolSize = getParameter(args, INIT_MAX_POOL_SIZE, Integer.MAX_VALUE); + this.keepAliveTime = getParameter(args, MAX_THREAD_IDLE_TIME, 5); + this.queueSize = getParameter(args, INIT_SIZE_OF_QUEUE, -1); + this.accessPolicy = getParameter(args, INIT_FAIRNESS_POLICY, false); + + BlockingQueue blockingQueue = (this.queueSize == -1) ? + new SynchronousQueue(this.accessPolicy) : + new ArrayBlockingQueue(this.queueSize, this.accessPolicy); + + this.commExecutor = new ThreadPoolExecutor( + this.corePoolSize, + this.maximumPoolSize, + this.keepAliveTime, TimeUnit.SECONDS, + blockingQueue, + new DefaultSolrThreadFactory("httpShardExecutor") + ); + + mgr = new MultiThreadedHttpConnectionManager(); + mgr.getParams().setDefaultMaxConnectionsPerHost(this.maxConnectionsPerHost); + mgr.getParams().setMaxTotalConnections(10000); + mgr.getParams().setConnectionTimeout(this.connectionTimeout); + mgr.getParams().setSoTimeout(this.soTimeout); + // mgr.getParams().setStaleCheckingEnabled(false); + + client = new HttpClient(mgr); + + // prevent retries (note: this didn't work when set on mgr.. needed to be set on client) + DefaultHttpMethodRetryHandler retryhandler = new DefaultHttpMethodRetryHandler(0, false); + client.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, retryhandler); + + try { + loadbalancer = new LBHttpSolrServer(client); + } catch (MalformedURLException e) { + // should be impossible since we're not passing any URLs here + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } + + } + + private T getParameter(NamedList initArgs, String configKey, T defaultValue) { + T toReturn = defaultValue; + if (initArgs != null) { + T temp = (T) initArgs.get(configKey); + toReturn = (temp != null) ? temp : defaultValue; + } + log.info("Setting {} to: {}", configKey, soTimeout); + return toReturn; + } + + + @Override + public void close() { + try { + mgr.shutdown(); + } catch (Throwable e) { + SolrException.log(log, e); + } + try { + loadbalancer.shutdown(); + } catch (Throwable e) { + SolrException.log(log, e); + } + try { + commExecutor.shutdownNow(); + } catch (Throwable e) { + SolrException.log(log, e); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java index 644c1d0fdee..e9c912d2af0 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardHandler.java @@ -1,28 +1,28 @@ -package org.apache.solr.handler.component; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -import org.apache.solr.common.params.ModifiableSolrParams; - -public abstract class ShardHandler { - public abstract void checkDistributed(ResponseBuilder rb); - public abstract void submit(ShardRequest sreq, String shard, ModifiableSolrParams params) ; - public abstract ShardResponse takeCompletedIncludingErrors(); - public abstract ShardResponse takeCompletedOrError(); - public abstract void cancelAll(); -} +package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.solr.common.params.ModifiableSolrParams; + +public abstract class ShardHandler { + public abstract void checkDistributed(ResponseBuilder rb); + public abstract void submit(ShardRequest sreq, String shard, ModifiableSolrParams params) ; + public abstract ShardResponse takeCompletedIncludingErrors(); + public abstract ShardResponse takeCompletedOrError(); + public abstract void cancelAll(); +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/ShardHandlerFactory.java index 54a40969d42..bba01713f3a 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardHandlerFactory.java @@ -1,25 +1,25 @@ -package org.apache.solr.handler.component; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -public abstract class ShardHandlerFactory { - - public abstract ShardHandler getShardHandler(); - - public abstract void close(); -} +package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +public abstract class ShardHandlerFactory { + + public abstract ShardHandler getShardHandler(); + + public abstract void close(); +} diff --git a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java index a5c2ef79510..158df07a606 100644 --- a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java +++ b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java @@ -1,96 +1,96 @@ -package org.apache.solr.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.util.OpenBitSet; - -import java.io.IOException; - -/** - * - */ - -public class DocSetCollector extends Collector { - int pos=0; - OpenBitSet bits; - final int maxDoc; - final int smallSetSize; - int base; - - // in case there aren't that many hits, we may not want a very sparse - // bit array. Optimistically collect the first few docs in an array - // in case there are only a few. - final int[] scratch; - - public DocSetCollector(int smallSetSize, int maxDoc) { - this.smallSetSize = smallSetSize; - this.maxDoc = maxDoc; - this.scratch = new int[smallSetSize]; - } - - @Override - public void collect(int doc) throws IOException { - doc += base; - // optimistically collect the first docs in an array - // in case the total number will be small enough to represent - // as a small set like SortedIntDocSet instead... - // Storing in this array will be quicker to convert - // than scanning through a potentially huge bit vector. - // FUTURE: when search methods all start returning docs in order, maybe - // we could have a ListDocSet() and use the collected array directly. - if (pos < scratch.length) { - scratch[pos]=doc; - } else { - // this conditional could be removed if BitSet was preallocated, but that - // would take up more memory, and add more GC time... - if (bits==null) bits = new OpenBitSet(maxDoc); - bits.fastSet(doc); - } - - pos++; - } - - public DocSet getDocSet() { - if (pos<=scratch.length) { - // assumes docs were collected in sorted order! - return new SortedIntDocSet(scratch, pos); - } else { - // set the bits for ids that were collected in the array - for (int i=0; i 1) { - path = path.substring(0, path.length() - 1); - } - - int idx = path.lastIndexOf('/'); - String parent = idx >= 0 ? path.substring(0, idx) : path; - if (parent.length() == 0) { - parent = "/"; - } - - CharArr chars = new CharArr(); - JSONWriter json = new JSONWriter(chars, 2); - json.startObject(); - - if (detail) { - if (!printZnode(json, path)) { - return; - } - json.writeValueSeparator(); - } - - json.writeString("tree"); - json.writeNameSeparator(); - json.startArray(); - if (!printTree(json, path)) { - return; // there was an error - } - json.endArray(); - json.endObject(); - out.println(chars.toString()); - } - - void writeError(int code, String msg) { - response.setStatus(code); - - CharArr chars = new CharArr(); - JSONWriter w = new JSONWriter(chars, 2); - w.startObject(); - w.indent(); - w.writeString("status"); - w.writeNameSeparator(); - w.write(code); - w.writeValueSeparator(); - w.indent(); - w.writeString("error"); - w.writeNameSeparator(); - w.writeString(msg); - w.endObject(); - - out.println(chars.toString()); - } - - - boolean printTree(JSONWriter json, String path) throws IOException { - String label = path; - if (!fullpath) { - int idx = path.lastIndexOf('/'); - label = idx > 0 ? path.substring(idx + 1) : path; - } - json.startObject(); - //writeKeyValue(json, "data", label, true ); - json.writeString("data"); - json.writeNameSeparator(); - - json.startObject(); - writeKeyValue(json, "title", label, true); - json.writeValueSeparator(); - json.writeString("attr"); - json.writeNameSeparator(); - json.startObject(); - writeKeyValue(json, "href", "zookeeper?detail=true&path=" + URLEncoder.encode(path, "UTF-8"), true); - json.endObject(); - json.endObject(); - - Stat stat = new Stat(); - try { - // Trickily, the call to zkClient.getData fills in the stat variable - byte[] data = zkClient.getData(path, null, stat, true); - - if (stat.getEphemeralOwner() != 0) { - writeKeyValue(json, "ephemeral", true, false); - writeKeyValue(json, "version", stat.getVersion(), false); - } - - if (dump) { - json.writeValueSeparator(); - printZnode(json, path); - } - - /* - if (stat.getNumChildren() != 0) - { - writeKeyValue(json, "children_count", stat.getNumChildren(), false ); - out.println(", \"children_count\" : \"" + stat.getNumChildren() + "\""); - } - */ - - //if (stat.getDataLength() != 0) - if (data != null) { - String str = new BytesRef(data).utf8ToString(); - //?? writeKeyValue(json, "content", str, false ); - // Does nothing now, but on the assumption this will be used later we'll leave it in. If it comes out - // the catches below need to be restructured. - } - } catch (IllegalArgumentException e) { - // path doesn't exist (must have been removed) - writeKeyValue(json, "warning", "(path gone)", false); - } catch (KeeperException e) { - writeKeyValue(json, "warning", e.toString(), false); - log.warn("Keeper Exception", e); - } catch (InterruptedException e) { - writeKeyValue(json, "warning", e.toString(), false); - log.warn("InterruptedException", e); - } - - if (stat.getNumChildren() > 0) { - json.writeValueSeparator(); - if (indent) { - json.indent(); - } - json.writeString("children"); - json.writeNameSeparator(); - json.startArray(); - - try { - List children = zkClient.getChildren(path, null, true); - java.util.Collections.sort(children); - - boolean first = true; - for (String child : children) { - if (!first) { - json.writeValueSeparator(); - } - - String childPath = path + (path.endsWith("/") ? "" : "/") + child; - if (!printTree(json, childPath)) { - return false; - } - first = false; - } - } catch (KeeperException e) { - writeError(500, e.toString()); - return false; - } catch (InterruptedException e) { - writeError(500, e.toString()); - return false; - } catch (IllegalArgumentException e) { - // path doesn't exist (must have been removed) - json.writeString("(children gone)"); - } - - json.endArray(); - } - - json.endObject(); - return true; - } - - String time(long ms) { - return (new Date(ms)).toString() + " (" + ms + ")"; - } - - public void writeKeyValue(JSONWriter json, String k, Object v, boolean isFirst) { - if (!isFirst) { - json.writeValueSeparator(); - } - if (indent) { - json.indent(); - } - json.writeString(k); - json.writeNameSeparator(); - json.write(v); - } - - boolean printZnode(JSONWriter json, String path) throws IOException { - try { - Stat stat = new Stat(); - // Trickily, the call to zkClient.getData fills in the stat variable - byte[] data = zkClient.getData(path, null, stat, true); - - json.writeString("znode"); - json.writeNameSeparator(); - json.startObject(); - - writeKeyValue(json, "path", path, true); - - json.writeValueSeparator(); - json.writeString("prop"); - json.writeNameSeparator(); - json.startObject(); - writeKeyValue(json, "version", stat.getVersion(), true); - writeKeyValue(json, "aversion", stat.getAversion(), false); - writeKeyValue(json, "children_count", stat.getNumChildren(), false); - writeKeyValue(json, "ctime", time(stat.getCtime()), false); - writeKeyValue(json, "cversion", stat.getCversion(), false); - writeKeyValue(json, "czxid", stat.getCzxid(), false); - writeKeyValue(json, "dataLength", stat.getDataLength(), false); - writeKeyValue(json, "ephemeralOwner", stat.getEphemeralOwner(), false); - writeKeyValue(json, "mtime", time(stat.getMtime()), false); - writeKeyValue(json, "mzxid", stat.getMzxid(), false); - writeKeyValue(json, "pzxid", stat.getPzxid(), false); - json.endObject(); - - if (data != null) { - writeKeyValue(json, "data", new BytesRef(data).utf8ToString(), false); - } - json.endObject(); - } catch (KeeperException e) { - writeError(500, e.toString()); - return false; - } catch (InterruptedException e) { - writeError(500, e.toString()); - return false; - } - return true; - } - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.servlet; + +import org.apache.lucene.util.BytesRef; +import org.apache.noggit.CharArr; +import org.apache.noggit.JSONWriter; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.core.CoreContainer; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.io.PrintWriter; +import java.net.URLEncoder; +import java.util.Date; +import java.util.List; +import java.util.concurrent.TimeoutException; + + +/** + * Zookeeper Info + * + * @since solr 4.0 + */ +public final class ZookeeperInfoServlet extends HttpServlet { + static final Logger log = LoggerFactory.getLogger(ZookeeperInfoServlet.class); + + @Override + public void init() throws ServletException { + } + + @Override + public void doGet(HttpServletRequest request, + HttpServletResponse response) + throws IOException, ServletException { + response.setCharacterEncoding("UTF-8"); + response.setContentType("application/json"); + + // This attribute is set by the SolrDispatchFilter + CoreContainer cores = (CoreContainer) request.getAttribute("org.apache.solr.CoreContainer"); + + String path = request.getParameter("path"); + String addr = request.getParameter("addr"); + + if (addr != null && addr.length() == 0) { + addr = null; + } + + String detailS = request.getParameter("detail"); + boolean detail = detailS != null && detailS.equals("true"); + + String dumpS = request.getParameter("dump"); + boolean dump = dumpS != null && dumpS.equals("true"); + + PrintWriter out = response.getWriter(); + + + ZKPrinter printer = new ZKPrinter(response, out, cores.getZkController(), addr); + printer.detail = detail; + printer.dump = dump; + + try { + printer.print(path); + } finally { + printer.close(); + } + } + + @Override + public void doPost(HttpServletRequest request, + HttpServletResponse response) + throws IOException, ServletException { + doGet(request, response); + } + + + //-------------------------------------------------------------------------------------- + // + //-------------------------------------------------------------------------------------- + + static class ZKPrinter { + static boolean FULLPATH_DEFAULT = false; + + boolean indent = true; + boolean fullpath = FULLPATH_DEFAULT; + boolean detail = false; + boolean dump = false; + + String addr; // the address passed to us + String keeperAddr; // the address we're connected to + + boolean doClose; // close the client after done if we opened it + + final HttpServletResponse response; + final PrintWriter out; + SolrZkClient zkClient; + + int level; + int maxData = 95; + + public ZKPrinter(HttpServletResponse response, PrintWriter out, ZkController controller, String addr) throws IOException { + this.response = response; + this.out = out; + this.addr = addr; + + if (addr == null) { + if (controller != null) { + // this core is zk enabled + keeperAddr = controller.getZkServerAddress(); + zkClient = controller.getZkClient(); + if (zkClient != null && zkClient.isConnected()) { + return; + } else { + // try a different client with this address + addr = keeperAddr; + } + } + } + + keeperAddr = addr; + if (addr == null) { + writeError(404, "Zookeeper is not configured for this Solr Core. Please try connecting to an alternate zookeeper address."); + return; + } + + try { + zkClient = new SolrZkClient(addr, 10000); + doClose = true; + } catch (TimeoutException e) { + writeError(503, "Could not connect to zookeeper at '" + addr + "'\""); + zkClient = null; + return; + } catch (InterruptedException e) { + // Restore the interrupted status + Thread.currentThread().interrupt(); + writeError(503, "Could not connect to zookeeper at '" + addr + "'\""); + zkClient = null; + return; + } + + } + + public void close() { + try { + if (doClose) { + zkClient.close(); + } + } catch (InterruptedException e) { + // ignore exception on close + } + } + + // main entry point + void print(String path) throws IOException { + if (zkClient == null) { + return; + } + + // normalize path + if (path == null) { + path = "/"; + } else { + path.trim(); + if (path.length() == 0) { + path = "/"; + } + } + + if (path.endsWith("/") && path.length() > 1) { + path = path.substring(0, path.length() - 1); + } + + int idx = path.lastIndexOf('/'); + String parent = idx >= 0 ? path.substring(0, idx) : path; + if (parent.length() == 0) { + parent = "/"; + } + + CharArr chars = new CharArr(); + JSONWriter json = new JSONWriter(chars, 2); + json.startObject(); + + if (detail) { + if (!printZnode(json, path)) { + return; + } + json.writeValueSeparator(); + } + + json.writeString("tree"); + json.writeNameSeparator(); + json.startArray(); + if (!printTree(json, path)) { + return; // there was an error + } + json.endArray(); + json.endObject(); + out.println(chars.toString()); + } + + void writeError(int code, String msg) { + response.setStatus(code); + + CharArr chars = new CharArr(); + JSONWriter w = new JSONWriter(chars, 2); + w.startObject(); + w.indent(); + w.writeString("status"); + w.writeNameSeparator(); + w.write(code); + w.writeValueSeparator(); + w.indent(); + w.writeString("error"); + w.writeNameSeparator(); + w.writeString(msg); + w.endObject(); + + out.println(chars.toString()); + } + + + boolean printTree(JSONWriter json, String path) throws IOException { + String label = path; + if (!fullpath) { + int idx = path.lastIndexOf('/'); + label = idx > 0 ? path.substring(idx + 1) : path; + } + json.startObject(); + //writeKeyValue(json, "data", label, true ); + json.writeString("data"); + json.writeNameSeparator(); + + json.startObject(); + writeKeyValue(json, "title", label, true); + json.writeValueSeparator(); + json.writeString("attr"); + json.writeNameSeparator(); + json.startObject(); + writeKeyValue(json, "href", "zookeeper?detail=true&path=" + URLEncoder.encode(path, "UTF-8"), true); + json.endObject(); + json.endObject(); + + Stat stat = new Stat(); + try { + // Trickily, the call to zkClient.getData fills in the stat variable + byte[] data = zkClient.getData(path, null, stat, true); + + if (stat.getEphemeralOwner() != 0) { + writeKeyValue(json, "ephemeral", true, false); + writeKeyValue(json, "version", stat.getVersion(), false); + } + + if (dump) { + json.writeValueSeparator(); + printZnode(json, path); + } + + /* + if (stat.getNumChildren() != 0) + { + writeKeyValue(json, "children_count", stat.getNumChildren(), false ); + out.println(", \"children_count\" : \"" + stat.getNumChildren() + "\""); + } + */ + + //if (stat.getDataLength() != 0) + if (data != null) { + String str = new BytesRef(data).utf8ToString(); + //?? writeKeyValue(json, "content", str, false ); + // Does nothing now, but on the assumption this will be used later we'll leave it in. If it comes out + // the catches below need to be restructured. + } + } catch (IllegalArgumentException e) { + // path doesn't exist (must have been removed) + writeKeyValue(json, "warning", "(path gone)", false); + } catch (KeeperException e) { + writeKeyValue(json, "warning", e.toString(), false); + log.warn("Keeper Exception", e); + } catch (InterruptedException e) { + writeKeyValue(json, "warning", e.toString(), false); + log.warn("InterruptedException", e); + } + + if (stat.getNumChildren() > 0) { + json.writeValueSeparator(); + if (indent) { + json.indent(); + } + json.writeString("children"); + json.writeNameSeparator(); + json.startArray(); + + try { + List children = zkClient.getChildren(path, null, true); + java.util.Collections.sort(children); + + boolean first = true; + for (String child : children) { + if (!first) { + json.writeValueSeparator(); + } + + String childPath = path + (path.endsWith("/") ? "" : "/") + child; + if (!printTree(json, childPath)) { + return false; + } + first = false; + } + } catch (KeeperException e) { + writeError(500, e.toString()); + return false; + } catch (InterruptedException e) { + writeError(500, e.toString()); + return false; + } catch (IllegalArgumentException e) { + // path doesn't exist (must have been removed) + json.writeString("(children gone)"); + } + + json.endArray(); + } + + json.endObject(); + return true; + } + + String time(long ms) { + return (new Date(ms)).toString() + " (" + ms + ")"; + } + + public void writeKeyValue(JSONWriter json, String k, Object v, boolean isFirst) { + if (!isFirst) { + json.writeValueSeparator(); + } + if (indent) { + json.indent(); + } + json.writeString(k); + json.writeNameSeparator(); + json.write(v); + } + + boolean printZnode(JSONWriter json, String path) throws IOException { + try { + Stat stat = new Stat(); + // Trickily, the call to zkClient.getData fills in the stat variable + byte[] data = zkClient.getData(path, null, stat, true); + + json.writeString("znode"); + json.writeNameSeparator(); + json.startObject(); + + writeKeyValue(json, "path", path, true); + + json.writeValueSeparator(); + json.writeString("prop"); + json.writeNameSeparator(); + json.startObject(); + writeKeyValue(json, "version", stat.getVersion(), true); + writeKeyValue(json, "aversion", stat.getAversion(), false); + writeKeyValue(json, "children_count", stat.getNumChildren(), false); + writeKeyValue(json, "ctime", time(stat.getCtime()), false); + writeKeyValue(json, "cversion", stat.getCversion(), false); + writeKeyValue(json, "czxid", stat.getCzxid(), false); + writeKeyValue(json, "dataLength", stat.getDataLength(), false); + writeKeyValue(json, "ephemeralOwner", stat.getEphemeralOwner(), false); + writeKeyValue(json, "mtime", time(stat.getMtime()), false); + writeKeyValue(json, "mzxid", stat.getMzxid(), false); + writeKeyValue(json, "pzxid", stat.getPzxid(), false); + json.endObject(); + + if (data != null) { + writeKeyValue(json, "data", new BytesRef(data).utf8ToString(), false); + } + json.endObject(); + } catch (KeeperException e) { + writeError(500, e.toString()); + return false; + } catch (InterruptedException e) { + writeError(500, e.toString()); + return false; + } + return true; + } + } +} diff --git a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java index 7e458852190..d1e03c93e09 100644 --- a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java +++ b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java @@ -1,456 +1,456 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.search; - -import org.apache.solr.SolrTestCaseJ4; - -import org.apache.commons.lang.StringUtils; - -import org.junit.BeforeClass; -import org.junit.Test; - -import java.util.List; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; - -public class TestPseudoReturnFields extends SolrTestCaseJ4 { - - // :TODO: datatypes produced by the functions used may change - - /** - * values of the fl param that mean all real fields - */ - private static String[] ALL_REAL_FIELDS = new String[] { "", "*" }; - - /** - * values of the fl param that mean all real fields and score - */ - private static String[] SCORE_AND_REAL_FIELDS = new String[] { - "score,*", "*,score" - }; - - @BeforeClass - public static void beforeTests() throws Exception { - initCore("solrconfig.xml","schema12.xml"); - - - assertU(adoc("id", "42", "val_i", "1", "ssto", "X", "subject", "aaa")); - assertU(adoc("id", "43", "val_i", "9", "ssto", "X", "subject", "bbb")); - assertU(adoc("id", "44", "val_i", "4", "ssto", "X", "subject", "aaa")); - assertU(adoc("id", "45", "val_i", "6", "ssto", "X", "subject", "aaa")); - assertU(adoc("id", "46", "val_i", "3", "ssto", "X", "subject", "ggg")); - assertU(commit()); - } - - @Test - public void testAllRealFields() throws Exception { - - for (String fl : ALL_REAL_FIELDS) { - assertQ("fl="+fl+" ... all real fields", - req("q","*:*", "rows", "1", "fl",fl) - ,"//result[@numFound='5']" - ,"//result/doc/str[@name='id']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='ssto']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=4]" - ); - } - } - - @Test - public void testScoreAndAllRealFields() throws Exception { - - for (String fl : SCORE_AND_REAL_FIELDS) { - assertQ("fl="+fl+" ... score and real fields", - req("q","*:*", "rows", "1", "fl",fl) - ,"//result[@numFound='5']" - ,"//result/doc/str[@name='id']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='ssto']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/float[@name='score']" - - ,"//result/doc[count(*)=5]" - ); - } - } - - @Test - public void testScoreAndExplicitRealFields() throws Exception { - - assertQ("fl=score,val_i", - req("q","*:*", "rows", "1", "fl","score,val_i") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/float[@name='score']" - - ,"//result/doc[count(*)=2]" - ); - assertQ("fl=score&fl=val_i", - req("q","*:*", "rows", "1", "fl","score","fl","val_i") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/float[@name='score']" - - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=val_i", - req("q","*:*", "rows", "1", "fl","val_i") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - - ,"//result/doc[count(*)=1]" - ); - } - - @Test - public void testFunctions() throws Exception { - assertQ("fl=log(val_i)", - req("q","*:*", "rows", "1", "fl","log(val_i)") - ,"//result[@numFound='5']" - ,"//result/doc/double[@name='log(val_i)']" - - ,"//result/doc[count(*)=1]" - ); - - assertQ("fl=log(val_i),abs(val_i)", - req("q","*:*", "rows", "1", "fl","log(val_i),abs(val_i)") - ,"//result[@numFound='5']" - ,"//result/doc/double[@name='log(val_i)']" - ,"//result/doc/float[@name='abs(val_i)']" - - ,"//result/doc[count(*)=2]" - ); - assertQ("fl=log(val_i)&fl=abs(val_i)", - req("q","*:*", "rows", "1", "fl","log(val_i)","fl","abs(val_i)") - ,"//result[@numFound='5']" - ,"//result/doc/double[@name='log(val_i)']" - ,"//result/doc/float[@name='abs(val_i)']" - - ,"//result/doc[count(*)=2]" - ); - } - - @Test - public void testFunctionsAndExplicit() throws Exception { - assertQ("fl=log(val_i),val_i", - req("q","*:*", "rows", "1", "fl","log(val_i),val_i") - ,"//result[@numFound='5']" - ,"//result/doc/double[@name='log(val_i)']" - ,"//result/doc/int[@name='val_i']" - - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=log(val_i)&fl=val_i", - req("q","*:*", "rows", "1", "fl","log(val_i)","fl","val_i") - ,"//result[@numFound='5']" - ,"//result/doc/double[@name='log(val_i)']" - ,"//result/doc/int[@name='val_i']" - - ,"//result/doc[count(*)=2]" - ); - } - - @Test - public void testFunctionsAndScore() throws Exception { - - assertQ("fl=log(val_i),score", - req("q","*:*", "rows", "1", "fl","log(val_i),score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/double[@name='log(val_i)']" - - ,"//result/doc[count(*)=2]" - ); - assertQ("fl=log(val_i)&fl=score", - req("q","*:*", "rows", "1", "fl","log(val_i)","fl","score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/double[@name='log(val_i)']" - - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=score,log(val_i),abs(val_i)", - req("q","*:*", "rows", "1", - "fl","score,log(val_i),abs(val_i)") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/double[@name='log(val_i)']" - ,"//result/doc/float[@name='abs(val_i)']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=score&fl=log(val_i)&fl=abs(val_i)", - req("q","*:*", "rows", "1", - "fl","score","fl","log(val_i)","fl","abs(val_i)") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/double[@name='log(val_i)']" - ,"//result/doc/float[@name='abs(val_i)']" - - ,"//result/doc[count(*)=3]" - ); - - } - - @Test - public void testGlobs() throws Exception { - assertQ("fl=val_*", - req("q","*:*", "rows", "1", "fl","val_*") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - - ,"//result/doc[count(*)=1]" - ); - - assertQ("fl=val_*,subj*", - req("q","*:*", "rows", "1", "fl","val_*,subj*") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=2]" - ); - assertQ("fl=val_*&fl=subj*", - req("q","*:*", "rows", "1", "fl","val_*","fl","subj*") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=2]" - ); - } - - @Test - public void testGlobsAndExplicit() throws Exception { - assertQ("fl=val_*,id", - req("q","*:*", "rows", "1", "fl","val_*,id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=val_*,subj*,id", - req("q","*:*", "rows", "1", "fl","val_*,subj*,id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=val_*&fl=subj*&fl=id", - req("q","*:*", "rows", "1", "fl","val_*","fl","subj*","fl","id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); - } - - @Test - public void testGlobsAndScore() throws Exception { - assertQ("fl=val_*,score", - req("q","*:*", "rows", "1", "fl","val_*,score", "indent", "true") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='val_i']" - - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=val_*,subj*,score", - req("q","*:*", "rows", "1", "fl","val_*,subj*,score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=val_*&fl=subj*&fl=score", - req("q","*:*", "rows", "1", - "fl","val_*","fl","subj*","fl","score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=3]" - ); - - - } - - @Test - public void testAugmenters() throws Exception { - assertQ("fl=[docid]", - req("q","*:*", "rows", "1", "fl","[docid]") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - - ,"//result/doc[count(*)=1]" - ); - - assertQ("fl=[docid],[explain]", - req("q","*:*", "rows", "1", "fl","[docid],[explain]") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=2]" - ); - assertQ("fl=[docid]&fl=[explain]", - req("q","*:*", "rows", "1", "fl","[docid]","fl","[explain]") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=2]" - ); - } - - @Test - public void testAugmentersAndExplicit() throws Exception { - assertQ("fl=[docid],id", - req("q","*:*", "rows", "1", - "fl","[docid],id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=[docid],[explain],id", - req("q","*:*", "rows", "1", - "fl","[docid],[explain],id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=[docid]&fl=[explain]&fl=id", - req("q","*:*", "rows", "1", - "fl","[docid]","fl","[explain]","fl","id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); - } - - @Test - public void testAugmentersAndScore() throws Exception { - assertQ("fl=[docid],score", - req("q","*:*", "rows", "1", - "fl","[docid],score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='[docid]']" - - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=[docid],[explain],score", - req("q","*:*", "rows", "1", - "fl","[docid],[explain],score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=[docid]&fl=[explain]&fl=score", - req("q","*:*", "rows", "1", - "fl","[docid]","fl","[explain]","fl","score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=3]" - ); - } - - @Test - public void testAugmentersGlobsExplicitAndScoreOhMy() throws Exception { - - // NOTE: 'ssto' is the missing one - final List fl = Arrays.asList - ("id","[docid]","[explain]","score","val_*","subj*"); - - final int iters = atLeast(random, 10); - for (int i = 0; i< iters; i++) { - - Collections.shuffle(fl, random); - - final String singleFl = StringUtils.join(fl.toArray(),','); - assertQ("fl=" + singleFl, - req("q","*:*", "rows", "1","fl",singleFl) - ,"//result[@numFound='5']" - ,"//result/doc/str[@name='id']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=6]" - ); - - final List params = new ArrayList((fl.size()*2) + 4); - final StringBuilder info = new StringBuilder(); - params.addAll(Arrays.asList("q","*:*", "rows", "1")); - for (String item : fl) { - params.add("fl"); - params.add(item); - info.append("&fl=").append(item); - } - - assertQ(info.toString(), - req((String[])params.toArray(new String[0])) - ,"//result[@numFound='5']" - ,"//result/doc/str[@name='id']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=6]" - ); - - } - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search; + +import org.apache.solr.SolrTestCaseJ4; + +import org.apache.commons.lang.StringUtils; + +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; + +public class TestPseudoReturnFields extends SolrTestCaseJ4 { + + // :TODO: datatypes produced by the functions used may change + + /** + * values of the fl param that mean all real fields + */ + private static String[] ALL_REAL_FIELDS = new String[] { "", "*" }; + + /** + * values of the fl param that mean all real fields and score + */ + private static String[] SCORE_AND_REAL_FIELDS = new String[] { + "score,*", "*,score" + }; + + @BeforeClass + public static void beforeTests() throws Exception { + initCore("solrconfig.xml","schema12.xml"); + + + assertU(adoc("id", "42", "val_i", "1", "ssto", "X", "subject", "aaa")); + assertU(adoc("id", "43", "val_i", "9", "ssto", "X", "subject", "bbb")); + assertU(adoc("id", "44", "val_i", "4", "ssto", "X", "subject", "aaa")); + assertU(adoc("id", "45", "val_i", "6", "ssto", "X", "subject", "aaa")); + assertU(adoc("id", "46", "val_i", "3", "ssto", "X", "subject", "ggg")); + assertU(commit()); + } + + @Test + public void testAllRealFields() throws Exception { + + for (String fl : ALL_REAL_FIELDS) { + assertQ("fl="+fl+" ... all real fields", + req("q","*:*", "rows", "1", "fl",fl) + ,"//result[@numFound='5']" + ,"//result/doc/str[@name='id']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='ssto']" + ,"//result/doc/str[@name='subject']" + + ,"//result/doc[count(*)=4]" + ); + } + } + + @Test + public void testScoreAndAllRealFields() throws Exception { + + for (String fl : SCORE_AND_REAL_FIELDS) { + assertQ("fl="+fl+" ... score and real fields", + req("q","*:*", "rows", "1", "fl",fl) + ,"//result[@numFound='5']" + ,"//result/doc/str[@name='id']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='ssto']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/float[@name='score']" + + ,"//result/doc[count(*)=5]" + ); + } + } + + @Test + public void testScoreAndExplicitRealFields() throws Exception { + + assertQ("fl=score,val_i", + req("q","*:*", "rows", "1", "fl","score,val_i") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/float[@name='score']" + + ,"//result/doc[count(*)=2]" + ); + assertQ("fl=score&fl=val_i", + req("q","*:*", "rows", "1", "fl","score","fl","val_i") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/float[@name='score']" + + ,"//result/doc[count(*)=2]" + ); + + assertQ("fl=val_i", + req("q","*:*", "rows", "1", "fl","val_i") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + + ,"//result/doc[count(*)=1]" + ); + } + + @Test + public void testFunctions() throws Exception { + assertQ("fl=log(val_i)", + req("q","*:*", "rows", "1", "fl","log(val_i)") + ,"//result[@numFound='5']" + ,"//result/doc/double[@name='log(val_i)']" + + ,"//result/doc[count(*)=1]" + ); + + assertQ("fl=log(val_i),abs(val_i)", + req("q","*:*", "rows", "1", "fl","log(val_i),abs(val_i)") + ,"//result[@numFound='5']" + ,"//result/doc/double[@name='log(val_i)']" + ,"//result/doc/float[@name='abs(val_i)']" + + ,"//result/doc[count(*)=2]" + ); + assertQ("fl=log(val_i)&fl=abs(val_i)", + req("q","*:*", "rows", "1", "fl","log(val_i)","fl","abs(val_i)") + ,"//result[@numFound='5']" + ,"//result/doc/double[@name='log(val_i)']" + ,"//result/doc/float[@name='abs(val_i)']" + + ,"//result/doc[count(*)=2]" + ); + } + + @Test + public void testFunctionsAndExplicit() throws Exception { + assertQ("fl=log(val_i),val_i", + req("q","*:*", "rows", "1", "fl","log(val_i),val_i") + ,"//result[@numFound='5']" + ,"//result/doc/double[@name='log(val_i)']" + ,"//result/doc/int[@name='val_i']" + + ,"//result/doc[count(*)=2]" + ); + + assertQ("fl=log(val_i)&fl=val_i", + req("q","*:*", "rows", "1", "fl","log(val_i)","fl","val_i") + ,"//result[@numFound='5']" + ,"//result/doc/double[@name='log(val_i)']" + ,"//result/doc/int[@name='val_i']" + + ,"//result/doc[count(*)=2]" + ); + } + + @Test + public void testFunctionsAndScore() throws Exception { + + assertQ("fl=log(val_i),score", + req("q","*:*", "rows", "1", "fl","log(val_i),score") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/double[@name='log(val_i)']" + + ,"//result/doc[count(*)=2]" + ); + assertQ("fl=log(val_i)&fl=score", + req("q","*:*", "rows", "1", "fl","log(val_i)","fl","score") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/double[@name='log(val_i)']" + + ,"//result/doc[count(*)=2]" + ); + + assertQ("fl=score,log(val_i),abs(val_i)", + req("q","*:*", "rows", "1", + "fl","score,log(val_i),abs(val_i)") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/double[@name='log(val_i)']" + ,"//result/doc/float[@name='abs(val_i)']" + + ,"//result/doc[count(*)=3]" + ); + assertQ("fl=score&fl=log(val_i)&fl=abs(val_i)", + req("q","*:*", "rows", "1", + "fl","score","fl","log(val_i)","fl","abs(val_i)") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/double[@name='log(val_i)']" + ,"//result/doc/float[@name='abs(val_i)']" + + ,"//result/doc[count(*)=3]" + ); + + } + + @Test + public void testGlobs() throws Exception { + assertQ("fl=val_*", + req("q","*:*", "rows", "1", "fl","val_*") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + + ,"//result/doc[count(*)=1]" + ); + + assertQ("fl=val_*,subj*", + req("q","*:*", "rows", "1", "fl","val_*,subj*") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + + ,"//result/doc[count(*)=2]" + ); + assertQ("fl=val_*&fl=subj*", + req("q","*:*", "rows", "1", "fl","val_*","fl","subj*") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + + ,"//result/doc[count(*)=2]" + ); + } + + @Test + public void testGlobsAndExplicit() throws Exception { + assertQ("fl=val_*,id", + req("q","*:*", "rows", "1", "fl","val_*,id") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='id']" + + ,"//result/doc[count(*)=2]" + ); + + assertQ("fl=val_*,subj*,id", + req("q","*:*", "rows", "1", "fl","val_*,subj*,id") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/str[@name='id']" + + ,"//result/doc[count(*)=3]" + ); + assertQ("fl=val_*&fl=subj*&fl=id", + req("q","*:*", "rows", "1", "fl","val_*","fl","subj*","fl","id") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/str[@name='id']" + + ,"//result/doc[count(*)=3]" + ); + } + + @Test + public void testGlobsAndScore() throws Exception { + assertQ("fl=val_*,score", + req("q","*:*", "rows", "1", "fl","val_*,score", "indent", "true") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/int[@name='val_i']" + + ,"//result/doc[count(*)=2]" + ); + + assertQ("fl=val_*,subj*,score", + req("q","*:*", "rows", "1", "fl","val_*,subj*,score") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + + ,"//result/doc[count(*)=3]" + ); + assertQ("fl=val_*&fl=subj*&fl=score", + req("q","*:*", "rows", "1", + "fl","val_*","fl","subj*","fl","score") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + + ,"//result/doc[count(*)=3]" + ); + + + } + + @Test + public void testAugmenters() throws Exception { + assertQ("fl=[docid]", + req("q","*:*", "rows", "1", "fl","[docid]") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='[docid]']" + + ,"//result/doc[count(*)=1]" + ); + + assertQ("fl=[docid],[explain]", + req("q","*:*", "rows", "1", "fl","[docid],[explain]") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + + ,"//result/doc[count(*)=2]" + ); + assertQ("fl=[docid]&fl=[explain]", + req("q","*:*", "rows", "1", "fl","[docid]","fl","[explain]") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + + ,"//result/doc[count(*)=2]" + ); + } + + @Test + public void testAugmentersAndExplicit() throws Exception { + assertQ("fl=[docid],id", + req("q","*:*", "rows", "1", + "fl","[docid],id") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='id']" + + ,"//result/doc[count(*)=2]" + ); + + assertQ("fl=[docid],[explain],id", + req("q","*:*", "rows", "1", + "fl","[docid],[explain],id") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + ,"//result/doc/str[@name='id']" + + ,"//result/doc[count(*)=3]" + ); + assertQ("fl=[docid]&fl=[explain]&fl=id", + req("q","*:*", "rows", "1", + "fl","[docid]","fl","[explain]","fl","id") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + ,"//result/doc/str[@name='id']" + + ,"//result/doc[count(*)=3]" + ); + } + + @Test + public void testAugmentersAndScore() throws Exception { + assertQ("fl=[docid],score", + req("q","*:*", "rows", "1", + "fl","[docid],score") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/int[@name='[docid]']" + + ,"//result/doc[count(*)=2]" + ); + + assertQ("fl=[docid],[explain],score", + req("q","*:*", "rows", "1", + "fl","[docid],[explain],score") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + + ,"//result/doc[count(*)=3]" + ); + assertQ("fl=[docid]&fl=[explain]&fl=score", + req("q","*:*", "rows", "1", + "fl","[docid]","fl","[explain]","fl","score") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + + ,"//result/doc[count(*)=3]" + ); + } + + @Test + public void testAugmentersGlobsExplicitAndScoreOhMy() throws Exception { + + // NOTE: 'ssto' is the missing one + final List fl = Arrays.asList + ("id","[docid]","[explain]","score","val_*","subj*"); + + final int iters = atLeast(random, 10); + for (int i = 0; i< iters; i++) { + + Collections.shuffle(fl, random); + + final String singleFl = StringUtils.join(fl.toArray(),','); + assertQ("fl=" + singleFl, + req("q","*:*", "rows", "1","fl",singleFl) + ,"//result[@numFound='5']" + ,"//result/doc/str[@name='id']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + + ,"//result/doc[count(*)=6]" + ); + + final List params = new ArrayList((fl.size()*2) + 4); + final StringBuilder info = new StringBuilder(); + params.addAll(Arrays.asList("q","*:*", "rows", "1")); + for (String item : fl) { + params.add("fl"); + params.add(item); + info.append("&fl=").append(item); + } + + assertQ(info.toString(), + req((String[])params.toArray(new String[0])) + ,"//result[@numFound='5']" + ,"//result/doc/str[@name='id']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + + ,"//result/doc[count(*)=6]" + ); + + } + } +} diff --git a/solr/test-framework/src/java/org/apache/solr/util/DOMUtilTestBase.java b/solr/test-framework/src/java/org/apache/solr/util/DOMUtilTestBase.java index b4a7f3dfc31..f4841d537d0 100644 --- a/solr/test-framework/src/java/org/apache/solr/util/DOMUtilTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/util/DOMUtilTestBase.java @@ -1,56 +1,56 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.util; - - -import java.io.StringReader; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.xpath.XPath; -import javax.xml.xpath.XPathConstants; -import javax.xml.xpath.XPathFactory; - -import org.apache.lucene.util.LuceneTestCase; -import org.w3c.dom.Document; -import org.w3c.dom.Node; -import org.xml.sax.InputSource; - -public abstract class DOMUtilTestBase extends LuceneTestCase { - - private DocumentBuilder builder; - private static final XPathFactory xpathFactory = XPathFactory.newInstance(); - - @Override - public void setUp() throws Exception { - super.setUp(); - builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - } - - public Node getNode( String xml, String path ) throws Exception { - return getNode( getDocument(xml), path ); - } - - public Node getNode( Document doc, String path ) throws Exception { - XPath xpath = xpathFactory.newXPath(); - return (Node)xpath.evaluate(path, doc, XPathConstants.NODE); - } - - public Document getDocument( String xml ) throws Exception { - return builder.parse(new InputSource(new StringReader(xml))); - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util; + + +import java.io.StringReader; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathFactory; + +import org.apache.lucene.util.LuceneTestCase; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.xml.sax.InputSource; + +public abstract class DOMUtilTestBase extends LuceneTestCase { + + private DocumentBuilder builder; + private static final XPathFactory xpathFactory = XPathFactory.newInstance(); + + @Override + public void setUp() throws Exception { + super.setUp(); + builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + } + + public Node getNode( String xml, String path ) throws Exception { + return getNode( getDocument(xml), path ); + } + + public Node getNode( Document doc, String path ) throws Exception { + XPath xpath = xpathFactory.newXPath(); + return (Node)xpath.evaluate(path, doc, XPathConstants.NODE); + } + + public Document getDocument( String xml ) throws Exception { + return builder.parse(new InputSource(new StringReader(xml))); + } +}