diff --git a/dev-tools/scripts/diffSources.py b/dev-tools/scripts/diffSources.py index a3c6accd8d7..d8f3b59f4f8 100644 --- a/dev-tools/scripts/diffSources.py +++ b/dev-tools/scripts/diffSources.py @@ -50,7 +50,7 @@ while True: elif l.endswith('\n'): l = l[:-1] if l.startswith('diff ') or l.startswith('Binary files '): - keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml')) and l.find('/.svn/') == -1)) + keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml') or l.lower().endswith('.html') or l.lower().endswith('.template') or l.lower().endswith('.py') or l.lower().endswith('.g') or l.lower().endswith('.properties')) and l.find('/.svn/') == -1)) if keep: print print diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 20a0bd29177..f0779b2f309 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -62,6 +62,22 @@ Velocity 1.7 and Velocity Tools 2.0 Apache UIMA 2.3.1 Apache ZooKeeper 3.4.5 +Detailed Change List +---------------------- + +New Features +---------------------- + +* SOLR-5167: Add support for AnalyzingInfixSuggester (AnalyzingInfixLookupFactory). + (Areek Zillur, Varun Thacker via Robert Muir) + +Other Changes +---------------------- + +* SOLR-5237: Add indexHeapUsageBytes to LukeRequestHandler, indicating how much + heap memory is being used by the underlying Lucene index structures. 
+  (Areek Zillur via Robert Muir)
+
 ================== 4.5.0 ==================
 
 Versions of Major Components
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
index 6bc4e8c54ff..d521cb2fd44 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
@@ -555,6 +555,7 @@ public class LukeRequestHandler extends RequestHandlerBase
     indexInfo.add("numDocs", reader.numDocs());
     indexInfo.add("maxDoc", reader.maxDoc());
     indexInfo.add("deletedDocs", reader.maxDoc() - reader.numDocs());
+    indexInfo.add("indexHeapUsageBytes", getIndexHeapUsed(reader));
 
     indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
     indexInfo.add("segmentCount", reader.leaves().size());
@@ -569,6 +570,26 @@ public class LukeRequestHandler extends RequestHandlerBase
     return indexInfo;
   }
 
+  /**
+   * Adds up the RAM bytes reported by every segment of the given reader.
+   *
+   * @param reader reader whose leaves are inspected
+   * @return sum of {@code ramBytesUsed()} over all leaves, or -1 as soon as a
+   *         leaf that is not a {@link SegmentReader} is encountered
+   */
+  private static long getIndexHeapUsed(DirectoryReader reader) {
+    long totalBytes = 0;
+    for (AtomicReaderContext leaf : reader.leaves()) {
+      AtomicReader leafReader = leaf.reader();
+      if (!(leafReader instanceof SegmentReader)) {
+        // Not supported for any reader that is not a SegmentReader
+        return -1;
+      }
+      totalBytes += ((SegmentReader) leafReader).ramBytesUsed();
+    }
+    return totalBytes;
+  }
+
   // Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
   // especially on large indexes!
@SuppressWarnings("unchecked")
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
new file mode 100644
index 00000000000..e32859eebc0
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
@@ -0,0 +1,121 @@
+package org.apache.solr.spelling.suggest.fst;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
+import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.spelling.suggest.LookupFactory;
+
+/**
+ * Factory for {@link AnalyzingInfixSuggester}
+ * @lucene.experimental
+ */
+public class AnalyzingInfixLookupFactory extends LookupFactory {
+  /**
+   * Name of the field type whose index analyzer and query analyzer are
+   * handed to the suggester to analyze suggestions (mandatory parameter).
+   */
+  public static final String QUERY_ANALYZER = "suggestAnalyzerFieldType";
+
+  /**
+   * The path where the underlying index is stored
+   * if no index is found, it will be generated by
+   * the AnalyzingInfixSuggester
+   */
+  public static final String INDEX_PATH = "indexPath";
+
+  /**
+   * Minimum number of leading characters before PrefixQuery is used (default 4).
+   * Prefixes shorter than this are indexed as character ngrams
+   * (increasing index size but making lookups faster)
+   */
+  private static final String MIN_PREFIX_CHARS = "minPrefixChars";
+
+  /**
+   * Index path used when {@link #INDEX_PATH} is not configured.
+   */
+  private static final String DEFAULT_INDEX_PATH = "analyzingInfixSuggesterIndexDir";
+
+  /**
+   * File name for the automaton.
+   */
+  private static final String FILENAME = "iwfsta.bin";
+
+  /**
+   * Creates an {@link AnalyzingInfixSuggester} from the given configuration.
+   *
+   * @param params configuration; must contain {@link #QUERY_ANALYZER}, may
+   *               contain {@link #INDEX_PATH} and {@code minPrefixChars}
+   * @param core core supplying the schema and the Lucene match version
+   * @return the configured suggester
+   * @throws IllegalArgumentException if {@link #QUERY_ANALYZER} is missing or
+   *         names a field type that the schema does not define
+   * @throws RuntimeException wrapping the {@link IOException} raised while
+   *         the suggester is being constructed
+   */
+  @Override
+  public Lookup create(NamedList params, SolrCore core) {
+    // mandatory parameter
+    Object fieldTypeName = params.get(QUERY_ANALYZER);
+    if (fieldTypeName == null) {
+      throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
+    }
+    FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
+    if (ft == null) {
+      // report a configuration error instead of failing with a NullPointerException below
+      throw new IllegalArgumentException("Error in configuration: " + fieldTypeName + " is not defined in the schema");
+    }
+    Analyzer indexAnalyzer = ft.getAnalyzer();
+    Analyzer queryAnalyzer = ft.getQueryAnalyzer();
+
+    // optional parameters
+    Object indexPathParam = params.get(INDEX_PATH);
+    String indexPath = indexPathParam != null
+        ? indexPathParam.toString()
+        : DEFAULT_INDEX_PATH;
+
+    Object minPrefixCharsParam = params.get(MIN_PREFIX_CHARS);
+    int minPrefixChars = minPrefixCharsParam != null
+        ? Integer.parseInt(minPrefixCharsParam.toString())
+        : AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS;
+
+    try {
+      return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion,
+          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
+    } catch (IOException e) {
+      // keep the cause so that the underlying I/O failure stays diagnosable
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Returns the store file name declared by this factory.
+   */
+  @Override
+  public String storeFileName() {
+    return FILENAME;
+  }
+}
diff --git a/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt b/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
new file mode 100644
index 00000000000..6d276c33a16
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
@@ -0,0 +1,5 @@
+# simple AnalyzingInfix suggest phrase dictionary for testing
+Japanese Autocomplete and Japanese Highlighter broken
+Add Japanese Kanji number normalization to Kuromoji
+Add decompose compound Japanese Katakana token capability to Kuromoji
+This is just another entry!
\ No newline at end of file diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml index 96b4f7b52f4..b4f560ed32f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml @@ -65,6 +65,24 @@ phrase_suggest + + + + infix_suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory + false + + + text + + analyzingInfixSuggest.txt + + + + phrase_suggest + + @@ -183,7 +201,20 @@ - + + + + true + infix_suggest_analyzing + false + + true + + + infix_suggest_analyzing + + + true diff --git a/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java new file mode 100644 index 00000000000..0ee3e583356 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java @@ -0,0 +1,66 @@ +package org.apache.solr.spelling.suggest; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.SpellingParams; +import org.junit.BeforeClass; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestAnalyzeInfixSuggestions extends SolrTestCaseJ4 {
+  static final String URI_DEFAULT = "/infix_suggest_analyzing";
+
+  /** Loads the phrase-suggest test core and issues a build request to the handler. */
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
+    assertQ(req("qt", URI_DEFAULT, "q", "", SpellingParams.SPELLCHECK_BUILD, "true"));
+  }
+
+  /** XPath of the suggestion block returned for {@code token}. */
+  private static String hits(String token) {
+    return "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='" + token + "']";
+  }
+
+  /** Queries {@code token} and asserts {@code numFound} plus the ordered suggestion texts. */
+  private static void assertSuggestions(String token, String count, int numFound, String... expected) throws Exception {
+    String[] xpaths = new String[expected.length + 1];
+    xpaths[0] = hits(token) + "/int[@name='numFound'][.='" + numFound + "']";
+    for (int i = 0; i < expected.length; i++) {
+      xpaths[i + 1] = hits(token) + "/arr[@name='suggestion']/str[" + (i + 1) + "][.='" + expected[i] + "']";
+    }
+    assertQ(req("qt", URI_DEFAULT, "q", token, SpellingParams.SPELLCHECK_COUNT, count), xpaths);
+  }
+
+  public void testSingle() throws Exception {
+    String first = "Japanese Autocomplete and Japanese Highlighter broken";
+    assertSuggestions("japan", "1", 1, first);
+    // "high" occurs only in the middle of the phrase ("Highlighter")
+    assertSuggestions("high", "1", 1, first);
+  }
+
+  public void testMultiple() throws Exception {
+    String first = "Japanese Autocomplete and Japanese Highlighter broken";
+    String second = "Add Japanese Kanji number normalization to Kuromoji";
+    String third = "Add decompose compound Japanese Katakana token capability to Kuromoji";
+
+    assertSuggestions("japan", "2", 2, first, second);
+    assertSuggestions("japan", "3", 3, first, second, third);
+    // asking for four suggestions is still expected to report three
+    assertSuggestions("japan", "4", 3, first, second, third);
+  }
+}
\ No newline at end of file