mirror of https://github.com/apache/lucene.git
Merged /lucene/dev/trunk:r1523396-1523455
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5207@1523456 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
140c6d3172
|
@ -50,7 +50,7 @@ while True:
|
||||||
elif l.endswith('\n'):
|
elif l.endswith('\n'):
|
||||||
l = l[:-1]
|
l = l[:-1]
|
||||||
if l.startswith('diff ') or l.startswith('Binary files '):
|
if l.startswith('diff ') or l.startswith('Binary files '):
|
||||||
keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml')) and l.find('/.svn/') == -1))
|
keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml') or l.lower().endswith('.html') or l.lower().endswith('.template') or l.lower().endswith('.py') or l.lower().endswith('.g') or l.lower().endswith('.properties')) and l.find('/.svn/') == -1))
|
||||||
if keep:
|
if keep:
|
||||||
print
|
print
|
||||||
print
|
print
|
||||||
|
|
|
@ -62,6 +62,22 @@ Velocity 1.7 and Velocity Tools 2.0
|
||||||
Apache UIMA 2.3.1
|
Apache UIMA 2.3.1
|
||||||
Apache ZooKeeper 3.4.5
|
Apache ZooKeeper 3.4.5
|
||||||
|
|
||||||
|
Detailed Change List
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
New Features
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
* SOLR-5167: Add support for AnalyzingInfixSuggester (AnalyzingInfixLookupFactory).
|
||||||
|
(Areek Zillur, Varun Thacker via Robert Muir)
|
||||||
|
|
||||||
|
Other Changes
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
* SOLR-5237: Add indexHeapUsageBytes to LukeRequestHandler, indicating how much
|
||||||
|
heap memory is being used by the underlying Lucene index structures.
|
||||||
|
(Areek Zillur via Robert Muir)
|
||||||
|
|
||||||
================== 4.5.0 ==================
|
================== 4.5.0 ==================
|
||||||
|
|
||||||
Versions of Major Components
|
Versions of Major Components
|
||||||
|
|
|
@ -555,6 +555,7 @@ public class LukeRequestHandler extends RequestHandlerBase
|
||||||
indexInfo.add("numDocs", reader.numDocs());
|
indexInfo.add("numDocs", reader.numDocs());
|
||||||
indexInfo.add("maxDoc", reader.maxDoc());
|
indexInfo.add("maxDoc", reader.maxDoc());
|
||||||
indexInfo.add("deletedDocs", reader.maxDoc() - reader.numDocs());
|
indexInfo.add("deletedDocs", reader.maxDoc() - reader.numDocs());
|
||||||
|
indexInfo.add("indexHeapUsageBytes", getIndexHeapUsed(reader));
|
||||||
|
|
||||||
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
|
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
|
||||||
indexInfo.add("segmentCount", reader.leaves().size());
|
indexInfo.add("segmentCount", reader.leaves().size());
|
||||||
|
@ -569,6 +570,21 @@ public class LukeRequestHandler extends RequestHandlerBase
|
||||||
return indexInfo;
|
return indexInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the sum of RAM bytes used by each segment */
|
||||||
|
private static long getIndexHeapUsed(DirectoryReader reader) {
|
||||||
|
long indexHeapRamBytesUsed = 0;
|
||||||
|
for(AtomicReaderContext atomicReaderContext : reader.leaves()) {
|
||||||
|
AtomicReader atomicReader = atomicReaderContext.reader();
|
||||||
|
if (atomicReader instanceof SegmentReader) {
|
||||||
|
indexHeapRamBytesUsed += ((SegmentReader) atomicReader).ramBytesUsed();
|
||||||
|
} else {
|
||||||
|
// Not supported for any reader that is not a SegmentReader
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return indexHeapRamBytesUsed;
|
||||||
|
}
|
||||||
|
|
||||||
// Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
|
// Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
|
||||||
// especially on large indexes!
|
// especially on large indexes!
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
package org.apache.solr.spelling.suggest.fst;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.search.suggest.Lookup;
|
||||||
|
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
|
||||||
|
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
import org.apache.solr.spelling.suggest.LookupFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link AnalyzingInfixSuggester}
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class AnalyzingInfixLookupFactory extends LookupFactory {
|
||||||
|
/**
|
||||||
|
* The analyzer used at "query-time" and "build-time" to analyze suggestions.
|
||||||
|
*/
|
||||||
|
public static final String QUERY_ANALYZER = "suggestAnalyzerFieldType";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The path where the underlying index is stored
|
||||||
|
* if no index is found, it will be generated by
|
||||||
|
* the AnalyzingInfixSuggester
|
||||||
|
*/
|
||||||
|
public static final String INDEX_PATH = "indexPath";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimum number of leading characters before PrefixQuery is used (default 4).
|
||||||
|
* Prefixes shorter than this are indexed as character ngrams
|
||||||
|
* (increasing index size but making lookups faster)
|
||||||
|
*/
|
||||||
|
private static final String MIN_PREFIX_CHARS = "minPrefixChars";
|
||||||
|
|
||||||
|
private static final String DEFAULT_INDEX_PATH = "analyzingInfixSuggesterIndexDir";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* File name for the automaton.
|
||||||
|
*/
|
||||||
|
private static final String FILENAME = "iwfsta.bin";
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Lookup create(NamedList params, SolrCore core) {
|
||||||
|
// mandatory parameter
|
||||||
|
Object fieldTypeName = params.get(QUERY_ANALYZER);
|
||||||
|
if (fieldTypeName == null) {
|
||||||
|
throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
|
||||||
|
}
|
||||||
|
FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
|
||||||
|
Analyzer indexAnalyzer = ft.getAnalyzer();
|
||||||
|
Analyzer queryAnalyzer = ft.getQueryAnalyzer();
|
||||||
|
|
||||||
|
// optional parameters
|
||||||
|
|
||||||
|
String indexPath = params.get(INDEX_PATH) != null
|
||||||
|
? params.get(INDEX_PATH).toString()
|
||||||
|
: DEFAULT_INDEX_PATH;
|
||||||
|
|
||||||
|
int minPrefixChars = params.get(MIN_PREFIX_CHARS) != null
|
||||||
|
? Integer.parseInt(params.get(MIN_PREFIX_CHARS).toString())
|
||||||
|
: AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS;
|
||||||
|
|
||||||
|
try {
|
||||||
|
return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion,
|
||||||
|
new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String storeFileName() {
|
||||||
|
return FILENAME;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
# simple AnalyzingInfix suggest phrase dictionary for testing
|
||||||
|
Japanese Autocomplete and Japanese Highlighter broken
|
||||||
|
Add Japanese Kanji number normalization to Kuromoji
|
||||||
|
Add decompose compound Japanese Katakana token capability to Kuromoji
|
||||||
|
This is just another entry!
|
|
@ -65,6 +65,24 @@
|
||||||
<str name="queryAnalyzerFieldType">phrase_suggest</str>
|
<str name="queryAnalyzerFieldType">phrase_suggest</str>
|
||||||
</searchComponent>
|
</searchComponent>
|
||||||
|
|
||||||
|
<!-- AnalyzingInfixLookup suggest component (default)-->
|
||||||
|
<searchComponent class="solr.SpellCheckComponent" name="infix_suggest_analyzing">
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">infix_suggest_analyzing</str>
|
||||||
|
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
|
||||||
|
<str name="lookupImpl">org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory</str>
|
||||||
|
<str name="buildOnCommit">false</str>
|
||||||
|
|
||||||
|
<!-- Suggester properties -->
|
||||||
|
<str name="suggestAnalyzerFieldType">text</str>
|
||||||
|
|
||||||
|
<str name="sourceLocation">analyzingInfixSuggest.txt</str>
|
||||||
|
</lst>
|
||||||
|
|
||||||
|
<!-- specify a fieldtype using keywordtokenizer + lowercase + cleanup -->
|
||||||
|
<str name="queryAnalyzerFieldType">phrase_suggest</str>
|
||||||
|
</searchComponent>
|
||||||
|
|
||||||
<!-- FuzzyLookup suggest component (default)-->
|
<!-- FuzzyLookup suggest component (default)-->
|
||||||
<searchComponent class="solr.SpellCheckComponent" name="fuzzy_suggest_analyzing">
|
<searchComponent class="solr.SpellCheckComponent" name="fuzzy_suggest_analyzing">
|
||||||
<lst name="spellchecker">
|
<lst name="spellchecker">
|
||||||
|
@ -183,7 +201,20 @@
|
||||||
</arr>
|
</arr>
|
||||||
</requestHandler>
|
</requestHandler>
|
||||||
|
|
||||||
<!-- Fuzzy analyzing handler with 1 max edit -->
|
<!-- Infix analyzing handler (default) -->
|
||||||
|
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/infix_suggest_analyzing">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="spellcheck">true</str>
|
||||||
|
<str name="spellcheck.dictionary">infix_suggest_analyzing</str>
|
||||||
|
<str name="spellcheck.collate">false</str>
|
||||||
|
<!-- NOTE: if this is false, results are alpha-ordered, not by weight! -->
|
||||||
|
<str name="spellcheck.onlyMorePopular">true</str>
|
||||||
|
</lst>
|
||||||
|
<arr name="components">
|
||||||
|
<str>infix_suggest_analyzing</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing">
|
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing">
|
||||||
<lst name="defaults">
|
<lst name="defaults">
|
||||||
<str name="spellcheck">true</str>
|
<str name="spellcheck">true</str>
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
package org.apache.solr.spelling.suggest;
|
||||||
|
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.params.SpellingParams;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TestAnalyzeInfixSuggestions extends SolrTestCaseJ4 {
|
||||||
|
static final String URI_DEFAULT = "/infix_suggest_analyzing";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
|
||||||
|
assertQ(req("qt", URI_DEFAULT, "q", "", SpellingParams.SPELLCHECK_BUILD, "true"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingle() throws Exception {
|
||||||
|
|
||||||
|
assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "1"),
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='1']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ(req("qt", URI_DEFAULT, "q", "high", SpellingParams.SPELLCHECK_COUNT, "1"),
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/int[@name='numFound'][.='1']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese <b>High</b>lighter broken']"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMultiple() throws Exception {
|
||||||
|
|
||||||
|
assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "2"),
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='2']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']"
|
||||||
|
);
|
||||||
|
assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "3"),
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound <b>Japan</b>ese Katakana token capability to Kuromoji']"
|
||||||
|
);
|
||||||
|
assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "4"),
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound <b>Japan</b>ese Katakana token capability to Kuromoji']"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue