mirror of https://github.com/apache/lucene.git
Merge branch 'apache-https-master' into jira/solr-8593
This commit is contained in:
commit
750cf6d7a5
|
@ -431,6 +431,7 @@ reChangesSectionHREF = re.compile('<a id="(.*?)".*?>(.*?)</a>', re.IGNORECASE)
|
||||||
reUnderbarNotDashHTML = re.compile(r'<li>(\s*(LUCENE|SOLR)_\d\d\d\d+)')
|
reUnderbarNotDashHTML = re.compile(r'<li>(\s*(LUCENE|SOLR)_\d\d\d\d+)')
|
||||||
reUnderbarNotDashTXT = re.compile(r'\s+((LUCENE|SOLR)_\d\d\d\d+)', re.MULTILINE)
|
reUnderbarNotDashTXT = re.compile(r'\s+((LUCENE|SOLR)_\d\d\d\d+)', re.MULTILINE)
|
||||||
def checkChangesContent(s, version, name, project, isHTML):
|
def checkChangesContent(s, version, name, project, isHTML):
|
||||||
|
currentVersionTuple = versionToTuple(version, name)
|
||||||
|
|
||||||
if isHTML and s.find('Release %s' % version) == -1:
|
if isHTML and s.find('Release %s' % version) == -1:
|
||||||
raise RuntimeError('did not see "Release %s" in %s' % (version, name))
|
raise RuntimeError('did not see "Release %s" in %s' % (version, name))
|
||||||
|
@ -459,7 +460,8 @@ def checkChangesContent(s, version, name, project, isHTML):
|
||||||
raise RuntimeError('did not see "%s" in %s' % (sub, name))
|
raise RuntimeError('did not see "%s" in %s' % (sub, name))
|
||||||
|
|
||||||
if isHTML:
|
if isHTML:
|
||||||
# Make sure a section only appears once under each release:
|
# Make sure that a section only appears once under each release,
|
||||||
|
# and that each release is not greater than the current version
|
||||||
seenIDs = set()
|
seenIDs = set()
|
||||||
seenText = set()
|
seenText = set()
|
||||||
|
|
||||||
|
@ -468,6 +470,9 @@ def checkChangesContent(s, version, name, project, isHTML):
|
||||||
if text.lower().startswith('release '):
|
if text.lower().startswith('release '):
|
||||||
release = text[8:].strip()
|
release = text[8:].strip()
|
||||||
seenText.clear()
|
seenText.clear()
|
||||||
|
releaseTuple = versionToTuple(release, name)
|
||||||
|
if releaseTuple > currentVersionTuple:
|
||||||
|
raise RuntimeError('Future release %s is greater than %s in %s' % (release, version, name))
|
||||||
if id in seenIDs:
|
if id in seenIDs:
|
||||||
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
||||||
seenIDs.add(id)
|
seenIDs.add(id)
|
||||||
|
@ -475,6 +480,27 @@ def checkChangesContent(s, version, name, project, isHTML):
|
||||||
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
||||||
seenText.add(text)
|
seenText.add(text)
|
||||||
|
|
||||||
|
|
||||||
|
# Matches "major.minor[.bugfix]", optionally followed by a release qualifier
# (-alpha, -beta, final or RC<n>) and an optional trailing "[...]" annotation.
reVersion = re.compile(r'(\d+)\.(\d+)(?:\.(\d+))?\s*(-alpha|-beta|final|RC\d+)?\s*(?:\[.*\])?', re.IGNORECASE)


def versionToTuple(version, name):
    """Parse a release version string into a tuple that compares numerically.

    Args:
        version: version text, e.g. '6.4.0', '4.0.0-ALPHA' or '2.9 RC1'.
        name: name of the file the version came from; only used in the
            error message.

    Returns:
        A tuple of ints. Unmatched trailing groups are dropped, so '6.4'
        yields (6, 4). A qualifier is folded into a number: -alpha appends
        0, -beta appends 1, 'final' replaces the last two slots with 100 and
        'RCn' replaces the last two slots with n. If the bugfix number is
        missing but a qualifier is present, the unmatched bugfix slot can
        remain as None in the tuple.

    Raises:
        RuntimeError: if the text does not start with 'major.minor'.
    """
    versionMatch = reVersion.match(version)
    if versionMatch is None:
        raise RuntimeError('Version %s in %s cannot be parsed' % (version, name))

    versionTuple = versionMatch.groups()
    # Drop unmatched trailing groups (no bugfix number and/or no qualifier).
    # Groups 1 and 2 always match, so this never empties the tuple.
    while versionTuple[-1] is None or versionTuple[-1] == '':
        versionTuple = versionTuple[:-1]

    qualifier = versionTuple[-1].lower()
    if qualifier == '-alpha':
        versionTuple = versionTuple[:-1] + ('0',)
    elif qualifier == '-beta':
        versionTuple = versionTuple[:-1] + ('1',)
    elif qualifier == 'final':
        versionTuple = versionTuple[:-2] + ('100',)
    elif qualifier[:2] == 'rc':
        versionTuple = versionTuple[:-2] + (versionTuple[-1][2:],)

    # Compare components as integers: as strings, '10' sorts before '9', so
    # e.g. 6.10.0 would wrongly be considered older than 6.9.0.
    return tuple(int(part) if part is not None and part.isdecimal() else part
                 for part in versionTuple)
|
||||||
|
|
||||||
|
|
||||||
reUnixPath = re.compile(r'\b[a-zA-Z_]+=(?:"(?:\\"|[^"])*"' + '|(?:\\\\.|[^"\'\\s])*' + r"|'(?:\\'|[^'])*')" \
|
reUnixPath = re.compile(r'\b[a-zA-Z_]+=(?:"(?:\\"|[^"])*"' + '|(?:\\\\.|[^"\'\\s])*' + r"|'(?:\\'|[^'])*')" \
|
||||||
+ r'|(/(?:\\.|[^"\'\s])*)' \
|
+ r'|(/(?:\\.|[^"\'\s])*)' \
|
||||||
+ r'|("/(?:\\.|[^"])*")' \
|
+ r'|("/(?:\\.|[^"])*")' \
|
||||||
|
|
|
@ -56,6 +56,11 @@ Other
|
||||||
|
|
||||||
======================= Lucene 6.4.0 =======================
|
======================= Lucene 6.4.0 =======================
|
||||||
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries
|
||||||
|
to be set to true when splitOnWhitespace is false (and vice-versa).
|
||||||
|
|
||||||
New features
|
New features
|
||||||
|
|
||||||
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
|
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
|
||||||
|
@ -65,6 +70,15 @@ Bug Fixes
|
||||||
* LUCENE-7547: JapaneseTokenizerFactory was failing to close the
|
* LUCENE-7547: JapaneseTokenizerFactory was failing to close the
|
||||||
dictionary file it opened (Markus via Mike McCandless)
|
dictionary file it opened (Markus via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-7562: CompletionFieldsConsumer sometimes throws
|
||||||
|
NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true
|
||||||
|
when splitOnWhitespace=false (and vice-versa). (Steve Rowe)
|
||||||
|
|
||||||
|
* LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
|
||||||
|
component when preserveOriginal was set to true. (Adrien Grand)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
|
|
||||||
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
|
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
|
||||||
|
@ -84,6 +98,9 @@ Improvements
|
||||||
|
|
||||||
* LUCENE-7524: Added more detailed explanation of how IDF is computed in
|
* LUCENE-7524: Added more detailed explanation of how IDF is computed in
|
||||||
ClassicSimilarity and BM25Similarity. (Adrien Grand)
|
ClassicSimilarity and BM25Similarity. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-7564: AnalyzingInfixSuggester should close its IndexWriter by default
|
||||||
|
at the end of build(). (Steve Rowe)
|
||||||
|
|
||||||
* LUCENE-7526: Enhanced UnifiedHighlighter's passage relevancy for queries with
|
* LUCENE-7526: Enhanced UnifiedHighlighter's passage relevancy for queries with
|
||||||
wildcards and sometimes just terms. Added shouldPreferPassageRelevancyOverSpeed()
|
wildcards and sometimes just terms. Added shouldPreferPassageRelevancyOverSpeed()
|
||||||
|
@ -93,6 +110,11 @@ Improvements
|
||||||
* LUCENE-7537: Index time sorting now supports multi-valued sorts
|
* LUCENE-7537: Index time sorting now supports multi-valued sorts
|
||||||
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
|
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-7560: QueryBuilder.createFieldQuery is no longer final,
|
||||||
|
giving custom query parsers subclassing QueryBuilder more freedom to
|
||||||
|
control how text is analyzed and converted into a query (Matt Weber
|
||||||
|
via Mike McCandless)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
|
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
|
||||||
|
@ -100,6 +122,9 @@ Other
|
||||||
|
|
||||||
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
|
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
|
||||||
|
|
||||||
|
* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
|
||||||
|
be customized. (David Smiley)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)
|
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.analysis.miscellaneous;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||||
|
@ -36,12 +37,14 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
* </fieldType></pre>
|
* </fieldType></pre>
|
||||||
*/
|
*/
|
||||||
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||||
|
private static final String PRESERVE_ORIGINAL = "preserveOriginal";
|
||||||
|
|
||||||
private final boolean preserveOriginal;
|
private final boolean preserveOriginal;
|
||||||
|
|
||||||
/** Creates a new ASCIIFoldingFilterFactory */
|
/** Creates a new ASCIIFoldingFilterFactory */
|
||||||
public ASCIIFoldingFilterFactory(Map<String,String> args) {
|
public ASCIIFoldingFilterFactory(Map<String,String> args) {
|
||||||
super(args);
|
super(args);
|
||||||
preserveOriginal = getBoolean(args, "preserveOriginal", false);
|
preserveOriginal = getBoolean(args, PRESERVE_ORIGINAL, false);
|
||||||
if (!args.isEmpty()) {
|
if (!args.isEmpty()) {
|
||||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||||
}
|
}
|
||||||
|
@ -54,7 +57,17 @@ public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements Mul
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AbstractAnalysisFactory getMultiTermComponent() {
|
public AbstractAnalysisFactory getMultiTermComponent() {
|
||||||
return this;
|
if (preserveOriginal) {
|
||||||
|
// The main use-case for using preserveOriginal is to match regardless of
|
||||||
|
// case but to give better scores to exact matches. Since most multi-term
|
||||||
|
// queries return constant scores anyway, the multi-term component only
|
||||||
|
// emits the folded token
|
||||||
|
Map<String, String> args = new HashMap<>(getOriginalArgs());
|
||||||
|
args.remove(PRESERVE_ORIGINAL);
|
||||||
|
return new ASCIIFoldingFilterFactory(args);
|
||||||
|
} else {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.CannedTokenStream;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||||
|
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||||
|
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
|
|
||||||
|
public class TestAsciiFoldingFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||||
|
|
||||||
|
public void testMultiTermAnalysis() throws IOException {
|
||||||
|
TokenFilterFactory factory = new ASCIIFoldingFilterFactory(Collections.emptyMap());
|
||||||
|
TokenStream stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||||
|
stream = factory.create(stream);
|
||||||
|
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||||
|
|
||||||
|
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
|
||||||
|
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||||
|
stream = factory.create(stream);
|
||||||
|
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||||
|
|
||||||
|
factory = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
|
||||||
|
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||||
|
stream = factory.create(stream);
|
||||||
|
assertTokenStreamContents(stream, new String[] { "Ete", "Été" });
|
||||||
|
|
||||||
|
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
|
||||||
|
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||||
|
stream = factory.create(stream);
|
||||||
|
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -25,13 +25,18 @@ import java.lang.reflect.Modifier;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
|
import java.text.ParsePosition;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.util.TimeZone;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@ -62,6 +67,8 @@ import org.apache.lucene.legacy.LegacyNumericUtils;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -165,6 +172,57 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
// a test option to not remove temp dir...):
|
// a test option to not remove temp dir...):
|
||||||
Thread.sleep(100000);
|
Thread.sleep(100000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
|
||||||
|
public void testCreateSortedIndex() throws Exception {
|
||||||
|
|
||||||
|
Path indexDir = getIndexDir().resolve("sorted");
|
||||||
|
Files.deleteIfExists(indexDir);
|
||||||
|
Directory dir = newFSDirectory(indexDir);
|
||||||
|
|
||||||
|
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
|
||||||
|
mp.setNoCFSRatio(1.0);
|
||||||
|
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
|
||||||
|
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||||
|
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
||||||
|
|
||||||
|
// TODO: remove randomness
|
||||||
|
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
|
||||||
|
conf.setMergePolicy(mp);
|
||||||
|
conf.setUseCompoundFile(false);
|
||||||
|
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
|
||||||
|
IndexWriter writer = new IndexWriter(dir, conf);
|
||||||
|
LineFileDocs docs = new LineFileDocs(random());
|
||||||
|
SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
|
||||||
|
parser.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||||
|
ParsePosition position = new ParsePosition(0);
|
||||||
|
Field dateDVField = null;
|
||||||
|
for(int i=0;i<50;i++) {
|
||||||
|
Document doc = docs.nextDoc();
|
||||||
|
String dateString = doc.get("date");
|
||||||
|
|
||||||
|
position.setIndex(0);
|
||||||
|
Date date = parser.parse(dateString, position);
|
||||||
|
if (position.getErrorIndex() != -1) {
|
||||||
|
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
|
||||||
|
}
|
||||||
|
if (position.getIndex() != dateString.length()) {
|
||||||
|
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
|
||||||
|
}
|
||||||
|
if (dateDVField == null) {
|
||||||
|
dateDVField = new NumericDocValuesField("dateDV", 0l);
|
||||||
|
doc.add(dateDVField);
|
||||||
|
}
|
||||||
|
dateDVField.setLongValue(date.getTime());
|
||||||
|
if (i == 250) {
|
||||||
|
writer.commit();
|
||||||
|
}
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
writer.forceMerge(1);
|
||||||
|
writer.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) throws IOException {
|
private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) throws IOException {
|
||||||
writer.updateNumericDocValue(new Term("id", id), f, value);
|
writer.updateNumericDocValue(new Term("id", id), f, value);
|
||||||
|
@ -1483,6 +1541,30 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSortedIndex() throws Exception {
|
||||||
|
String[] versions = new String[] {"6.2.0", "6.2.1", "6.3.0"};
|
||||||
|
for(String version : versions) {
|
||||||
|
Path path = createTempDir("sorted");
|
||||||
|
InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream("sorted." + version + ".zip");
|
||||||
|
assertNotNull("Sorted index index " + version + " not found", resource);
|
||||||
|
TestUtil.unzip(resource, path);
|
||||||
|
|
||||||
|
// TODO: more tests
|
||||||
|
Directory dir = newFSDirectory(path);
|
||||||
|
|
||||||
|
DirectoryReader reader = DirectoryReader.open(dir);
|
||||||
|
assertEquals(1, reader.leaves().size());
|
||||||
|
Sort sort = reader.leaves().get(0).reader().getIndexSort();
|
||||||
|
assertNotNull(sort);
|
||||||
|
assertEquals("<long: \"dateDV\">!", sort.toString());
|
||||||
|
reader.close();
|
||||||
|
|
||||||
|
// this will confirm the docs really are sorted:
|
||||||
|
TestUtil.checkIndex(dir);
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static long getValue(BinaryDocValues bdv) throws IOException {
|
static long getValue(BinaryDocValues bdv) throws IOException {
|
||||||
BytesRef term = bdv.binaryValue();
|
BytesRef term = bdv.binaryValue();
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -196,7 +196,7 @@ public class QueryBuilder {
|
||||||
* @param quoted true if phrases should be generated when terms occur at more than one position
|
* @param quoted true if phrases should be generated when terms occur at more than one position
|
||||||
* @param phraseSlop slop factor for phrase/multiphrase queries
|
* @param phraseSlop slop factor for phrase/multiphrase queries
|
||||||
*/
|
*/
|
||||||
protected final Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
|
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
|
||||||
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
|
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
|
||||||
|
|
||||||
// Use the analyzer to get all the tokens, and then build an appropriate
|
// Use the analyzer to get all the tokens, and then build an appropriate
|
||||||
|
|
|
@ -75,6 +75,9 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
||||||
*
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
|
// TODO we could make this go away. MemoryIndexOffsetStrategy could simply split and analyze each value into the
|
||||||
|
// MemoryIndex. TokenStreamOffsetStrategy's hack TokenStreamPostingsEnum could incorporate this logic,
|
||||||
|
// albeit with less code, less hack.
|
||||||
private static final class MultiValueTokenStream extends TokenFilter {
|
private static final class MultiValueTokenStream extends TokenFilter {
|
||||||
|
|
||||||
private final String fieldName;
|
private final String fieldName;
|
||||||
|
|
|
@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight;
|
||||||
* ellipses between unconnected passages.
|
* ellipses between unconnected passages.
|
||||||
*/
|
*/
|
||||||
public class DefaultPassageFormatter extends PassageFormatter {
|
public class DefaultPassageFormatter extends PassageFormatter {
|
||||||
/** text that will appear before highlighted terms */
|
/** text that will appear before highlighted terms */
|
||||||
protected final String preTag;
|
protected final String preTag;
|
||||||
/** text that will appear after highlighted terms */
|
/** text that will appear after highlighted terms */
|
||||||
protected final String postTag;
|
protected final String postTag;
|
||||||
/** text that will appear between two unconnected passages */
|
/** text that will appear between two unconnected passages */
|
||||||
protected final String ellipsis;
|
protected final String ellipsis;
|
||||||
/** true if we should escape for html */
|
/** true if we should escape for html */
|
||||||
protected final boolean escape;
|
protected final boolean escape;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new DefaultPassageFormatter with the default tags.
|
* Creates a new DefaultPassageFormatter with the default tags.
|
||||||
*/
|
*/
|
||||||
public DefaultPassageFormatter() {
|
public DefaultPassageFormatter() {
|
||||||
this("<b>", "</b>", "... ", false);
|
this("<b>", "</b>", "... ", false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new DefaultPassageFormatter with custom tags.
|
||||||
|
*
|
||||||
|
* @param preTag text which should appear before a highlighted term.
|
||||||
|
* @param postTag text which should appear after a highlighted term.
|
||||||
|
* @param ellipsis text which should be used to connect two unconnected passages.
|
||||||
|
* @param escape true if text should be html-escaped
|
||||||
|
*/
|
||||||
|
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
|
||||||
|
if (preTag == null || postTag == null || ellipsis == null) {
|
||||||
|
throw new NullPointerException();
|
||||||
}
|
}
|
||||||
|
this.preTag = preTag;
|
||||||
|
this.postTag = postTag;
|
||||||
|
this.ellipsis = ellipsis;
|
||||||
|
this.escape = escape;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
@Override
|
||||||
* Creates a new DefaultPassageFormatter with custom tags.
|
public String format(Passage passages[], String content) {
|
||||||
* @param preTag text which should appear before a highlighted term.
|
StringBuilder sb = new StringBuilder();
|
||||||
* @param postTag text which should appear after a highlighted term.
|
int pos = 0;
|
||||||
* @param ellipsis text which should be used to connect two unconnected passages.
|
for (Passage passage : passages) {
|
||||||
* @param escape true if text should be html-escaped
|
// don't add ellipsis if its the first one, or if its connected.
|
||||||
*/
|
if (passage.getStartOffset() > pos && pos > 0) {
|
||||||
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
|
sb.append(ellipsis);
|
||||||
if (preTag == null || postTag == null || ellipsis == null) {
|
}
|
||||||
throw new NullPointerException();
|
pos = passage.getStartOffset();
|
||||||
|
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||||
|
int start = passage.getMatchStarts()[i];
|
||||||
|
int end = passage.getMatchEnds()[i];
|
||||||
|
// its possible to have overlapping terms
|
||||||
|
if (start > pos) {
|
||||||
|
append(sb, content, pos, start);
|
||||||
}
|
}
|
||||||
this.preTag = preTag;
|
if (end > pos) {
|
||||||
this.postTag = postTag;
|
sb.append(preTag);
|
||||||
this.ellipsis = ellipsis;
|
append(sb, content, Math.max(pos, start), end);
|
||||||
this.escape = escape;
|
sb.append(postTag);
|
||||||
}
|
pos = end;
|
||||||
|
|
||||||
@Override
|
|
||||||
public String format(Passage passages[], String content) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
int pos = 0;
|
|
||||||
for (Passage passage : passages) {
|
|
||||||
// don't add ellipsis if its the first one, or if its connected.
|
|
||||||
if (passage.startOffset > pos && pos > 0) {
|
|
||||||
sb.append(ellipsis);
|
|
||||||
}
|
|
||||||
pos = passage.startOffset;
|
|
||||||
for (int i = 0; i < passage.numMatches; i++) {
|
|
||||||
int start = passage.matchStarts[i];
|
|
||||||
int end = passage.matchEnds[i];
|
|
||||||
// its possible to have overlapping terms
|
|
||||||
if (start > pos) {
|
|
||||||
append(sb, content, pos, start);
|
|
||||||
}
|
|
||||||
if (end > pos) {
|
|
||||||
sb.append(preTag);
|
|
||||||
append(sb, content, Math.max(pos, start), end);
|
|
||||||
sb.append(postTag);
|
|
||||||
pos = end;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
|
||||||
append(sb, content, pos, Math.max(pos, passage.endOffset));
|
|
||||||
pos = passage.endOffset;
|
|
||||||
}
|
}
|
||||||
return sb.toString();
|
}
|
||||||
|
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||||
|
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
|
||||||
|
pos = passage.getEndOffset();
|
||||||
}
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Appends original text to the response.
|
* Appends original text to the response.
|
||||||
* @param dest resulting text, possibly transformed or encoded
|
*
|
||||||
* @param content original text content
|
* @param dest resulting text, possibly transformed or encoded
|
||||||
* @param start index of the first character in content
|
* @param content original text content
|
||||||
* @param end index of the character following the last character in content
|
* @param start index of the first character in content
|
||||||
*/
|
* @param end index of the character following the last character in content
|
||||||
protected void append(StringBuilder dest, String content, int start, int end) {
|
*/
|
||||||
if (escape) {
|
protected void append(StringBuilder dest, String content, int start, int end) {
|
||||||
// note: these are the rules from owasp.org
|
if (escape) {
|
||||||
for (int i = start; i < end; i++) {
|
// note: these are the rules from owasp.org
|
||||||
char ch = content.charAt(i);
|
for (int i = start; i < end; i++) {
|
||||||
switch(ch) {
|
char ch = content.charAt(i);
|
||||||
case '&':
|
switch (ch) {
|
||||||
dest.append("&");
|
case '&':
|
||||||
break;
|
dest.append("&");
|
||||||
case '<':
|
break;
|
||||||
dest.append("<");
|
case '<':
|
||||||
break;
|
dest.append("<");
|
||||||
case '>':
|
break;
|
||||||
dest.append(">");
|
case '>':
|
||||||
break;
|
dest.append(">");
|
||||||
case '"':
|
break;
|
||||||
dest.append(""");
|
case '"':
|
||||||
break;
|
dest.append(""");
|
||||||
case '\'':
|
break;
|
||||||
dest.append("'");
|
case '\'':
|
||||||
break;
|
dest.append("'");
|
||||||
case '/':
|
break;
|
||||||
dest.append("/");
|
case '/':
|
||||||
break;
|
dest.append("/");
|
||||||
default:
|
break;
|
||||||
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
default:
|
||||||
dest.append(ch);
|
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
||||||
} else if (ch < 0xff) {
|
dest.append(ch);
|
||||||
dest.append("&#");
|
} else if (ch < 0xff) {
|
||||||
dest.append((int)ch);
|
dest.append("&#");
|
||||||
dest.append(";");
|
dest.append((int) ch);
|
||||||
} else {
|
dest.append(";");
|
||||||
dest.append(ch);
|
} else {
|
||||||
}
|
dest.append(ch);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
dest.append(content, start, end);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dest.append(content, start, end);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,9 +117,9 @@ public class FieldHighlighter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Passage passage = new Passage();
|
Passage passage = new Passage();
|
||||||
passage.score = Float.NaN;
|
passage.setScore(Float.NaN);
|
||||||
passage.startOffset = pos;
|
passage.setStartOffset(pos);
|
||||||
passage.endOffset = next;
|
passage.setEndOffset(next);
|
||||||
passages.add(passage);
|
passages.add(passage);
|
||||||
pos = next;
|
pos = next;
|
||||||
}
|
}
|
||||||
|
@ -145,12 +145,12 @@ public class FieldHighlighter {
|
||||||
offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination
|
offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination
|
||||||
|
|
||||||
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
|
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
|
||||||
if (left.score < right.score) {
|
if (left.getScore() < right.getScore()) {
|
||||||
return -1;
|
return -1;
|
||||||
} else if (left.score > right.score) {
|
} else if (left.getScore() > right.getScore()) {
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
return left.startOffset - right.startOffset;
|
return left.getStartOffset() - right.getStartOffset();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
|
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
|
||||||
|
@ -170,12 +170,12 @@ public class FieldHighlighter {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// See if this term should be part of a new passage.
|
// See if this term should be part of a new passage.
|
||||||
if (start >= passage.endOffset) {
|
if (start >= passage.getEndOffset()) {
|
||||||
if (passage.startOffset >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
|
if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
|
||||||
// finalize passage
|
// finalize passage
|
||||||
passage.score *= scorer.norm(passage.startOffset);
|
passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
|
||||||
// new sentence: first add 'passage' to queue
|
// new sentence: first add 'passage' to queue
|
||||||
if (passageQueue.size() == maxPassages && passage.score < passageQueue.peek().score) {
|
if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
|
||||||
passage.reset(); // can't compete, just reset it
|
passage.reset(); // can't compete, just reset it
|
||||||
} else {
|
} else {
|
||||||
passageQueue.offer(passage);
|
passageQueue.offer(passage);
|
||||||
|
@ -192,8 +192,8 @@ public class FieldHighlighter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// advance breakIterator
|
// advance breakIterator
|
||||||
passage.startOffset = Math.max(breakIterator.preceding(start + 1), 0);
|
passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
|
||||||
passage.endOffset = Math.min(breakIterator.following(start), contentLength);
|
passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
|
||||||
}
|
}
|
||||||
// Add this term to the passage.
|
// Add this term to the passage.
|
||||||
int tf = 0;
|
int tf = 0;
|
||||||
|
@ -209,12 +209,12 @@ public class FieldHighlighter {
|
||||||
off.nextPosition();
|
off.nextPosition();
|
||||||
start = off.startOffset();
|
start = off.startOffset();
|
||||||
end = off.endOffset();
|
end = off.endOffset();
|
||||||
if (start >= passage.endOffset || end > contentLength) { // it's beyond this passage
|
if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
|
||||||
offsetsEnumQueue.offer(off);
|
offsetsEnumQueue.offer(off);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
passage.score += off.weight * scorer.tf(tf, passage.endOffset - passage.startOffset);
|
passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
|
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
|
||||||
|
@ -222,7 +222,7 @@ public class FieldHighlighter {
|
||||||
p.sort();
|
p.sort();
|
||||||
}
|
}
|
||||||
// sort in ascending order
|
// sort in ascending order
|
||||||
Arrays.sort(passages, (left, right) -> left.startOffset - right.startOffset);
|
Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset());
|
||||||
return passages;
|
return passages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -66,9 +66,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
BytesRef getTerm() throws IOException {
|
BytesRef getTerm() throws IOException {
|
||||||
// the dp.getPayload thing is a hack -- see MultiTermHighlighting
|
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
|
||||||
return term != null ? term : postingsEnum.getPayload();
|
return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
|
||||||
// We don't deepcopy() because in this hack we know we don't have to.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean hasMorePositions() throws IOException {
|
boolean hasMorePositions() throws IOException {
|
||||||
|
@ -91,7 +90,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
if (postingsEnum instanceof Closeable) { // the one in MultiTermHighlighting is.
|
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.
|
||||||
|
if (postingsEnum instanceof Closeable) {
|
||||||
((Closeable) postingsEnum).close();
|
((Closeable) postingsEnum).close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,139 +23,159 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a passage (typically a sentence of the document).
|
* Represents a passage (typically a sentence of the document).
|
||||||
* <p>
|
* <p>
|
||||||
* A passage contains {@link #getNumMatches} highlights from the query,
|
* A passage contains {@link #getNumMatches} highlights from the query,
|
||||||
* and the offsets and query terms that correspond with each match.
|
* and the offsets and query terms that correspond with each match.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class Passage {
|
public class Passage {
|
||||||
int startOffset = -1;
|
private int startOffset = -1;
|
||||||
int endOffset = -1;
|
private int endOffset = -1;
|
||||||
float score = 0.0f;
|
private float score = 0.0f;
|
||||||
|
|
||||||
int matchStarts[] = new int[8];
|
private int[] matchStarts = new int[8];
|
||||||
int matchEnds[] = new int[8];
|
private int[] matchEnds = new int[8];
|
||||||
BytesRef matchTerms[] = new BytesRef[8];
|
private BytesRef[] matchTerms = new BytesRef[8];
|
||||||
int numMatches = 0;
|
private int numMatches = 0;
|
||||||
|
|
||||||
public void addMatch(int startOffset, int endOffset, BytesRef term) {
|
/** @lucene.internal */
|
||||||
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
|
public void addMatch(int startOffset, int endOffset, BytesRef term) {
|
||||||
if (numMatches == matchStarts.length) {
|
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
|
||||||
int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
|
if (numMatches == matchStarts.length) {
|
||||||
int newMatchStarts[] = new int[newLength];
|
int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
|
||||||
int newMatchEnds[] = new int[newLength];
|
int newMatchStarts[] = new int[newLength];
|
||||||
BytesRef newMatchTerms[] = new BytesRef[newLength];
|
int newMatchEnds[] = new int[newLength];
|
||||||
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
|
BytesRef newMatchTerms[] = new BytesRef[newLength];
|
||||||
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
|
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
|
||||||
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
|
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
|
||||||
matchStarts = newMatchStarts;
|
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
|
||||||
matchEnds = newMatchEnds;
|
matchStarts = newMatchStarts;
|
||||||
matchTerms = newMatchTerms;
|
matchEnds = newMatchEnds;
|
||||||
}
|
matchTerms = newMatchTerms;
|
||||||
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
|
|
||||||
matchStarts[numMatches] = startOffset;
|
|
||||||
matchEnds[numMatches] = endOffset;
|
|
||||||
matchTerms[numMatches] = term;
|
|
||||||
numMatches++;
|
|
||||||
}
|
}
|
||||||
|
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
|
||||||
|
matchStarts[numMatches] = startOffset;
|
||||||
|
matchEnds[numMatches] = endOffset;
|
||||||
|
matchTerms[numMatches] = term;
|
||||||
|
numMatches++;
|
||||||
|
}
|
||||||
|
|
||||||
void sort() {
|
/** @lucene.internal */
|
||||||
final int starts[] = matchStarts;
|
public void sort() {
|
||||||
final int ends[] = matchEnds;
|
final int starts[] = matchStarts;
|
||||||
final BytesRef terms[] = matchTerms;
|
final int ends[] = matchEnds;
|
||||||
new InPlaceMergeSorter() {
|
final BytesRef terms[] = matchTerms;
|
||||||
@Override
|
new InPlaceMergeSorter() {
|
||||||
protected void swap(int i, int j) {
|
@Override
|
||||||
int temp = starts[i];
|
protected void swap(int i, int j) {
|
||||||
starts[i] = starts[j];
|
int temp = starts[i];
|
||||||
starts[j] = temp;
|
starts[i] = starts[j];
|
||||||
|
starts[j] = temp;
|
||||||
|
|
||||||
temp = ends[i];
|
temp = ends[i];
|
||||||
ends[i] = ends[j];
|
ends[i] = ends[j];
|
||||||
ends[j] = temp;
|
ends[j] = temp;
|
||||||
|
|
||||||
BytesRef tempTerm = terms[i];
|
BytesRef tempTerm = terms[i];
|
||||||
terms[i] = terms[j];
|
terms[i] = terms[j];
|
||||||
terms[j] = tempTerm;
|
terms[j] = tempTerm;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int compare(int i, int j) {
|
protected int compare(int i, int j) {
|
||||||
return Integer.compare(starts[i], starts[j]);
|
return Integer.compare(starts[i], starts[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
}.sort(0, numMatches);
|
}.sort(0, numMatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
void reset() {
|
/** @lucene.internal */
|
||||||
startOffset = endOffset = -1;
|
public void reset() {
|
||||||
score = 0.0f;
|
startOffset = endOffset = -1;
|
||||||
numMatches = 0;
|
score = 0.0f;
|
||||||
}
|
numMatches = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start offset of this passage.
|
* Start offset of this passage.
|
||||||
* @return start index (inclusive) of the passage in the
|
*
|
||||||
* original content: always >= 0.
|
* @return start index (inclusive) of the passage in the
|
||||||
*/
|
* original content: always >= 0.
|
||||||
public int getStartOffset() {
|
*/
|
||||||
return startOffset;
|
public int getStartOffset() {
|
||||||
}
|
return startOffset;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End offset of this passage.
|
* End offset of this passage.
|
||||||
* @return end index (exclusive) of the passage in the
|
*
|
||||||
* original content: always >= {@link #getStartOffset()}
|
* @return end index (exclusive) of the passage in the
|
||||||
*/
|
* original content: always >= {@link #getStartOffset()}
|
||||||
public int getEndOffset() {
|
*/
|
||||||
return endOffset;
|
public int getEndOffset() {
|
||||||
}
|
return endOffset;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Passage's score.
|
* Passage's score.
|
||||||
*/
|
*/
|
||||||
public float getScore() {
|
public float getScore() {
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of term matches available in
|
* Number of term matches available in
|
||||||
* {@link #getMatchStarts}, {@link #getMatchEnds},
|
* {@link #getMatchStarts}, {@link #getMatchEnds},
|
||||||
* {@link #getMatchTerms}
|
* {@link #getMatchTerms}
|
||||||
*/
|
*/
|
||||||
public int getNumMatches() {
|
public int getNumMatches() {
|
||||||
return numMatches;
|
return numMatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start offsets of the term matches, in increasing order.
|
* Start offsets of the term matches, in increasing order.
|
||||||
* <p>
|
* <p>
|
||||||
* Only {@link #getNumMatches} are valid. Note that these
|
* Only {@link #getNumMatches} are valid. Note that these
|
||||||
* offsets are absolute (not relative to {@link #getStartOffset()}).
|
* offsets are absolute (not relative to {@link #getStartOffset()}).
|
||||||
*/
|
*/
|
||||||
public int[] getMatchStarts() {
|
public int[] getMatchStarts() {
|
||||||
return matchStarts;
|
return matchStarts;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
|
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
|
||||||
* <p>
|
* <p>
|
||||||
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
|
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
|
||||||
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
|
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
|
||||||
* Analyzer produced a term which spans a passage boundary.
|
* Analyzer produced a term which spans a passage boundary.
|
||||||
*/
|
*/
|
||||||
public int[] getMatchEnds() {
|
public int[] getMatchEnds() {
|
||||||
return matchEnds;
|
return matchEnds;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
|
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
|
||||||
* <p>
|
* <p>
|
||||||
* Only {@link #getNumMatches()} are valid.
|
* Only {@link #getNumMatches()} are valid.
|
||||||
*/
|
*/
|
||||||
public BytesRef[] getMatchTerms() {
|
public BytesRef[] getMatchTerms() {
|
||||||
return matchTerms;
|
return matchTerms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @lucene.internal */
|
||||||
|
public void setStartOffset(int startOffset) {
|
||||||
|
this.startOffset = startOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @lucene.internal */
|
||||||
|
public void setEndOffset(int endOffset) {
|
||||||
|
this.endOffset = endOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @lucene.internal */
|
||||||
|
public void setScore(float score) {
|
||||||
|
this.score = score;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,10 +69,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
|
||||||
return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum));
|
return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum));
|
||||||
}
|
}
|
||||||
|
|
||||||
// but this would have a performance cost for likely little gain in the user experience, it
|
// See class javadocs.
|
||||||
// would only serve to make this method less bogus.
|
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl? See TODOs in OffsetsEnum.
|
||||||
// instead, we always return freq() = Integer.MAX_VALUE and let the highlighter terminate based on offset...
|
|
||||||
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl?
|
|
||||||
private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable {
|
private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable {
|
||||||
TokenStream stream; // becomes null when closed
|
TokenStream stream; // becomes null when closed
|
||||||
final CharacterRunAutomaton[] matchers;
|
final CharacterRunAutomaton[] matchers;
|
||||||
|
@ -134,6 +132,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
|
||||||
return currentEndOffset;
|
return currentEndOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TOTAL HACK; used in OffsetsEnum.getTerm()
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() throws IOException {
|
public BytesRef getPayload() throws IOException {
|
||||||
if (matchDescriptions[currentMatch] == null) {
|
if (matchDescriptions[currentMatch] == null) {
|
||||||
|
|
|
@ -697,13 +697,13 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
for (Passage passage : passages) {
|
for (Passage passage : passages) {
|
||||||
// don't add ellipsis if its the first one, or if its connected.
|
// don't add ellipsis if its the first one, or if its connected.
|
||||||
if (passage.startOffset > pos && pos > 0) {
|
if (passage.getStartOffset() > pos && pos > 0) {
|
||||||
sb.append("... ");
|
sb.append("... ");
|
||||||
}
|
}
|
||||||
pos = passage.startOffset;
|
pos = passage.getStartOffset();
|
||||||
for (int i = 0; i < passage.numMatches; i++) {
|
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||||
int start = passage.matchStarts[i];
|
int start = passage.getMatchStarts()[i];
|
||||||
int end = passage.matchEnds[i];
|
int end = passage.getMatchEnds()[i];
|
||||||
// its possible to have overlapping terms
|
// its possible to have overlapping terms
|
||||||
if (start > pos) {
|
if (start > pos) {
|
||||||
sb.append(content, pos, start);
|
sb.append(content, pos, start);
|
||||||
|
@ -719,8 +719,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||||
sb.append(content, pos, Math.max(pos, passage.endOffset));
|
sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
|
||||||
pos = passage.endOffset;
|
pos = passage.getEndOffset();
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
|
@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
||||||
init(f, a);
|
init(f, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set to true if phrase queries will be automatically generated
|
||||||
|
* when the analyzer returns more than one term from whitespace
|
||||||
|
* delimited text.
|
||||||
|
* NOTE: this behavior may not be suitable for all languages.
|
||||||
|
* <p>
|
||||||
|
* Set to false if phrase queries should only be generated when
|
||||||
|
* surrounded by double quotes.
|
||||||
|
* <p>
|
||||||
|
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||||
|
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||||
|
if (splitOnWhitespace == false && value == true) {
|
||||||
|
throw new IllegalArgumentException
|
||||||
|
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||||
|
}
|
||||||
|
this.autoGeneratePhraseQueries = value;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see #setSplitOnWhitespace(boolean)
|
* @see #setSplitOnWhitespace(boolean)
|
||||||
*/
|
*/
|
||||||
|
@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
||||||
/**
|
/**
|
||||||
* Whether query text should be split on whitespace prior to analysis.
|
* Whether query text should be split on whitespace prior to analysis.
|
||||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||||
|
* <p>
|
||||||
|
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||||
|
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||||
*/
|
*/
|
||||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||||
|
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||||
|
throw new IllegalArgumentException
|
||||||
|
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||||
|
}
|
||||||
this.splitOnWhitespace = splitOnWhitespace;
|
this.splitOnWhitespace = splitOnWhitespace;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
||||||
finally { jj_save(2, xla); }
|
finally { jj_save(2, xla); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean jj_3R_3() {
|
||||||
|
if (jj_scan_token(TERM)) return true;
|
||||||
|
jj_lookingAhead = true;
|
||||||
|
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||||
|
jj_lookingAhead = false;
|
||||||
|
if (!jj_semLA || jj_3R_6()) return true;
|
||||||
|
Token xsp;
|
||||||
|
if (jj_3R_7()) return true;
|
||||||
|
while (true) {
|
||||||
|
xsp = jj_scanpos;
|
||||||
|
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_3R_6() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_3R_5() {
|
||||||
|
if (jj_scan_token(STAR)) return true;
|
||||||
|
if (jj_scan_token(COLON)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private boolean jj_3R_4() {
|
private boolean jj_3R_4() {
|
||||||
if (jj_scan_token(TERM)) return true;
|
if (jj_scan_token(TERM)) return true;
|
||||||
if (jj_scan_token(COLON)) return true;
|
if (jj_scan_token(COLON)) return true;
|
||||||
|
@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean jj_3R_3() {
|
|
||||||
if (jj_scan_token(TERM)) return true;
|
|
||||||
jj_lookingAhead = true;
|
|
||||||
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
|
||||||
jj_lookingAhead = false;
|
|
||||||
if (!jj_semLA || jj_3R_6()) return true;
|
|
||||||
Token xsp;
|
|
||||||
if (jj_3R_7()) return true;
|
|
||||||
while (true) {
|
|
||||||
xsp = jj_scanpos;
|
|
||||||
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean jj_3R_6() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean jj_3R_5() {
|
|
||||||
if (jj_scan_token(STAR)) return true;
|
|
||||||
if (jj_scan_token(COLON)) return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generated Token Manager. */
|
/** Generated Token Manager. */
|
||||||
public QueryParserTokenManager token_source;
|
public QueryParserTokenManager token_source;
|
||||||
/** Current token. */
|
/** Current token. */
|
||||||
|
|
|
@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase {
|
||||||
init(f, a);
|
init(f, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set to true if phrase queries will be automatically generated
|
||||||
|
* when the analyzer returns more than one term from whitespace
|
||||||
|
* delimited text.
|
||||||
|
* NOTE: this behavior may not be suitable for all languages.
|
||||||
|
* <p>
|
||||||
|
* Set to false if phrase queries should only be generated when
|
||||||
|
* surrounded by double quotes.
|
||||||
|
* <p>
|
||||||
|
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||||
|
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||||
|
if (splitOnWhitespace == false && value == true) {
|
||||||
|
throw new IllegalArgumentException
|
||||||
|
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||||
|
}
|
||||||
|
this.autoGeneratePhraseQueries = value;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see #setSplitOnWhitespace(boolean)
|
* @see #setSplitOnWhitespace(boolean)
|
||||||
*/
|
*/
|
||||||
|
@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase {
|
||||||
/**
|
/**
|
||||||
* Whether query text should be split on whitespace prior to analysis.
|
* Whether query text should be split on whitespace prior to analysis.
|
||||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||||
|
* <p>
|
||||||
|
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||||
|
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||||
*/
|
*/
|
||||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||||
|
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||||
|
throw new IllegalArgumentException
|
||||||
|
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||||
|
}
|
||||||
this.splitOnWhitespace = splitOnWhitespace;
|
this.splitOnWhitespace = splitOnWhitespace;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -144,7 +144,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
|
||||||
* Set to false if phrase queries should only be generated when
|
* Set to false if phrase queries should only be generated when
|
||||||
* surrounded by double quotes.
|
* surrounded by double quotes.
|
||||||
*/
|
*/
|
||||||
public final void setAutoGeneratePhraseQueries(boolean value) {
|
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||||
this.autoGeneratePhraseQueries = value;
|
this.autoGeneratePhraseQueries = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -840,6 +840,20 @@ public class TestQueryParser extends QueryParserTestBase {
|
||||||
assertTrue(isAHit(qp.parse("เ??"), s, analyzer));
|
assertTrue(isAHit(qp.parse("เ??"), s, analyzer));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-7533
|
||||||
|
public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() {
|
||||||
|
final QueryParser qp = new QueryParser(FIELD, new MockAnalyzer(random()));
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> {
|
||||||
|
qp.setSplitOnWhitespace(false);
|
||||||
|
qp.setAutoGeneratePhraseQueries(true);
|
||||||
|
});
|
||||||
|
final QueryParser qp2 = new QueryParser(FIELD, new MockAnalyzer(random()));
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> {
|
||||||
|
qp2.setSplitOnWhitespace(true);
|
||||||
|
qp2.setAutoGeneratePhraseQueries(true);
|
||||||
|
qp2.setSplitOnWhitespace(false);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
|
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
|
||||||
Directory ramDir = newDirectory();
|
Directory ramDir = newDirectory();
|
||||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.index.Term;
|
||||||
//import org.apache.lucene.queryparser.classic.ParseException;
|
//import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
//import org.apache.lucene.queryparser.classic.QueryParser;
|
//import org.apache.lucene.queryparser.classic.QueryParser;
|
||||||
//import org.apache.lucene.queryparser.classic.QueryParserBase;
|
//import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||||
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||||
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||||
//import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
|
//import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
|
||||||
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
|
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
|
||||||
|
@ -328,6 +329,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
|
|
||||||
PhraseQuery expected = new PhraseQuery("field", "中", "国");
|
PhraseQuery expected = new PhraseQuery("field", "中", "国");
|
||||||
CommonQueryParserConfiguration qp = getParserConfig(analyzer);
|
CommonQueryParserConfiguration qp = getParserConfig(analyzer);
|
||||||
|
if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method
|
||||||
|
((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533
|
||||||
|
}
|
||||||
setAutoGeneratePhraseQueries(qp, true);
|
setAutoGeneratePhraseQueries(qp, true);
|
||||||
assertEquals(expected, getQuery("中国",qp));
|
assertEquals(expected, getQuery("中国",qp));
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,7 +56,7 @@ public abstract class PrimaryNode extends Node {
|
||||||
// Current NRT segment infos, incRef'd with IndexWriter.deleter:
|
// Current NRT segment infos, incRef'd with IndexWriter.deleter:
|
||||||
private SegmentInfos curInfos;
|
private SegmentInfos curInfos;
|
||||||
|
|
||||||
final IndexWriter writer;
|
protected final IndexWriter writer;
|
||||||
|
|
||||||
// IncRef'd state of the last published NRT point; when a replica comes asking, we give it this as the current NRT point:
|
// IncRef'd state of the last published NRT point; when a replica comes asking, we give it this as the current NRT point:
|
||||||
private CopyState copyState;
|
private CopyState copyState;
|
||||||
|
|
|
@ -129,9 +129,10 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
private final boolean highlight;
|
private final boolean highlight;
|
||||||
|
|
||||||
private final boolean commitOnBuild;
|
private final boolean commitOnBuild;
|
||||||
|
private final boolean closeIndexWriterOnBuild;
|
||||||
|
|
||||||
/** Used for ongoing NRT additions/updates. */
|
/** Used for ongoing NRT additions/updates. */
|
||||||
private IndexWriter writer;
|
protected IndexWriter writer;
|
||||||
|
|
||||||
/** {@link IndexSearcher} used for lookups. */
|
/** {@link IndexSearcher} used for lookups. */
|
||||||
protected SearcherManager searcherMgr;
|
protected SearcherManager searcherMgr;
|
||||||
|
@ -146,6 +147,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
/** Default higlighting option. */
|
/** Default higlighting option. */
|
||||||
public static final boolean DEFAULT_HIGHLIGHT = true;
|
public static final boolean DEFAULT_HIGHLIGHT = true;
|
||||||
|
|
||||||
|
/** Default option to close the IndexWriter once the index has been built. */
|
||||||
|
protected final static boolean DEFAULT_CLOSE_INDEXWRITER_ON_BUILD = true;
|
||||||
|
|
||||||
/** How we sort the postings and search results. */
|
/** How we sort the postings and search results. */
|
||||||
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
|
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
|
||||||
|
|
||||||
|
@ -198,8 +202,34 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
|
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
|
||||||
boolean commitOnBuild,
|
boolean commitOnBuild,
|
||||||
boolean allTermsRequired, boolean highlight) throws IOException {
|
boolean allTermsRequired, boolean highlight) throws IOException {
|
||||||
|
this(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild, allTermsRequired, highlight,
|
||||||
|
DEFAULT_CLOSE_INDEXWRITER_ON_BUILD);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a new instance, loading from a previously built
|
||||||
|
* AnalyzingInfixSuggester directory, if it exists. This directory must be
|
||||||
|
* private to the infix suggester (i.e., not an external
|
||||||
|
* Lucene index). Note that {@link #close}
|
||||||
|
* will also close the provided directory.
|
||||||
|
*
|
||||||
|
* @param minPrefixChars Minimum number of leading characters
|
||||||
|
* before PrefixQuery is used (default 4).
|
||||||
|
* Prefixes shorter than this are indexed as character
|
||||||
|
* ngrams (increasing index size but making lookups
|
||||||
|
* faster).
|
||||||
|
*
|
||||||
|
* @param commitOnBuild Call commit after the index has finished building. This would persist the
|
||||||
|
* suggester index to disk and future instances of this suggester can use this pre-built dictionary.
|
||||||
|
*
|
||||||
|
* @param allTermsRequired All terms in the suggest query must be matched.
|
||||||
|
* @param highlight Highlight suggest query in suggestions.
|
||||||
|
* @param closeIndexWriterOnBuild If true, the IndexWriter will be closed after the index has finished building.
|
||||||
|
*/
|
||||||
|
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
|
||||||
|
boolean commitOnBuild, boolean allTermsRequired,
|
||||||
|
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
|
||||||
|
|
||||||
if (minPrefixChars < 0) {
|
if (minPrefixChars < 0) {
|
||||||
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
|
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
|
||||||
|
@ -212,6 +242,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
this.commitOnBuild = commitOnBuild;
|
this.commitOnBuild = commitOnBuild;
|
||||||
this.allTermsRequired = allTermsRequired;
|
this.allTermsRequired = allTermsRequired;
|
||||||
this.highlight = highlight;
|
this.highlight = highlight;
|
||||||
|
this.closeIndexWriterOnBuild = closeIndexWriterOnBuild;
|
||||||
|
|
||||||
if (DirectoryReader.indexExists(dir)) {
|
if (DirectoryReader.indexExists(dir)) {
|
||||||
// Already built; open it:
|
// Already built; open it:
|
||||||
|
@ -276,15 +307,22 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
|
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
|
||||||
if (commitOnBuild) {
|
if (commitOnBuild || closeIndexWriterOnBuild) {
|
||||||
commit();
|
commit();
|
||||||
}
|
}
|
||||||
searcherMgr = new SearcherManager(writer, null);
|
searcherMgr = new SearcherManager(writer, null);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (success == false && writer != null) {
|
if (success) {
|
||||||
writer.rollback();
|
if (closeIndexWriterOnBuild) {
|
||||||
writer = null;
|
writer.close();
|
||||||
|
writer = null;
|
||||||
|
}
|
||||||
|
} else { // failure
|
||||||
|
if (writer != null) {
|
||||||
|
writer.rollback();
|
||||||
|
writer = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -294,9 +332,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
* @see IndexWriter#commit */
|
* @see IndexWriter#commit */
|
||||||
public void commit() throws IOException {
|
public void commit() throws IOException {
|
||||||
if (writer == null) {
|
if (writer == null) {
|
||||||
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
|
if (searcherMgr == null || closeIndexWriterOnBuild == false) {
|
||||||
|
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
|
||||||
|
}
|
||||||
|
// else no-op: writer was committed and closed after the index was built, so commit is unnecessary
|
||||||
|
} else {
|
||||||
|
writer.commit();
|
||||||
}
|
}
|
||||||
writer.commit();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Analyzer getGramAnalyzer() {
|
private Analyzer getGramAnalyzer() {
|
||||||
|
@ -321,13 +363,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
|
|
||||||
private synchronized void ensureOpen() throws IOException {
|
private synchronized void ensureOpen() throws IOException {
|
||||||
if (writer == null) {
|
if (writer == null) {
|
||||||
if (searcherMgr != null) {
|
if (DirectoryReader.indexExists(dir)) {
|
||||||
searcherMgr.close();
|
// Already built; open it:
|
||||||
searcherMgr = null;
|
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
|
||||||
|
} else {
|
||||||
|
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
|
||||||
}
|
}
|
||||||
writer = new IndexWriter(dir,
|
SearcherManager oldSearcherMgr = searcherMgr;
|
||||||
getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
|
|
||||||
searcherMgr = new SearcherManager(writer, null);
|
searcherMgr = new SearcherManager(writer, null);
|
||||||
|
if (oldSearcherMgr != null) {
|
||||||
|
oldSearcherMgr.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,7 +428,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
if (searcherMgr == null) {
|
if (searcherMgr == null) {
|
||||||
throw new IllegalStateException("suggester was not built");
|
throw new IllegalStateException("suggester was not built");
|
||||||
}
|
}
|
||||||
searcherMgr.maybeRefreshBlocking();
|
if (writer != null) {
|
||||||
|
searcherMgr.maybeRefreshBlocking();
|
||||||
|
}
|
||||||
|
// else no-op: writer was committed and closed after the index was built
|
||||||
|
// and before searchMgr was constructed, so refresh is unnecessary
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -791,9 +841,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
}
|
}
|
||||||
if (writer != null) {
|
if (writer != null) {
|
||||||
writer.close();
|
writer.close();
|
||||||
dir.close();
|
|
||||||
writer = null;
|
writer = null;
|
||||||
}
|
}
|
||||||
|
if (dir != null) {
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -86,6 +86,10 @@ final class CompletionFieldsConsumer extends FieldsConsumer {
|
||||||
for (String field : fields) {
|
for (String field : fields) {
|
||||||
CompletionTermWriter termWriter = new CompletionTermWriter();
|
CompletionTermWriter termWriter = new CompletionTermWriter();
|
||||||
Terms terms = fields.terms(field);
|
Terms terms = fields.terms(field);
|
||||||
|
if (terms == null) {
|
||||||
|
// this can happen from ghost fields, where the incoming Fields iterator claims a field exists but it does not
|
||||||
|
continue;
|
||||||
|
}
|
||||||
TermsEnum termsEnum = terms.iterator();
|
TermsEnum termsEnum = terms.iterator();
|
||||||
|
|
||||||
// write terms
|
// write terms
|
||||||
|
|
|
@ -35,11 +35,14 @@ import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.SearcherManager;
|
||||||
import org.apache.lucene.search.suggest.Input;
|
import org.apache.lucene.search.suggest.Input;
|
||||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||||
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
@ -1334,4 +1337,112 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
|
|
||||||
suggester.close();
|
suggester.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCloseIndexWriterOnBuild() throws Exception {
|
||||||
|
class MyAnalyzingInfixSuggester extends AnalyzingInfixSuggester {
|
||||||
|
public MyAnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
|
||||||
|
int minPrefixChars, boolean commitOnBuild, boolean allTermsRequired,
|
||||||
|
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
|
||||||
|
super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
|
||||||
|
allTermsRequired, highlight, closeIndexWriterOnBuild);
|
||||||
|
}
|
||||||
|
public IndexWriter getIndexWriter() {
|
||||||
|
return writer;
|
||||||
|
}
|
||||||
|
public SearcherManager getSearcherManager() {
|
||||||
|
return searcherMgr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// After build(), when closeIndexWriterOnBuild = true:
|
||||||
|
// * The IndexWriter should be null
|
||||||
|
// * The SearcherManager should be non-null
|
||||||
|
// * SearcherManager's IndexWriter reference should be closed
|
||||||
|
// (as evidenced by maybeRefreshBlocking() throwing AlreadyClosedException)
|
||||||
|
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||||
|
MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
|
||||||
|
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
|
||||||
|
suggester.build(new InputArrayIterator(sharedInputs));
|
||||||
|
assertNull(suggester.getIndexWriter());
|
||||||
|
assertNotNull(suggester.getSearcherManager());
|
||||||
|
expectThrows(AlreadyClosedException.class, () -> suggester.getSearcherManager().maybeRefreshBlocking());
|
||||||
|
|
||||||
|
suggester.close();
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCommitAfterBuild() throws Exception {
|
||||||
|
performOperationWithAllOptionCombinations(suggester -> {
|
||||||
|
suggester.build(new InputArrayIterator(sharedInputs));
|
||||||
|
suggester.commit();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRefreshAfterBuild() throws Exception {
|
||||||
|
performOperationWithAllOptionCombinations(suggester -> {
|
||||||
|
suggester.build(new InputArrayIterator(sharedInputs));
|
||||||
|
suggester.refresh();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDisallowCommitBeforeBuild() throws Exception {
|
||||||
|
performOperationWithAllOptionCombinations
|
||||||
|
(suggester -> expectThrows(IllegalStateException.class, suggester::commit));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDisallowRefreshBeforeBuild() throws Exception {
|
||||||
|
performOperationWithAllOptionCombinations
|
||||||
|
(suggester -> expectThrows(IllegalStateException.class, suggester::refresh));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Input sharedInputs[] = new Input[] {
|
||||||
|
new Input("lend me your ear", 8, new BytesRef("foobar")),
|
||||||
|
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
|
||||||
|
};
|
||||||
|
|
||||||
|
private interface SuggesterOperation {
|
||||||
|
void operate(AnalyzingInfixSuggester suggester) throws Exception;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform the given operation on suggesters constructed with all combinations of options
|
||||||
|
* commitOnBuild and closeIndexWriterOnBuild, including defaults.
|
||||||
|
*/
|
||||||
|
private void performOperationWithAllOptionCombinations(SuggesterOperation operation) throws Exception {
|
||||||
|
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||||
|
|
||||||
|
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a);
|
||||||
|
operation.operate(suggester);
|
||||||
|
suggester.close();
|
||||||
|
|
||||||
|
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
|
||||||
|
operation.operate(suggester);
|
||||||
|
suggester.close();
|
||||||
|
|
||||||
|
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true);
|
||||||
|
operation.operate(suggester);
|
||||||
|
suggester.close();
|
||||||
|
|
||||||
|
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
|
||||||
|
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
|
||||||
|
operation.operate(suggester);
|
||||||
|
suggester.close();
|
||||||
|
|
||||||
|
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
|
||||||
|
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
|
||||||
|
operation.operate(suggester);
|
||||||
|
suggester.close();
|
||||||
|
|
||||||
|
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
|
||||||
|
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
|
||||||
|
operation.operate(suggester);
|
||||||
|
suggester.close();
|
||||||
|
|
||||||
|
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
|
||||||
|
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
|
||||||
|
operation.operate(suggester);
|
||||||
|
suggester.close();
|
||||||
|
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,9 +24,12 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenFilter;
|
import org.apache.lucene.analysis.MockTokenFilter;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.SortedNumericDocValues;
|
import org.apache.lucene.index.SortedNumericDocValues;
|
||||||
|
@ -38,7 +41,6 @@ import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;
|
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;
|
||||||
|
@ -112,7 +114,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testSimple() throws Exception {
|
public void testSimple() throws Exception {
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
||||||
|
@ -141,7 +142,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
||||||
iw.close();
|
iw.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testMostlyFilteredOutDocuments() throws Exception {
|
public void testMostlyFilteredOutDocuments() throws Exception {
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
||||||
|
@ -188,7 +188,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
||||||
iw.close();
|
iw.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testDocFiltering() throws Exception {
|
public void testDocFiltering() throws Exception {
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
||||||
|
@ -230,7 +229,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
||||||
iw.close();
|
iw.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testAnalyzerWithoutPreservePosAndSep() throws Exception {
|
public void testAnalyzerWithoutPreservePosAndSep() throws Exception {
|
||||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||||
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, false);
|
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, false);
|
||||||
|
@ -254,7 +252,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
||||||
iw.close();
|
iw.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testAnalyzerWithSepAndNoPreservePos() throws Exception {
|
public void testAnalyzerWithSepAndNoPreservePos() throws Exception {
|
||||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||||
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, true, false);
|
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, true, false);
|
||||||
|
@ -278,7 +275,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
||||||
iw.close();
|
iw.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testAnalyzerWithPreservePosAndNoSep() throws Exception {
|
public void testAnalyzerWithPreservePosAndNoSep() throws Exception {
|
||||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||||
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, true);
|
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, true);
|
||||||
|
@ -302,4 +298,43 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
||||||
iw.close();
|
iw.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGhostField() throws Exception {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field", "suggest_field2", "suggest_field3"));
|
||||||
|
|
||||||
|
Document document = new Document();
|
||||||
|
document.add(new StringField("id", "0", Field.Store.NO));
|
||||||
|
document.add(new SuggestField("suggest_field", "apples", 3));
|
||||||
|
iw.addDocument(document);
|
||||||
|
// need another document so whole segment isn't deleted
|
||||||
|
iw.addDocument(new Document());
|
||||||
|
iw.commit();
|
||||||
|
|
||||||
|
document = new Document();
|
||||||
|
document.add(new StringField("id", "1", Field.Store.NO));
|
||||||
|
document.add(new SuggestField("suggest_field2", "apples", 3));
|
||||||
|
iw.addDocument(document);
|
||||||
|
iw.commit();
|
||||||
|
|
||||||
|
iw.deleteDocuments(new Term("id", "0"));
|
||||||
|
// first force merge is OK
|
||||||
|
iw.forceMerge(1);
|
||||||
|
|
||||||
|
// second force merge causes MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
|
||||||
|
// this field anymore)
|
||||||
|
iw.addDocument(new Document());
|
||||||
|
iw.forceMerge(1);
|
||||||
|
|
||||||
|
DirectoryReader reader = DirectoryReader.open(iw);
|
||||||
|
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||||
|
|
||||||
|
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
|
||||||
|
assertEquals(0, indexSearcher.suggest(query, 3).totalHits);
|
||||||
|
|
||||||
|
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
|
||||||
|
assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3));
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
iw.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,6 +40,8 @@ import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -312,6 +314,49 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tests that level 2 ghost fields still work
|
||||||
|
public void testLevel2Ghosts() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig(null);
|
||||||
|
iwc.setCodec(getCodec());
|
||||||
|
iwc.setMergePolicy(newLogMergePolicy());
|
||||||
|
IndexWriter iw = new IndexWriter(dir, iwc);
|
||||||
|
|
||||||
|
Document document = new Document();
|
||||||
|
document.add(new StringField("id", "0", Field.Store.NO));
|
||||||
|
document.add(new StringField("suggest_field", "apples", Field.Store.NO));
|
||||||
|
iw.addDocument(document);
|
||||||
|
// need another document so whole segment isn't deleted
|
||||||
|
iw.addDocument(new Document());
|
||||||
|
iw.commit();
|
||||||
|
|
||||||
|
document = new Document();
|
||||||
|
document.add(new StringField("id", "1", Field.Store.NO));
|
||||||
|
document.add(new StringField("suggest_field2", "apples", Field.Store.NO));
|
||||||
|
iw.addDocument(document);
|
||||||
|
iw.commit();
|
||||||
|
|
||||||
|
iw.deleteDocuments(new Term("id", "0"));
|
||||||
|
// first force merge creates a level 1 ghost field
|
||||||
|
iw.forceMerge(1);
|
||||||
|
|
||||||
|
// second force merge creates a level 2 ghost field, causing MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
|
||||||
|
// this field anymore)
|
||||||
|
iw.addDocument(new Document());
|
||||||
|
iw.forceMerge(1);
|
||||||
|
|
||||||
|
DirectoryReader reader = DirectoryReader.open(iw);
|
||||||
|
IndexSearcher indexSearcher = new IndexSearcher(reader);
|
||||||
|
|
||||||
|
assertEquals(1, indexSearcher.count(new TermQuery(new Term("id", "1"))));
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
iw.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
private static class TermFreqs {
|
private static class TermFreqs {
|
||||||
long totalTermFreq;
|
long totalTermFreq;
|
||||||
int docFreq;
|
int docFreq;
|
||||||
|
|
|
@ -120,6 +120,9 @@ New Features
|
||||||
|
|
||||||
* SOLR-9077: Streaming expressions should support collection alias (Kevin Risden)
|
* SOLR-9077: Streaming expressions should support collection alias (Kevin Risden)
|
||||||
|
|
||||||
|
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
|
||||||
|
(Gregory Chanan, Hrishikesh Gadre via yonik)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
||||||
|
@ -128,6 +131,9 @@ Optimizations
|
||||||
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
|
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
|
||||||
(Jonny Marks via Christine Poerschke)
|
(Jonny Marks via Christine Poerschke)
|
||||||
|
|
||||||
|
* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
|
||||||
|
comparator and only invalidate leafComparator. (John Call via yonik)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
|
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
|
||||||
|
@ -183,6 +189,10 @@ Other Changes
|
||||||
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
|
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
|
||||||
(Christine Poerschke, Noble Paul)
|
(Christine Poerschke, Noble Paul)
|
||||||
|
|
||||||
|
* SOLR-9597: Add setReadOnly(String ...) to ConnectionImpl (Kevin Risden)
|
||||||
|
|
||||||
|
* SOLR-9609: Change hard-coded keysize from 512 to 1024 (Jeremy Martini via Erick Erickson)
|
||||||
|
|
||||||
================== 6.3.0 ==================
|
================== 6.3.0 ==================
|
||||||
|
|
||||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||||
|
@ -615,9 +625,6 @@ New Features
|
||||||
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
|
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
|
||||||
(Doug Turnbull, David Smiley)
|
(Doug Turnbull, David Smiley)
|
||||||
|
|
||||||
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
|
|
||||||
(Gregory Chanan)
|
|
||||||
|
|
||||||
* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)
|
* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)
|
||||||
|
|
||||||
* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.
|
* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.
|
||||||
|
|
|
@ -2645,16 +2645,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
||||||
try {
|
try {
|
||||||
FileUtils.deleteDirectory(dataDir);
|
FileUtils.deleteDirectory(dataDir);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
SolrException.log(log, "Failed to delete data dir for unloaded core:" + cd.getName()
|
log.error("Failed to delete data dir for unloaded core: {} dir: {}", cd.getName(), dataDir.getAbsolutePath(), e);
|
||||||
+ " dir:" + dataDir.getAbsolutePath());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (deleteInstanceDir) {
|
if (deleteInstanceDir) {
|
||||||
try {
|
try {
|
||||||
FileUtils.deleteDirectory(cd.getInstanceDir().toFile());
|
FileUtils.deleteDirectory(cd.getInstanceDir().toFile());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
SolrException.log(log, "Failed to delete instance dir for unloaded core:" + cd.getName()
|
log.error("Failed to delete instance dir for unloaded core: {} dir: {}", cd.getName(), cd.getInstanceDir(), e);
|
||||||
+ " dir:" + cd.getInstanceDir());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -616,7 +616,7 @@ public class QueryComponent extends SearchComponent
|
||||||
// :TODO: would be simpler to always serialize every position of SortField[]
|
// :TODO: would be simpler to always serialize every position of SortField[]
|
||||||
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;
|
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;
|
||||||
|
|
||||||
FieldComparator<?> comparator = null;
|
FieldComparator<?> comparator = sortField.getComparator(1,0);
|
||||||
LeafFieldComparator leafComparator = null;
|
LeafFieldComparator leafComparator = null;
|
||||||
Object[] vals = new Object[nDocs];
|
Object[] vals = new Object[nDocs];
|
||||||
|
|
||||||
|
@ -633,13 +633,13 @@ public class QueryComponent extends SearchComponent
|
||||||
idx = ReaderUtil.subIndex(doc, leaves);
|
idx = ReaderUtil.subIndex(doc, leaves);
|
||||||
currentLeaf = leaves.get(idx);
|
currentLeaf = leaves.get(idx);
|
||||||
if (idx != lastIdx) {
|
if (idx != lastIdx) {
|
||||||
// we switched segments. invalidate comparator.
|
// we switched segments. invalidate leafComparator.
|
||||||
comparator = null;
|
lastIdx = idx;
|
||||||
|
leafComparator = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (comparator == null) {
|
if (leafComparator == null) {
|
||||||
comparator = sortField.getComparator(1,0);
|
|
||||||
leafComparator = comparator.getLeafComparator(currentLeaf);
|
leafComparator = comparator.getLeafComparator(currentLeaf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -285,6 +285,10 @@ public final class CryptoKeys {
|
||||||
private final PrivateKey privateKey;
|
private final PrivateKey privateKey;
|
||||||
private final SecureRandom random = new SecureRandom();
|
private final SecureRandom random = new SecureRandom();
|
||||||
|
|
||||||
|
// If this ever comes back to haunt us see the discussion at
|
||||||
|
// SOLR-9609 for background and code allowing this to go
|
||||||
|
// into security.json
|
||||||
|
private static final int DEFAULT_KEYPAIR_LENGTH = 1024;
|
||||||
|
|
||||||
public RSAKeyPair() {
|
public RSAKeyPair() {
|
||||||
KeyPairGenerator keyGen = null;
|
KeyPairGenerator keyGen = null;
|
||||||
|
@ -293,7 +297,7 @@ public final class CryptoKeys {
|
||||||
} catch (NoSuchAlgorithmException e) {
|
} catch (NoSuchAlgorithmException e) {
|
||||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
|
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
|
||||||
}
|
}
|
||||||
keyGen.initialize(512);
|
keyGen.initialize(DEFAULT_KEYPAIR_LENGTH);
|
||||||
java.security.KeyPair keyPair = keyGen.genKeyPair();
|
java.security.KeyPair keyPair = keyGen.genKeyPair();
|
||||||
privateKey = keyPair.getPrivate();
|
privateKey = keyPair.getPrivate();
|
||||||
publicKey = keyPair.getPublic();
|
publicKey = keyPair.getPublic();
|
||||||
|
|
|
@ -114,7 +114,7 @@ public class BlockDirectoryTest extends SolrTestCaseJ4 {
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
Metrics metrics = new Metrics();
|
Metrics metrics = new Metrics();
|
||||||
int blockSize = 8192;
|
int blockSize = 8192;
|
||||||
int slabSize = blockSize * 32768;
|
int slabSize = blockSize * 16384;
|
||||||
long totalMemory = 1 * slabSize;
|
long totalMemory = 1 * slabSize;
|
||||||
BlockCache blockCache = new BlockCache(metrics, true, totalMemory, slabSize, blockSize);
|
BlockCache blockCache = new BlockCache(metrics, true, totalMemory, slabSize, blockSize);
|
||||||
BlockDirectoryCache cache = new BlockDirectoryCache(blockCache, "/collection1", metrics, true);
|
BlockDirectoryCache cache = new BlockDirectoryCache(blockCache, "/collection1", metrics, true);
|
||||||
|
|
|
@ -155,6 +155,15 @@ class ConnectionImpl implements Connection {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When using OpenLink ODBC-JDBC bridge on Windows, it runs the method ConnectionImpl.setReadOnly(String ...).
|
||||||
|
* The spec says that setReadOnly(boolean ...) is required. This causes the ODBC-JDBC bridge to fail on Windows.
|
||||||
|
* OpenLink case: http://support.openlinksw.com/support/techupdate.vsp?c=21881
|
||||||
|
*/
|
||||||
|
public void setReadOnly(String readOnly) throws SQLException {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isReadOnly() throws SQLException {
|
public boolean isReadOnly() throws SQLException {
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -886,7 +886,7 @@ public class JavaBinCodec implements PushWriter {
|
||||||
daos.writeByte(NULL);
|
daos.writeByte(NULL);
|
||||||
return true;
|
return true;
|
||||||
} else if (val instanceof CharSequence) {
|
} else if (val instanceof CharSequence) {
|
||||||
writeStr((String) val);
|
writeStr((CharSequence) val);
|
||||||
return true;
|
return true;
|
||||||
} else if (val instanceof Number) {
|
} else if (val instanceof Number) {
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue