mirror of https://github.com/apache/lucene.git
commit 750cf6d7a5: Merge branch 'apache-https-master' into jira/solr-8593
@ -431,6 +431,7 @@ reChangesSectionHREF = re.compile('<a id="(.*?)".*?>(.*?)</a>', re.IGNORECASE)
|
|||
reUnderbarNotDashHTML = re.compile(r'<li>(\s*(LUCENE|SOLR)_\d\d\d\d+)')
|
||||
reUnderbarNotDashTXT = re.compile(r'\s+((LUCENE|SOLR)_\d\d\d\d+)', re.MULTILINE)
|
||||
def checkChangesContent(s, version, name, project, isHTML):
|
||||
currentVersionTuple = versionToTuple(version, name)
|
||||
|
||||
if isHTML and s.find('Release %s' % version) == -1:
|
||||
raise RuntimeError('did not see "Release %s" in %s' % (version, name))
|
||||
|
@ -459,7 +460,8 @@ def checkChangesContent(s, version, name, project, isHTML):
|
|||
raise RuntimeError('did not see "%s" in %s' % (sub, name))
|
||||
|
||||
if isHTML:
|
||||
# Make sure a section only appears once under each release:
|
||||
# Make sure that a section only appears once under each release,
|
||||
# and that each release is not greater than the current version
|
||||
seenIDs = set()
|
||||
seenText = set()
|
||||
|
||||
|
@ -468,6 +470,9 @@ def checkChangesContent(s, version, name, project, isHTML):
|
|||
if text.lower().startswith('release '):
|
||||
release = text[8:].strip()
|
||||
seenText.clear()
|
||||
releaseTuple = versionToTuple(release, name)
|
||||
if releaseTuple > currentVersionTuple:
|
||||
raise RuntimeError('Future release %s is greater than %s in %s' % (release, version, name))
|
||||
if id in seenIDs:
|
||||
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
||||
seenIDs.add(id)
|
||||
|
@ -475,6 +480,27 @@ def checkChangesContent(s, version, name, project, isHTML):
|
|||
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
|
||||
seenText.add(text)
|
||||
|
||||
|
||||
reVersion = re.compile(r'(\d+)\.(\d+)(?:\.(\d+))?\s*(-alpha|-beta|final|RC\d+)?\s*(?:\[.*\])?', re.IGNORECASE)
|
||||
def versionToTuple(version, name):
|
||||
versionMatch = reVersion.match(version)
|
||||
if versionMatch is None:
|
||||
raise RuntimeError('Version %s in %s cannot be parsed' % (version, name))
|
||||
versionTuple = versionMatch.groups()
|
||||
while versionTuple[-1] is None or versionTuple[-1] == '':
|
||||
versionTuple = versionTuple[:-1]
|
||||
if versionTuple[-1].lower() == '-alpha':
|
||||
versionTuple = versionTuple[:-1] + ('0',)
|
||||
elif versionTuple[-1].lower() == '-beta':
|
||||
versionTuple = versionTuple[:-1] + ('1',)
|
||||
elif versionTuple[-1].lower() == 'final':
|
||||
versionTuple = versionTuple[:-2] + ('100',)
|
||||
elif versionTuple[-1].lower()[:2] == 'rc':
|
||||
versionTuple = versionTuple[:-2] + (versionTuple[-1][2:],)
|
||||
print('%s: %s' % (version, versionTuple))
|
||||
return versionTuple
|
||||
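For example, given the regex and the suffix branches above, '6.4.0' parses to the tuple ('6', '4', '0') and '6.4.0-alpha' to ('6', '4', '0', '0'); checkChangesContent then compares these tuples to reject release sections that are newer than the version being smoke-tested.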
|
||||
|
||||
reUnixPath = re.compile(r'\b[a-zA-Z_]+=(?:"(?:\\"|[^"])*"' + '|(?:\\\\.|[^"\'\\s])*' + r"|'(?:\\'|[^'])*')" \
|
||||
+ r'|(/(?:\\.|[^"\'\s])*)' \
|
||||
+ r'|("/(?:\\.|[^"])*")' \
|
||||
|
|
|
@ -56,6 +56,11 @@ Other
|
|||
|
||||
======================= Lucene 6.4.0 =======================
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries
|
||||
to be set to true when splitOnWhitespace is false (and vice-versa).
|
||||
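A hedged sketch (not part of this commit) of what the LUCENE-7533 entry above means in practice; the field name and analyzer are placeholders:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;

QueryParser qp = new QueryParser("body", new StandardAnalyzer());
qp.setSplitOnWhitespace(false);
qp.setAutoGeneratePhraseQueries(true); // now throws IllegalArgumentException: the combination is disallowed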
|
||||
New features
|
||||
|
||||
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
|
||||
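A hedged usage sketch for the LUCENE-5867 entry above (the IndexReader is assumed to be open already):

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.BooleanSimilarity;

IndexSearcher searcher = new IndexSearcher(reader); // reader: an already-open IndexReader
searcher.setSimilarity(new BooleanSimilarity()); // scores ignore term/document statistics; each match contributes its query boost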
|
@ -65,6 +70,15 @@ Bug Fixes
|
|||
* LUCENE-7547: JapaneseTokenizerFactory was failing to close the
|
||||
dictionary file it opened (Markus via Mike McCandless)
|
||||
|
||||
* LUCENE-7562: CompletionFieldsConsumer sometimes throws
|
||||
NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless)
|
||||
|
||||
* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true
|
||||
when splitOnWhitespace=false (and vice-versa). (Steve Rowe)
|
||||
|
||||
* LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
|
||||
component when preserveOriginal was set to true. (Adrien Grand)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
|
||||
|
@ -84,6 +98,9 @@ Improvements
|
|||
|
||||
* LUCENE-7524: Added more detailed explanation of how IDF is computed in
|
||||
ClassicSimilarity and BM25Similarity. (Adrien Grand)
|
||||
|
||||
* LUCENE-7564: AnalyzingInfixSuggester should close its IndexWriter by default
|
||||
at the end of build(). (Steve Rowe)
|
||||
|
||||
* LUCENE-7526: Enhanced UnifiedHighlighter's passage relevancy for queries with
|
||||
wildcards and sometimes just terms. Added shouldPreferPassageRelevancyOverSpeed()
|
||||
|
@ -93,6 +110,11 @@ Improvements
|
|||
* LUCENE-7537: Index time sorting now supports multi-valued sorts
|
||||
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
|
||||
|
||||
* LUCENE-7560: QueryBuilder.createFieldQuery is no longer final,
|
||||
giving custom query parsers subclassing QueryBuilder more freedom to
|
||||
control how text is analyzed and converted into a query (Matt Weber
|
||||
via Mike McCandless)
|
||||
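A hedged sketch (not from this commit) of configuring the multi-valued index-time sort described in the LUCENE-7537 entry above; the field name, analyzer, and selector are illustrative:

import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;

IndexWriterConfig iwc = new IndexWriterConfig(analyzer); // analyzer: any Analyzer instance
// Sort segments by the smallest value of the multi-valued "prices" field, ascending:
iwc.setIndexSort(new Sort(new SortedNumericSortField("prices", SortField.Type.LONG, false, SortedNumericSelector.Type.MIN)));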
|
||||
Other
|
||||
|
||||
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
|
||||
|
@ -100,6 +122,9 @@ Other
|
|||
|
||||
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
|
||||
|
||||
* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
|
||||
be customized. (David Smiley)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -36,12 +37,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
private static final String PRESERVE_ORIGINAL = "preserveOriginal";
|
||||
|
||||
private final boolean preserveOriginal;
|
||||
|
||||
/** Creates a new ASCIIFoldingFilterFactory */
|
||||
public ASCIIFoldingFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
preserveOriginal = getBoolean(args, "preserveOriginal", false);
|
||||
preserveOriginal = getBoolean(args, PRESERVE_ORIGINAL, false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
@ -54,7 +57,17 @@ public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements Mul
|
|||
|
||||
@Override
|
||||
public AbstractAnalysisFactory getMultiTermComponent() {
|
||||
return this;
|
||||
if (preserveOriginal) {
|
||||
// The main use-case for using preserveOriginal is to match regardless of
|
||||
// case but to give better scores to exact matches. Since most multi-term
|
||||
// queries return constant scores anyway, the multi-term component only
|
||||
// emits the folded token
|
||||
Map<String, String> args = new HashMap<>(getOriginalArgs());
|
||||
args.remove(PRESERVE_ORIGINAL);
|
||||
return new ASCIIFoldingFilterFactory(args);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
public class TestAsciiFoldingFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||
|
||||
public void testMultiTermAnalysis() throws IOException {
|
||||
TokenFilterFactory factory = new ASCIIFoldingFilterFactory(Collections.emptyMap());
|
||||
TokenStream stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||
|
||||
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
|
||||
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||
|
||||
factory = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
|
||||
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete", "Été" });
|
||||
|
||||
factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
|
||||
stream = new CannedTokenStream(new Token("Été", 0, 3));
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "Ete" });
|
||||
}
|
||||
|
||||
}
|
|
@ -25,13 +25,18 @@ import java.lang.reflect.Modifier;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.ParsePosition;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.TimeZone;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -62,6 +67,8 @@ import org.apache.lucene.legacy.LegacyNumericUtils;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -165,6 +172,57 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
// a test option to not remove temp dir...):
|
||||
Thread.sleep(100000);
|
||||
}
|
||||
|
||||
// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
|
||||
public void testCreateSortedIndex() throws Exception {
|
||||
|
||||
Path indexDir = getIndexDir().resolve("sorted");
|
||||
Files.deleteIfExists(indexDir);
|
||||
Directory dir = newFSDirectory(indexDir);
|
||||
|
||||
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
|
||||
mp.setNoCFSRatio(1.0);
|
||||
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
||||
|
||||
// TODO: remove randomness
|
||||
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
|
||||
conf.setMergePolicy(mp);
|
||||
conf.setUseCompoundFile(false);
|
||||
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
|
||||
IndexWriter writer = new IndexWriter(dir, conf);
|
||||
LineFileDocs docs = new LineFileDocs(random());
|
||||
SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
|
||||
parser.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
ParsePosition position = new ParsePosition(0);
|
||||
Field dateDVField = null;
|
||||
for(int i=0;i<50;i++) {
|
||||
Document doc = docs.nextDoc();
|
||||
String dateString = doc.get("date");
|
||||
|
||||
position.setIndex(0);
|
||||
Date date = parser.parse(dateString, position);
|
||||
if (position.getErrorIndex() != -1) {
|
||||
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
|
||||
}
|
||||
if (position.getIndex() != dateString.length()) {
|
||||
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
|
||||
}
|
||||
if (dateDVField == null) {
|
||||
dateDVField = new NumericDocValuesField("dateDV", 0L);
|
||||
doc.add(dateDVField);
|
||||
}
|
||||
dateDVField.setLongValue(date.getTime());
|
||||
if (i == 250) {
|
||||
writer.commit();
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.forceMerge(1);
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) throws IOException {
|
||||
writer.updateNumericDocValue(new Term("id", id), f, value);
|
||||
|
@ -1483,6 +1541,30 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedIndex() throws Exception {
|
||||
String[] versions = new String[] {"6.2.0", "6.2.1", "6.3.0"};
|
||||
for(String version : versions) {
|
||||
Path path = createTempDir("sorted");
|
||||
InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream("sorted." + version + ".zip");
|
||||
assertNotNull("Sorted index index " + version + " not found", resource);
|
||||
TestUtil.unzip(resource, path);
|
||||
|
||||
// TODO: more tests
|
||||
Directory dir = newFSDirectory(path);
|
||||
|
||||
DirectoryReader reader = DirectoryReader.open(dir);
|
||||
assertEquals(1, reader.leaves().size());
|
||||
Sort sort = reader.leaves().get(0).reader().getIndexSort();
|
||||
assertNotNull(sort);
|
||||
assertEquals("<long: \"dateDV\">!", sort.toString());
|
||||
reader.close();
|
||||
|
||||
// this will confirm the docs really are sorted:
|
||||
TestUtil.checkIndex(dir);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
static long getValue(BinaryDocValues bdv) throws IOException {
|
||||
BytesRef term = bdv.binaryValue();
|
||||
|
|
3 binary files not shown.
|
@ -196,7 +196,7 @@ public class QueryBuilder {
|
|||
* @param quoted true if phrases should be generated when terms occur at more than one position
|
||||
* @param phraseSlop slop factor for phrase/multiphrase queries
|
||||
*/
|
||||
protected final Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
|
||||
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
|
||||
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
|
||||
|
||||
// Use the analyzer to get all the tokens, and then build an appropriate
|
||||
|
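Because createFieldQuery is no longer final (the LUCENE-7560 change shown above), query parsers built on QueryBuilder can now intercept it; a minimal hypothetical subclass:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.QueryBuilder;

public class SloppyQueryBuilder extends QueryBuilder {
  public SloppyQueryBuilder(Analyzer analyzer) {
    super(analyzer);
  }

  @Override
  protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator,
                                   String field, String queryText, boolean quoted, int phraseSlop) {
    // Delegate to the default analysis chain, but never build phrase queries with zero slop.
    return super.createFieldQuery(analyzer, operator, field, queryText, quoted, Math.max(phraseSlop, 1));
  }
}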
|
|
@ -75,6 +75,9 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
// TODO we could make this go away. MemoryIndexOffsetStrategy could simply split and analyze each value into the
|
||||
// MemoryIndex. TokenStreamOffsetStrategy's hack TokenStreamPostingsEnum could incorporate this logic,
|
||||
// albeit with less code, less hack.
|
||||
private static final class MultiValueTokenStream extends TokenFilter {
|
||||
|
||||
private final String fieldName;
|
||||
|
|
|
@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight;
|
|||
* ellipses between unconnected passages.
|
||||
*/
|
||||
public class DefaultPassageFormatter extends PassageFormatter {
|
||||
/** text that will appear before highlighted terms */
|
||||
protected final String preTag;
|
||||
/** text that will appear after highlighted terms */
|
||||
protected final String postTag;
|
||||
/** text that will appear between two unconnected passages */
|
||||
protected final String ellipsis;
|
||||
/** true if we should escape for html */
|
||||
protected final boolean escape;
|
||||
/** text that will appear before highlighted terms */
|
||||
protected final String preTag;
|
||||
/** text that will appear after highlighted terms */
|
||||
protected final String postTag;
|
||||
/** text that will appear between two unconnected passages */
|
||||
protected final String ellipsis;
|
||||
/** true if we should escape for html */
|
||||
protected final boolean escape;
|
||||
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with the default tags.
|
||||
*/
|
||||
public DefaultPassageFormatter() {
|
||||
this("<b>", "</b>", "... ", false);
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with the default tags.
|
||||
*/
|
||||
public DefaultPassageFormatter() {
|
||||
this("<b>", "</b>", "... ", false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with custom tags.
|
||||
*
|
||||
* @param preTag text which should appear before a highlighted term.
|
||||
* @param postTag text which should appear after a highlighted term.
|
||||
* @param ellipsis text which should be used to connect two unconnected passages.
|
||||
* @param escape true if text should be html-escaped
|
||||
*/
|
||||
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
|
||||
if (preTag == null || postTag == null || ellipsis == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
this.preTag = preTag;
|
||||
this.postTag = postTag;
|
||||
this.ellipsis = ellipsis;
|
||||
this.escape = escape;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new DefaultPassageFormatter with custom tags.
|
||||
* @param preTag text which should appear before a highlighted term.
|
||||
* @param postTag text which should appear after a highlighted term.
|
||||
* @param ellipsis text which should be used to connect two unconnected passages.
|
||||
* @param escape true if text should be html-escaped
|
||||
*/
|
||||
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
|
||||
if (preTag == null || postTag == null || ellipsis == null) {
|
||||
throw new NullPointerException();
|
||||
@Override
|
||||
public String format(Passage passages[], String content) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int pos = 0;
|
||||
for (Passage passage : passages) {
|
||||
// don't add ellipsis if its the first one, or if its connected.
|
||||
if (passage.getStartOffset() > pos && pos > 0) {
|
||||
sb.append(ellipsis);
|
||||
}
|
||||
pos = passage.getStartOffset();
|
||||
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||
int start = passage.getMatchStarts()[i];
|
||||
int end = passage.getMatchEnds()[i];
|
||||
// its possible to have overlapping terms
|
||||
if (start > pos) {
|
||||
append(sb, content, pos, start);
|
||||
}
|
||||
this.preTag = preTag;
|
||||
this.postTag = postTag;
|
||||
this.ellipsis = ellipsis;
|
||||
this.escape = escape;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String format(Passage passages[], String content) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int pos = 0;
|
||||
for (Passage passage : passages) {
|
||||
// don't add ellipsis if its the first one, or if its connected.
|
||||
if (passage.startOffset > pos && pos > 0) {
|
||||
sb.append(ellipsis);
|
||||
}
|
||||
pos = passage.startOffset;
|
||||
for (int i = 0; i < passage.numMatches; i++) {
|
||||
int start = passage.matchStarts[i];
|
||||
int end = passage.matchEnds[i];
|
||||
// its possible to have overlapping terms
|
||||
if (start > pos) {
|
||||
append(sb, content, pos, start);
|
||||
}
|
||||
if (end > pos) {
|
||||
sb.append(preTag);
|
||||
append(sb, content, Math.max(pos, start), end);
|
||||
sb.append(postTag);
|
||||
pos = end;
|
||||
}
|
||||
}
|
||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||
append(sb, content, pos, Math.max(pos, passage.endOffset));
|
||||
pos = passage.endOffset;
|
||||
if (end > pos) {
|
||||
sb.append(preTag);
|
||||
append(sb, content, Math.max(pos, start), end);
|
||||
sb.append(postTag);
|
||||
pos = end;
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
|
||||
pos = passage.getEndOffset();
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends original text to the response.
|
||||
* @param dest resulting text, possibly transformed or encoded
|
||||
* @param content original text content
|
||||
* @param start index of the first character in content
|
||||
* @param end index of the character following the last character in content
|
||||
*/
|
||||
protected void append(StringBuilder dest, String content, int start, int end) {
|
||||
if (escape) {
|
||||
// note: these are the rules from owasp.org
|
||||
for (int i = start; i < end; i++) {
|
||||
char ch = content.charAt(i);
|
||||
switch(ch) {
|
||||
case '&':
|
||||
dest.append("&");
|
||||
break;
|
||||
case '<':
|
||||
dest.append("<");
|
||||
break;
|
||||
case '>':
|
||||
dest.append(">");
|
||||
break;
|
||||
case '"':
|
||||
dest.append(""");
|
||||
break;
|
||||
case '\'':
|
||||
dest.append("'");
|
||||
break;
|
||||
case '/':
|
||||
dest.append("/");
|
||||
break;
|
||||
default:
|
||||
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
||||
dest.append(ch);
|
||||
} else if (ch < 0xff) {
|
||||
dest.append("&#");
|
||||
dest.append((int)ch);
|
||||
dest.append(";");
|
||||
} else {
|
||||
dest.append(ch);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Appends original text to the response.
|
||||
*
|
||||
* @param dest resulting text, possibly transformed or encoded
|
||||
* @param content original text content
|
||||
* @param start index of the first character in content
|
||||
* @param end index of the character following the last character in content
|
||||
*/
|
||||
protected void append(StringBuilder dest, String content, int start, int end) {
|
||||
if (escape) {
|
||||
// note: these are the rules from owasp.org
|
||||
for (int i = start; i < end; i++) {
|
||||
char ch = content.charAt(i);
|
||||
switch (ch) {
|
||||
case '&':
|
||||
dest.append("&");
|
||||
break;
|
||||
case '<':
|
||||
dest.append("<");
|
||||
break;
|
||||
case '>':
|
||||
dest.append(">");
|
||||
break;
|
||||
case '"':
|
||||
dest.append(""");
|
||||
break;
|
||||
case '\'':
|
||||
dest.append("'");
|
||||
break;
|
||||
case '/':
|
||||
dest.append("/");
|
||||
break;
|
||||
default:
|
||||
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
||||
dest.append(ch);
|
||||
} else if (ch < 0xff) {
|
||||
dest.append("&#");
|
||||
dest.append((int) ch);
|
||||
dest.append(";");
|
||||
} else {
|
||||
dest.append(ch);
|
||||
}
|
||||
} else {
|
||||
dest.append(content, start, end);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
dest.append(content, start, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
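A usage sketch for the constructor documented above (tag strings are arbitrary): wrap matches in <em> tags, join disconnected passages with an ellipsis, and HTML-escape the snippet text.

import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
import org.apache.lucene.search.uhighlight.PassageFormatter;

PassageFormatter formatter = new DefaultPassageFormatter("<em>", "</em>", "... ", true);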
|
|
@ -117,9 +117,9 @@ public class FieldHighlighter {
|
|||
break;
|
||||
}
|
||||
Passage passage = new Passage();
|
||||
passage.score = Float.NaN;
|
||||
passage.startOffset = pos;
|
||||
passage.endOffset = next;
|
||||
passage.setScore(Float.NaN);
|
||||
passage.setStartOffset(pos);
|
||||
passage.setEndOffset(next);
|
||||
passages.add(passage);
|
||||
pos = next;
|
||||
}
|
||||
|
@ -145,12 +145,12 @@ public class FieldHighlighter {
|
|||
offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination
|
||||
|
||||
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
|
||||
if (left.score < right.score) {
|
||||
if (left.getScore() < right.getScore()) {
|
||||
return -1;
|
||||
} else if (left.score > right.score) {
|
||||
} else if (left.getScore() > right.getScore()) {
|
||||
return 1;
|
||||
} else {
|
||||
return left.startOffset - right.startOffset;
|
||||
return left.getStartOffset() - right.getStartOffset();
|
||||
}
|
||||
});
|
||||
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
|
||||
|
@ -170,12 +170,12 @@ public class FieldHighlighter {
|
|||
continue;
|
||||
}
|
||||
// See if this term should be part of a new passage.
|
||||
if (start >= passage.endOffset) {
|
||||
if (passage.startOffset >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
|
||||
if (start >= passage.getEndOffset()) {
|
||||
if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
|
||||
// finalize passage
|
||||
passage.score *= scorer.norm(passage.startOffset);
|
||||
passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
|
||||
// new sentence: first add 'passage' to queue
|
||||
if (passageQueue.size() == maxPassages && passage.score < passageQueue.peek().score) {
|
||||
if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
|
||||
passage.reset(); // can't compete, just reset it
|
||||
} else {
|
||||
passageQueue.offer(passage);
|
||||
|
@ -192,8 +192,8 @@ public class FieldHighlighter {
|
|||
break;
|
||||
}
|
||||
// advance breakIterator
|
||||
passage.startOffset = Math.max(breakIterator.preceding(start + 1), 0);
|
||||
passage.endOffset = Math.min(breakIterator.following(start), contentLength);
|
||||
passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
|
||||
passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
|
||||
}
|
||||
// Add this term to the passage.
|
||||
int tf = 0;
|
||||
|
@ -209,12 +209,12 @@ public class FieldHighlighter {
|
|||
off.nextPosition();
|
||||
start = off.startOffset();
|
||||
end = off.endOffset();
|
||||
if (start >= passage.endOffset || end > contentLength) { // it's beyond this passage
|
||||
if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
|
||||
offsetsEnumQueue.offer(off);
|
||||
break;
|
||||
}
|
||||
}
|
||||
passage.score += off.weight * scorer.tf(tf, passage.endOffset - passage.startOffset);
|
||||
passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
|
||||
}
|
||||
|
||||
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
|
||||
|
@ -222,7 +222,7 @@ public class FieldHighlighter {
|
|||
p.sort();
|
||||
}
|
||||
// sort in ascending order
|
||||
Arrays.sort(passages, (left, right) -> left.startOffset - right.startOffset);
|
||||
Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset());
|
||||
return passages;
|
||||
}
|
||||
|
||||
|
|
|
@ -66,9 +66,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
|
|||
}
|
||||
|
||||
BytesRef getTerm() throws IOException {
|
||||
// the dp.getPayload thing is a hack -- see MultiTermHighlighting
|
||||
return term != null ? term : postingsEnum.getPayload();
|
||||
// We don't deepcopy() because in this hack we know we don't have to.
|
||||
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
|
||||
return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
|
||||
}
|
||||
|
||||
boolean hasMorePositions() throws IOException {
|
||||
|
@ -91,7 +90,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (postingsEnum instanceof Closeable) { // the one in MultiTermHighlighting is.
|
||||
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.
|
||||
if (postingsEnum instanceof Closeable) {
|
||||
((Closeable) postingsEnum).close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,139 +23,159 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
|||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Represents a passage (typically a sentence of the document).
|
||||
* Represents a passage (typically a sentence of the document).
|
||||
* <p>
|
||||
* A passage contains {@link #getNumMatches} highlights from the query,
|
||||
* and the offsets and query terms that correspond with each match.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Passage {
|
||||
int startOffset = -1;
|
||||
int endOffset = -1;
|
||||
float score = 0.0f;
|
||||
public class Passage {
|
||||
private int startOffset = -1;
|
||||
private int endOffset = -1;
|
||||
private float score = 0.0f;
|
||||
|
||||
int matchStarts[] = new int[8];
|
||||
int matchEnds[] = new int[8];
|
||||
BytesRef matchTerms[] = new BytesRef[8];
|
||||
int numMatches = 0;
|
||||
private int[] matchStarts = new int[8];
|
||||
private int[] matchEnds = new int[8];
|
||||
private BytesRef[] matchTerms = new BytesRef[8];
|
||||
private int numMatches = 0;
|
||||
|
||||
public void addMatch(int startOffset, int endOffset, BytesRef term) {
|
||||
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
|
||||
if (numMatches == matchStarts.length) {
|
||||
int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
|
||||
int newMatchStarts[] = new int[newLength];
|
||||
int newMatchEnds[] = new int[newLength];
|
||||
BytesRef newMatchTerms[] = new BytesRef[newLength];
|
||||
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
|
||||
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
|
||||
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
|
||||
matchStarts = newMatchStarts;
|
||||
matchEnds = newMatchEnds;
|
||||
matchTerms = newMatchTerms;
|
||||
}
|
||||
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
|
||||
matchStarts[numMatches] = startOffset;
|
||||
matchEnds[numMatches] = endOffset;
|
||||
matchTerms[numMatches] = term;
|
||||
numMatches++;
|
||||
/** @lucene.internal */
|
||||
public void addMatch(int startOffset, int endOffset, BytesRef term) {
|
||||
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
|
||||
if (numMatches == matchStarts.length) {
|
||||
int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
|
||||
int newMatchStarts[] = new int[newLength];
|
||||
int newMatchEnds[] = new int[newLength];
|
||||
BytesRef newMatchTerms[] = new BytesRef[newLength];
|
||||
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
|
||||
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
|
||||
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
|
||||
matchStarts = newMatchStarts;
|
||||
matchEnds = newMatchEnds;
|
||||
matchTerms = newMatchTerms;
|
||||
}
|
||||
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
|
||||
matchStarts[numMatches] = startOffset;
|
||||
matchEnds[numMatches] = endOffset;
|
||||
matchTerms[numMatches] = term;
|
||||
numMatches++;
|
||||
}
|
||||
|
||||
void sort() {
|
||||
final int starts[] = matchStarts;
|
||||
final int ends[] = matchEnds;
|
||||
final BytesRef terms[] = matchTerms;
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
int temp = starts[i];
|
||||
starts[i] = starts[j];
|
||||
starts[j] = temp;
|
||||
/** @lucene.internal */
|
||||
public void sort() {
|
||||
final int starts[] = matchStarts;
|
||||
final int ends[] = matchEnds;
|
||||
final BytesRef terms[] = matchTerms;
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
int temp = starts[i];
|
||||
starts[i] = starts[j];
|
||||
starts[j] = temp;
|
||||
|
||||
temp = ends[i];
|
||||
ends[i] = ends[j];
|
||||
ends[j] = temp;
|
||||
temp = ends[i];
|
||||
ends[i] = ends[j];
|
||||
ends[j] = temp;
|
||||
|
||||
BytesRef tempTerm = terms[i];
|
||||
terms[i] = terms[j];
|
||||
terms[j] = tempTerm;
|
||||
}
|
||||
BytesRef tempTerm = terms[i];
|
||||
terms[i] = terms[j];
|
||||
terms[j] = tempTerm;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return Integer.compare(starts[i], starts[j]);
|
||||
}
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return Integer.compare(starts[i], starts[j]);
|
||||
}
|
||||
|
||||
}.sort(0, numMatches);
|
||||
}
|
||||
}.sort(0, numMatches);
|
||||
}
|
||||
|
||||
void reset() {
|
||||
startOffset = endOffset = -1;
|
||||
score = 0.0f;
|
||||
numMatches = 0;
|
||||
}
|
||||
/** @lucene.internal */
|
||||
public void reset() {
|
||||
startOffset = endOffset = -1;
|
||||
score = 0.0f;
|
||||
numMatches = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start offset of this passage.
|
||||
* @return start index (inclusive) of the passage in the
|
||||
* original content: always >= 0.
|
||||
*/
|
||||
public int getStartOffset() {
|
||||
return startOffset;
|
||||
}
|
||||
/**
|
||||
* Start offset of this passage.
|
||||
*
|
||||
* @return start index (inclusive) of the passage in the
|
||||
* original content: always >= 0.
|
||||
*/
|
||||
public int getStartOffset() {
|
||||
return startOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* End offset of this passage.
|
||||
* @return end index (exclusive) of the passage in the
|
||||
* original content: always >= {@link #getStartOffset()}
|
||||
*/
|
||||
public int getEndOffset() {
|
||||
return endOffset;
|
||||
}
|
||||
/**
|
||||
* End offset of this passage.
|
||||
*
|
||||
* @return end index (exclusive) of the passage in the
|
||||
* original content: always >= {@link #getStartOffset()}
|
||||
*/
|
||||
public int getEndOffset() {
|
||||
return endOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Passage's score.
|
||||
*/
|
||||
public float getScore() {
|
||||
return score;
|
||||
}
|
||||
/**
|
||||
* Passage's score.
|
||||
*/
|
||||
public float getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of term matches available in
|
||||
* {@link #getMatchStarts}, {@link #getMatchEnds},
|
||||
* {@link #getMatchTerms}
|
||||
*/
|
||||
public int getNumMatches() {
|
||||
return numMatches;
|
||||
}
|
||||
/**
|
||||
* Number of term matches available in
|
||||
* {@link #getMatchStarts}, {@link #getMatchEnds},
|
||||
* {@link #getMatchTerms}
|
||||
*/
|
||||
public int getNumMatches() {
|
||||
return numMatches;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start offsets of the term matches, in increasing order.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that these
|
||||
* offsets are absolute (not relative to {@link #getStartOffset()}).
|
||||
*/
|
||||
public int[] getMatchStarts() {
|
||||
return matchStarts;
|
||||
}
|
||||
/**
|
||||
* Start offsets of the term matches, in increasing order.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that these
|
||||
* offsets are absolute (not relative to {@link #getStartOffset()}).
|
||||
*/
|
||||
public int[] getMatchStarts() {
|
||||
return matchStarts;
|
||||
}
|
||||
|
||||
/**
|
||||
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
|
||||
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
|
||||
* Analyzer produced a term which spans a passage boundary.
|
||||
*/
|
||||
public int[] getMatchEnds() {
|
||||
return matchEnds;
|
||||
}
|
||||
/**
|
||||
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
|
||||
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
|
||||
* Analyzer produced a term which spans a passage boundary.
|
||||
*/
|
||||
public int[] getMatchEnds() {
|
||||
return matchEnds;
|
||||
}
|
||||
|
||||
/**
|
||||
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches()} are valid.
|
||||
*/
|
||||
public BytesRef[] getMatchTerms() {
|
||||
return matchTerms;
|
||||
}
|
||||
/**
|
||||
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
|
||||
* <p>
|
||||
* Only {@link #getNumMatches()} are valid.
|
||||
*/
|
||||
public BytesRef[] getMatchTerms() {
|
||||
return matchTerms;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public void setStartOffset(int startOffset) {
|
||||
this.startOffset = startOffset;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public void setEndOffset(int endOffset) {
|
||||
this.endOffset = endOffset;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public void setScore(float score) {
|
||||
this.score = score;
|
||||
}
|
||||
}
|
||||
|
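Since Passage's accessors are now public (the LUCENE-7559 change above), a custom formatter can live outside the org.apache.lucene.search.uhighlight package; a minimal hypothetical example that emits untagged passage text:

import org.apache.lucene.search.uhighlight.Passage;
import org.apache.lucene.search.uhighlight.PassageFormatter;

public class PlainTextPassageFormatter extends PassageFormatter {
  @Override
  public String format(Passage[] passages, String content) {
    StringBuilder sb = new StringBuilder();
    for (Passage passage : passages) {
      if (sb.length() > 0) {
        sb.append(" ... "); // separate disconnected passages
      }
      sb.append(content, passage.getStartOffset(), passage.getEndOffset());
    }
    return sb.toString();
  }
}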
|
|
@ -69,10 +69,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum));
|
||||
}
|
||||
|
||||
// but this would have a performance cost for likely little gain in the user experience, it
|
||||
// would only serve to make this method less bogus.
|
||||
// instead, we always return freq() = Integer.MAX_VALUE and let the highlighter terminate based on offset...
|
||||
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl?
|
||||
// See class javadocs.
|
||||
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl? See TODOs in OffsetsEnum.
|
||||
private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable {
|
||||
TokenStream stream; // becomes null when closed
|
||||
final CharacterRunAutomaton[] matchers;
|
||||
|
@ -134,6 +132,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
return currentEndOffset;
|
||||
}
|
||||
|
||||
// TOTAL HACK; used in OffsetsEnum.getTerm()
|
||||
@Override
|
||||
public BytesRef getPayload() throws IOException {
|
||||
if (matchDescriptions[currentMatch] == null) {
|
||||
|
|
|
@ -697,13 +697,13 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
int pos = 0;
|
||||
for (Passage passage : passages) {
|
||||
// don't add ellipsis if its the first one, or if its connected.
|
||||
if (passage.startOffset > pos && pos > 0) {
|
||||
if (passage.getStartOffset() > pos && pos > 0) {
|
||||
sb.append("... ");
|
||||
}
|
||||
pos = passage.startOffset;
|
||||
for (int i = 0; i < passage.numMatches; i++) {
|
||||
int start = passage.matchStarts[i];
|
||||
int end = passage.matchEnds[i];
|
||||
pos = passage.getStartOffset();
|
||||
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||
int start = passage.getMatchStarts()[i];
|
||||
int end = passage.getMatchEnds()[i];
|
||||
// its possible to have overlapping terms
|
||||
if (start > pos) {
|
||||
sb.append(content, pos, start);
|
||||
|
@ -719,8 +719,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
// its possible a "term" from the analyzer could span a sentence boundary.
|
||||
sb.append(content, pos, Math.max(pos, passage.endOffset));
|
||||
pos = passage.endOffset;
|
||||
sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
|
||||
pos = passage.getEndOffset();
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
init(f, a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to true if phrase queries will be automatically generated
|
||||
* when the analyzer returns more than one term from whitespace
|
||||
* delimited text.
|
||||
* NOTE: this behavior may not be suitable for all languages.
|
||||
* <p>
|
||||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
@Override
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
if (splitOnWhitespace == false && value == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||
}
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setSplitOnWhitespace(boolean)
|
||||
*/
|
||||
|
@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
/**
|
||||
* Whether query text should be split on whitespace prior to analysis.
|
||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||
}
|
||||
this.splitOnWhitespace = splitOnWhitespace;
|
||||
}
|
||||
|
||||
|
@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
finally { jj_save(2, xla); }
|
||||
}
|
||||
|
||||
private boolean jj_3R_3() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
jj_lookingAhead = true;
|
||||
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||
jj_lookingAhead = false;
|
||||
if (!jj_semLA || jj_3R_6()) return true;
|
||||
Token xsp;
|
||||
if (jj_3R_7()) return true;
|
||||
while (true) {
|
||||
xsp = jj_scanpos;
|
||||
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_6() {
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_5() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_4() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
|
@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_3() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
jj_lookingAhead = true;
|
||||
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||
jj_lookingAhead = false;
|
||||
if (!jj_semLA || jj_3R_6()) return true;
|
||||
Token xsp;
|
||||
if (jj_3R_7()) return true;
|
||||
while (true) {
|
||||
xsp = jj_scanpos;
|
||||
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_6() {
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_5() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Generated Token Manager. */
|
||||
public QueryParserTokenManager token_source;
|
||||
/** Current token. */
|
||||
|
|
|
@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase {
|
|||
init(f, a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to true if phrase queries will be automatically generated
|
||||
* when the analyzer returns more than one term from whitespace
|
||||
* delimited text.
|
||||
* NOTE: this behavior may not be suitable for all languages.
|
||||
* <p>
|
||||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
@Override
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
if (splitOnWhitespace == false && value == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||
}
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setSplitOnWhitespace(boolean)
|
||||
*/
|
||||
|
@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase {
|
|||
/**
|
||||
* Whether query text should be split on whitespace prior to analysis.
|
||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||
}
|
||||
this.splitOnWhitespace = splitOnWhitespace;
|
||||
}
|
||||
|
||||
|
|
|
@ -144,7 +144,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
|
|||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
*/
|
||||
public final void setAutoGeneratePhraseQueries(boolean value) {
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
|
|
|
@ -840,6 +840,20 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||
assertTrue(isAHit(qp.parse("เ??"), s, analyzer));
|
||||
}
|
||||
|
||||
// LUCENE-7533
|
||||
public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() {
|
||||
final QueryParser qp = new QueryParser(FIELD, new MockAnalyzer(random()));
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
qp.setSplitOnWhitespace(false);
|
||||
qp.setAutoGeneratePhraseQueries(true);
|
||||
});
|
||||
final QueryParser qp2 = new QueryParser(FIELD, new MockAnalyzer(random()));
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
qp2.setSplitOnWhitespace(true);
|
||||
qp2.setAutoGeneratePhraseQueries(true);
|
||||
qp2.setSplitOnWhitespace(false);
|
||||
});
|
||||
}
|
||||
|
||||
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
|
||||
Directory ramDir = newDirectory();
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.index.Term;
|
|||
//import org.apache.lucene.queryparser.classic.ParseException;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
|
||||
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
|
||||
|
@ -328,6 +329,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
|||
|
||||
PhraseQuery expected = new PhraseQuery("field", "中", "国");
|
||||
CommonQueryParserConfiguration qp = getParserConfig(analyzer);
|
||||
if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method
|
||||
((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533
|
||||
}
|
||||
setAutoGeneratePhraseQueries(qp, true);
|
||||
assertEquals(expected, getQuery("中国",qp));
|
||||
}
|
||||
|
|
|
@ -56,7 +56,7 @@ public abstract class PrimaryNode extends Node {
|
|||
// Current NRT segment infos, incRef'd with IndexWriter.deleter:
|
||||
private SegmentInfos curInfos;
|
||||
|
||||
final IndexWriter writer;
|
||||
protected final IndexWriter writer;
|
||||
|
||||
// IncRef'd state of the last published NRT point; when a replica comes asking, we give it this as the current NRT point:
|
||||
private CopyState copyState;
|
||||
|
|
|
@ -129,9 +129,10 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
private final boolean highlight;
|
||||
|
||||
private final boolean commitOnBuild;
|
||||
private final boolean closeIndexWriterOnBuild;
|
||||
|
||||
/** Used for ongoing NRT additions/updates. */
|
||||
private IndexWriter writer;
|
||||
protected IndexWriter writer;
|
||||
|
||||
/** {@link IndexSearcher} used for lookups. */
|
||||
protected SearcherManager searcherMgr;
|
||||
|
@ -146,6 +147,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
/** Default highlighting option. */
|
||||
public static final boolean DEFAULT_HIGHLIGHT = true;
|
||||
|
||||
/** Default option to close the IndexWriter once the index has been built. */
|
||||
protected final static boolean DEFAULT_CLOSE_INDEXWRITER_ON_BUILD = true;
|
||||
|
||||
/** How we sort the postings and search results. */
|
||||
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
|
||||
|
||||
|
@ -198,8 +202,34 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
*
|
||||
*/
|
||||
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
|
||||
boolean commitOnBuild,
|
||||
boolean commitOnBuild,
|
||||
boolean allTermsRequired, boolean highlight) throws IOException {
|
||||
this(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild, allTermsRequired, highlight,
|
||||
DEFAULT_CLOSE_INDEXWRITER_ON_BUILD);
|
||||
}
|
||||
|
||||
/** Create a new instance, loading from a previously built
|
||||
* AnalyzingInfixSuggester directory, if it exists. This directory must be
|
||||
* private to the infix suggester (i.e., not an external
|
||||
* Lucene index). Note that {@link #close}
|
||||
* will also close the provided directory.
|
||||
*
|
||||
* @param minPrefixChars Minimum number of leading characters
|
||||
* before PrefixQuery is used (default 4).
|
||||
* Prefixes shorter than this are indexed as character
|
||||
* ngrams (increasing index size but making lookups
|
||||
* faster).
|
||||
*
|
||||
* @param commitOnBuild Call commit after the index has finished building. This would persist the
|
||||
* suggester index to disk and future instances of this suggester can use this pre-built dictionary.
|
||||
*
|
||||
* @param allTermsRequired All terms in the suggest query must be matched.
|
||||
* @param highlight Highlight suggest query in suggestions.
|
||||
* @param closeIndexWriterOnBuild If true, the IndexWriter will be closed after the index has finished building.
|
||||
*/
|
||||
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
|
||||
boolean commitOnBuild, boolean allTermsRequired,
|
||||
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
|
||||
|
||||
if (minPrefixChars < 0) {
|
||||
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
|
||||
|
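A hedged usage sketch of the new constructor documented above (the directory path, analyzer, and input iterator are placeholders):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

Directory dir = FSDirectory.open(Paths.get("/path/to/suggest-index")); // placeholder path
StandardAnalyzer analyzer = new StandardAnalyzer();
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
    dir, analyzer, analyzer, 4 /* minPrefixChars */, false /* commitOnBuild */,
    true /* allTermsRequired */, true /* highlight */, true /* closeIndexWriterOnBuild */);
suggester.build(inputs); // inputs: an InputIterator; afterwards the internal IndexWriter is committed and closed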
@ -212,6 +242,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
this.commitOnBuild = commitOnBuild;
|
||||
this.allTermsRequired = allTermsRequired;
|
||||
this.highlight = highlight;
|
||||
this.closeIndexWriterOnBuild = closeIndexWriterOnBuild;
|
||||
|
||||
if (DirectoryReader.indexExists(dir)) {
|
||||
// Already built; open it:
|
||||
|
@ -276,15 +307,22 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
|
||||
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
|
||||
if (commitOnBuild) {
|
||||
if (commitOnBuild || closeIndexWriterOnBuild) {
|
||||
commit();
|
||||
}
|
||||
searcherMgr = new SearcherManager(writer, null);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success == false && writer != null) {
|
||||
writer.rollback();
|
||||
writer = null;
|
||||
if (success) {
|
||||
if (closeIndexWriterOnBuild) {
|
||||
writer.close();
|
||||
writer = null;
|
||||
}
|
||||
} else { // failure
|
||||
if (writer != null) {
|
||||
writer.rollback();
|
||||
writer = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -294,9 +332,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
* @see IndexWriter#commit */
|
||||
public void commit() throws IOException {
|
||||
if (writer == null) {
|
||||
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
|
||||
if (searcherMgr == null || closeIndexWriterOnBuild == false) {
|
||||
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
|
||||
}
|
||||
// else no-op: writer was committed and closed after the index was built, so commit is unnecessary
|
||||
} else {
|
||||
writer.commit();
|
||||
}
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
private Analyzer getGramAnalyzer() {
|
||||
|
@ -321,13 +363,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
|
||||
private synchronized void ensureOpen() throws IOException {
|
||||
if (writer == null) {
|
||||
if (searcherMgr != null) {
|
||||
searcherMgr.close();
|
||||
searcherMgr = null;
|
||||
if (DirectoryReader.indexExists(dir)) {
|
||||
// Already built; open it:
|
||||
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
|
||||
} else {
|
||||
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
|
||||
}
|
||||
writer = new IndexWriter(dir,
|
||||
getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
|
||||
SearcherManager oldSearcherMgr = searcherMgr;
|
||||
searcherMgr = new SearcherManager(writer, null);
|
||||
if (oldSearcherMgr != null) {
|
||||
oldSearcherMgr.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -382,7 +428,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
if (searcherMgr == null) {
|
||||
throw new IllegalStateException("suggester was not built");
|
||||
}
|
||||
searcherMgr.maybeRefreshBlocking();
|
||||
if (writer != null) {
|
||||
searcherMgr.maybeRefreshBlocking();
|
||||
}
|
||||
// else no-op: writer was committed and closed after the index was built
|
||||
// and before searchMgr was constructed, so refresh is unnecessary
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -791,9 +841,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
if (writer != null) {
|
||||
writer.close();
|
||||
dir.close();
|
||||
writer = null;
|
||||
}
|
||||
if (dir != null) {
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -86,6 +86,10 @@ final class CompletionFieldsConsumer extends FieldsConsumer {
|
|||
for (String field : fields) {
|
||||
CompletionTermWriter termWriter = new CompletionTermWriter();
|
||||
Terms terms = fields.terms(field);
|
||||
if (terms == null) {
|
||||
// this can happen from ghost fields, where the incoming Fields iterator claims a field exists but it does not
|
||||
continue;
|
||||
}
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
|
||||
// write terms
|
||||
|
|
|
@@ -35,11 +35,14 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

@@ -1334,4 +1337,112 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {

suggester.close();
}

public void testCloseIndexWriterOnBuild() throws Exception {
class MyAnalyzingInfixSuggester extends AnalyzingInfixSuggester {
public MyAnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
int minPrefixChars, boolean commitOnBuild, boolean allTermsRequired,
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
allTermsRequired, highlight, closeIndexWriterOnBuild);
}
public IndexWriter getIndexWriter() {
return writer;
}
public SearcherManager getSearcherManager() {
return searcherMgr;
}
}

// After build(), when closeIndexWriterOnBuild = true:
// * The IndexWriter should be null
// * The SearcherManager should be non-null
// * SearcherManager's IndexWriter reference should be closed
//   (as evidenced by maybeRefreshBlocking() throwing AlreadyClosedException)
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
suggester.build(new InputArrayIterator(sharedInputs));
assertNull(suggester.getIndexWriter());
assertNotNull(suggester.getSearcherManager());
expectThrows(AlreadyClosedException.class, () -> suggester.getSearcherManager().maybeRefreshBlocking());

suggester.close();
a.close();
}

public void testCommitAfterBuild() throws Exception {
performOperationWithAllOptionCombinations(suggester -> {
suggester.build(new InputArrayIterator(sharedInputs));
suggester.commit();
});
}

public void testRefreshAfterBuild() throws Exception {
performOperationWithAllOptionCombinations(suggester -> {
suggester.build(new InputArrayIterator(sharedInputs));
suggester.refresh();
});
}

public void testDisallowCommitBeforeBuild() throws Exception {
performOperationWithAllOptionCombinations
(suggester -> expectThrows(IllegalStateException.class, suggester::commit));
}

public void testDisallowRefreshBeforeBuild() throws Exception {
performOperationWithAllOptionCombinations
(suggester -> expectThrows(IllegalStateException.class, suggester::refresh));
}

private Input sharedInputs[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};

private interface SuggesterOperation {
void operate(AnalyzingInfixSuggester suggester) throws Exception;
}

/**
 * Perform the given operation on suggesters constructed with all combinations of options
 * commitOnBuild and closeIndexWriterOnBuild, including defaults.
 */
private void performOperationWithAllOptionCombinations(SuggesterOperation operation) throws Exception {
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
operation.operate(suggester);
suggester.close();

suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
operation.operate(suggester);
suggester.close();

a.close();
}
}
@@ -24,9 +24,12 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedNumericDocValues;

@@ -38,7 +41,6 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;

@@ -112,7 +114,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
dir.close();
}

@Test
public void testSimple() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

@@ -141,7 +142,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testMostlyFilteredOutDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

@@ -188,7 +188,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testDocFiltering() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

@@ -230,7 +229,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testAnalyzerWithoutPreservePosAndSep() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, false);

@@ -254,7 +252,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testAnalyzerWithSepAndNoPreservePos() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, true, false);

@@ -278,7 +275,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

@Test
public void testAnalyzerWithPreservePosAndNoSep() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, true);

@@ -302,4 +298,43 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}

public void testGhostField() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field", "suggest_field2", "suggest_field3"));

Document document = new Document();
document.add(new StringField("id", "0", Field.Store.NO));
document.add(new SuggestField("suggest_field", "apples", 3));
iw.addDocument(document);
// need another document so whole segment isn't deleted
iw.addDocument(new Document());
iw.commit();

document = new Document();
document.add(new StringField("id", "1", Field.Store.NO));
document.add(new SuggestField("suggest_field2", "apples", 3));
iw.addDocument(document);
iw.commit();

iw.deleteDocuments(new Term("id", "0"));
// first force merge is OK
iw.forceMerge(1);

// second force merge causes MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
// this field anymore)
iw.addDocument(new Document());
iw.forceMerge(1);

DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);

PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
assertEquals(0, indexSearcher.suggest(query, 3).totalHits);

query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3));

reader.close();
iw.close();
}
}
@@ -40,6 +40,8 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;

@@ -312,6 +314,49 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
dir.close();
}

// tests that level 2 ghost fields still work
public void testLevel2Ghosts() throws Exception {
Directory dir = newDirectory();

Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = newIndexWriterConfig(null);
iwc.setCodec(getCodec());
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iw = new IndexWriter(dir, iwc);

Document document = new Document();
document.add(new StringField("id", "0", Field.Store.NO));
document.add(new StringField("suggest_field", "apples", Field.Store.NO));
iw.addDocument(document);
// need another document so whole segment isn't deleted
iw.addDocument(new Document());
iw.commit();

document = new Document();
document.add(new StringField("id", "1", Field.Store.NO));
document.add(new StringField("suggest_field2", "apples", Field.Store.NO));
iw.addDocument(document);
iw.commit();

iw.deleteDocuments(new Term("id", "0"));
// first force merge creates a level 1 ghost field
iw.forceMerge(1);

// second force merge creates a level 2 ghost field, causing MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
// this field anymore)
iw.addDocument(new Document());
iw.forceMerge(1);

DirectoryReader reader = DirectoryReader.open(iw);
IndexSearcher indexSearcher = new IndexSearcher(reader);

assertEquals(1, indexSearcher.count(new TermQuery(new Term("id", "1"))));

reader.close();
iw.close();
dir.close();
}

private static class TermFreqs {
long totalTermFreq;
int docFreq;
@@ -120,6 +120,9 @@ New Features

* SOLR-9077: Streaming expressions should support collection alias (Kevin Risden)

* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
  (Gregory Chanan, Hrishikesh Gadre via yonik)

Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have

@@ -128,6 +131,9 @@ Optimizations
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
  (Jonny Marks via Christine Poerschke)

* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
  comparator and only invalidate leafComparator. (John Call via yonik)

Bug Fixes
----------------------
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.

@@ -183,6 +189,10 @@ Other Changes
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
  (Christine Poerschke, Noble Paul)

* SOLR-9597: Add setReadOnly(String ...) to ConnectionImpl (Kevin Risden)

* SOLR-9609: Change hard-coded keysize from 512 to 1024 (Jeremy Martini via Erick Erickson)

================== 6.3.0 ==================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

@@ -615,9 +625,6 @@ New Features
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
  (Doug Turnbull, David Smiley)

* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
  (Gregory Chanan)

* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)

* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.
@@ -2645,16 +2645,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
try {
FileUtils.deleteDirectory(dataDir);
} catch (IOException e) {
SolrException.log(log, "Failed to delete data dir for unloaded core:" + cd.getName()
+ " dir:" + dataDir.getAbsolutePath());
log.error("Failed to delete data dir for unloaded core: {} dir: {}", cd.getName(), dataDir.getAbsolutePath(), e);
}
}
if (deleteInstanceDir) {
try {
FileUtils.deleteDirectory(cd.getInstanceDir().toFile());
} catch (IOException e) {
SolrException.log(log, "Failed to delete instance dir for unloaded core:" + cd.getName()
+ " dir:" + cd.getInstanceDir());
log.error("Failed to delete instance dir for unloaded core: {} dir: {}", cd.getName(), cd.getInstanceDir(), e);
}
}
}
@@ -616,7 +616,7 @@ public class QueryComponent extends SearchComponent
// :TODO: would be simpler to always serialize every position of SortField[]
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;

FieldComparator<?> comparator = null;
FieldComparator<?> comparator = sortField.getComparator(1,0);
LeafFieldComparator leafComparator = null;
Object[] vals = new Object[nDocs];

@@ -633,13 +633,13 @@ public class QueryComponent extends SearchComponent
idx = ReaderUtil.subIndex(doc, leaves);
currentLeaf = leaves.get(idx);
if (idx != lastIdx) {
// we switched segments. invalidate comparator.
comparator = null;
// we switched segments. invalidate leafComparator.
lastIdx = idx;
leafComparator = null;
}
}

if (comparator == null) {
comparator = sortField.getComparator(1,0);
if (leafComparator == null) {
leafComparator = comparator.getLeafComparator(currentLeaf);
}
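The SOLR-9772 change keeps one FieldComparator per SortField for the whole loop and only re-derives the per-segment LeafFieldComparator when the document moves to a different leaf, instead of rebuilding the comparator itself on every segment switch. A condensed, self-contained sketch of that reuse pattern follows; the class and method names are illustrative only, while the comparator calls mirror the Lucene 6.x API used in the hunk above.

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField;

final class SortValueSketch {                       // hypothetical helper, not part of the patch
  /** Collects the sort value of each (globally numbered, ascending) doc id for one SortField. */
  static Object[] sortValues(SortField sortField, int[] docs, List<LeafReaderContext> leaves)
      throws IOException {
    FieldComparator<?> comparator = sortField.getComparator(1, 0); // created once, reused
    LeafFieldComparator leafComparator = null;
    Object[] vals = new Object[docs.length];
    LeafReaderContext currentLeaf = null;
    int lastIdx = -1;
    for (int i = 0; i < docs.length; i++) {
      int doc = docs[i];
      int idx = ReaderUtil.subIndex(doc, leaves);
      if (idx != lastIdx) {
        currentLeaf = leaves.get(idx);
        lastIdx = idx;
        leafComparator = null;                      // segment changed: only the leaf view is stale
      }
      if (leafComparator == null) {
        leafComparator = comparator.getLeafComparator(currentLeaf);
      }
      leafComparator.copy(0, doc - currentLeaf.docBase);  // load the value into slot 0
      vals[i] = comparator.value(0);
    }
    return vals;
  }
}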
@@ -285,6 +285,10 @@ public final class CryptoKeys {
private final PrivateKey privateKey;
private final SecureRandom random = new SecureRandom();

// If this ever comes back to haunt us see the discussion at
// SOLR-9609 for background and code allowing this to go
// into security.json
private static final int DEFAULT_KEYPAIR_LENGTH = 1024;

public RSAKeyPair() {
KeyPairGenerator keyGen = null;

@@ -293,7 +297,7 @@ public final class CryptoKeys {
} catch (NoSuchAlgorithmException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
keyGen.initialize(512);
keyGen.initialize(DEFAULT_KEYPAIR_LENGTH);
java.security.KeyPair keyPair = keyGen.genKeyPair();
privateKey = keyPair.getPrivate();
publicKey = keyPair.getPublic();
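The key-size bump rides on plain java.security; nothing else about the call sequence changes. A standalone sketch of the same sequence at 1024 bits (the class name and the exception wrapping are ours; CryptoKeys.RSAKeyPair is the real code and wraps the failure in a SolrException):

import java.security.KeyPair;
import java.security.KeyPairGenerator;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.PublicKey;

final class RsaKeyPairSketch {                      // illustrative only
  private static final int DEFAULT_KEYPAIR_LENGTH = 1024;  // was hard-coded to 512 before SOLR-9609

  static KeyPair generate() {
    try {
      KeyPairGenerator keyGen = KeyPairGenerator.getInstance("RSA");
      keyGen.initialize(DEFAULT_KEYPAIR_LENGTH);
      return keyGen.genKeyPair();
    } catch (NoSuchAlgorithmException e) {
      throw new IllegalStateException(e);           // real code: SolrException(SERVER_ERROR, e)
    }
  }

  public static void main(String[] args) {
    KeyPair pair = generate();
    PrivateKey priv = pair.getPrivate();
    PublicKey pub = pair.getPublic();
    System.out.println(pub.getAlgorithm() + " key pair generated, public key format=" + pub.getFormat());
  }
}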
@@ -114,7 +114,7 @@ public class BlockDirectoryTest extends SolrTestCaseJ4 {
if (random().nextBoolean()) {
Metrics metrics = new Metrics();
int blockSize = 8192;
int slabSize = blockSize * 32768;
int slabSize = blockSize * 16384;
long totalMemory = 1 * slabSize;
BlockCache blockCache = new BlockCache(metrics, true, totalMemory, slabSize, blockSize);
BlockDirectoryCache cache = new BlockDirectoryCache(blockCache, "/collection1", metrics, true);
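The only functional effect of this test tweak is halving the memory the randomized branch allocates: since totalMemory is exactly one slab, the block cache drops from a 256 MiB slab to a 128 MiB one. A tiny arithmetic sketch (not part of the patch):

public class SlabSizeMath {                          // illustrative only
  public static void main(String[] args) {
    int blockSize = 8192;
    long oldSlab = (long) blockSize * 32768;         // 268_435_456 bytes = 256 MiB
    long newSlab = (long) blockSize * 16384;         // 134_217_728 bytes = 128 MiB
    System.out.printf("old slab = %d MiB, new slab = %d MiB%n",
        oldSlab >> 20, newSlab >> 20);
  }
}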
@@ -155,6 +155,15 @@ class ConnectionImpl implements Connection {

}

/*
 * When using OpenLink ODBC-JDBC bridge on Windows, it runs the method ConnectionImpl.setReadOnly(String ...).
 * The spec says that setReadOnly(boolean ...) is required. This causes the ODBC-JDBC bridge to fail on Windows.
 * OpenLink case: http://support.openlinksw.com/support/techupdate.vsp?c=21881
 */
public void setReadOnly(String readOnly) throws SQLException {

}

@Override
public boolean isReadOnly() throws SQLException {
return true;
@@ -886,7 +886,7 @@ public class JavaBinCodec implements PushWriter {
daos.writeByte(NULL);
return true;
} else if (val instanceof CharSequence) {
writeStr((String) val);
writeStr((CharSequence) val);
return true;
} else if (val instanceof Number) {
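The cast change matters because the instanceof check accepts any CharSequence, while the old cast assumed the value was specifically a String; a non-String CharSequence such as a StringBuilder would have failed with a ClassCastException before ever reaching writeStr. A self-contained sketch of just that cast behaviour (the writeStr call itself belongs to JavaBinCodec and is not reproduced here):

public class CharSequenceCastDemo {                  // illustrative only
  public static void main(String[] args) {
    Object val = new StringBuilder("hello");         // a CharSequence that is not a String

    if (val instanceof CharSequence) {
      // Old behaviour: writeStr((String) val) -- this cast throws ClassCastException.
      try {
        String s = (String) val;
        System.out.println("cast to String ok: " + s);
      } catch (ClassCastException e) {
        System.out.println("cast to String failed: " + e);
      }

      // New behaviour: writeStr((CharSequence) val) -- always safe after the instanceof check.
      CharSequence cs = (CharSequence) val;
      System.out.println("as CharSequence: " + cs);
    }
  }
}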