Merge branch 'apache-https-master' into jira/solr-8593

This commit is contained in:
Kevin Risden 2016-11-18 10:42:14 -06:00
commit 750cf6d7a5
34 changed files with 875 additions and 312 deletions

View File

@ -431,6 +431,7 @@ reChangesSectionHREF = re.compile('<a id="(.*?)".*?>(.*?)</a>', re.IGNORECASE)
reUnderbarNotDashHTML = re.compile(r'<li>(\s*(LUCENE|SOLR)_\d\d\d\d+)')
reUnderbarNotDashTXT = re.compile(r'\s+((LUCENE|SOLR)_\d\d\d\d+)', re.MULTILINE)
def checkChangesContent(s, version, name, project, isHTML):
currentVersionTuple = versionToTuple(version, name)
if isHTML and s.find('Release %s' % version) == -1:
raise RuntimeError('did not see "Release %s" in %s' % (version, name))
@ -459,7 +460,8 @@ def checkChangesContent(s, version, name, project, isHTML):
raise RuntimeError('did not see "%s" in %s' % (sub, name))
if isHTML:
# Make sure a section only appears once under each release:
# Make sure that a section only appears once under each release,
# and that each release is not greater than the current version
seenIDs = set()
seenText = set()
@ -468,6 +470,9 @@ def checkChangesContent(s, version, name, project, isHTML):
if text.lower().startswith('release '):
release = text[8:].strip()
seenText.clear()
releaseTuple = versionToTuple(release, name)
if releaseTuple > currentVersionTuple:
raise RuntimeError('Future release %s is greater than %s in %s' % (release, version, name))
if id in seenIDs:
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
seenIDs.add(id)
@ -475,6 +480,27 @@ def checkChangesContent(s, version, name, project, isHTML):
raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
seenText.add(text)
# Matches "<major>.<minor>[.<bugfix>]" followed by an optional pre-release
# qualifier (-alpha, -beta, final, RC<n>) and an optional trailing "[...]" note.
reVersion = re.compile(r'(\d+)\.(\d+)(?:\.(\d+))?\s*(-alpha|-beta|final|RC\d+)?\s*(?:\[.*\])?', re.IGNORECASE)
def versionToTuple(version, name):
  """Parse a release version string into a tuple of ints that orders correctly.

  version: the version text, e.g. '6.4.0', '4.0-beta' or '4.0.0 RC2'.
  name: the file the version was read from; only used in the error message.

  Returns a tuple of ints. The qualifier, when present, is folded into a
  number exactly as before: -alpha appends 0, -beta appends 1, 'final'
  replaces the preceding component with 100, and RC<n> replaces the preceding
  component with n.

  Raises RuntimeError if the text does not start with a parsable version.
  """
  versionMatch = reVersion.match(version)
  if versionMatch is None:
    raise RuntimeError('Version %s in %s cannot be parsed' % (version, name))
  versionTuple = versionMatch.groups()
  # Drop the trailing groups that did not participate in the match.
  while versionTuple[-1] is None or versionTuple[-1] == '':
    versionTuple = versionTuple[:-1]
  qualifier = versionTuple[-1].lower()
  if qualifier == '-alpha':
    versionTuple = versionTuple[:-1] + ('0',)
  elif qualifier == '-beta':
    versionTuple = versionTuple[:-1] + ('1',)
  elif qualifier == 'final':
    versionTuple = versionTuple[:-2] + ('100',)
  elif qualifier[:2] == 'rc':
    versionTuple = versionTuple[:-2] + (versionTuple[-1][2:],)
  # Compare numerically: as strings, ('6', '10', '0') would sort *before*
  # ('6', '9', '0').  An omitted bugfix component that precedes a qualifier
  # (e.g. '4.0-beta') is still None at this point; treat it as 0 so the
  # resulting tuples are always comparable with each other.
  return tuple(0 if part is None else int(part) for part in versionTuple)
reUnixPath = re.compile(r'\b[a-zA-Z_]+=(?:"(?:\\"|[^"])*"' + '|(?:\\\\.|[^"\'\\s])*' + r"|'(?:\\'|[^'])*')" \
+ r'|(/(?:\\.|[^"\'\s])*)' \
+ r'|("/(?:\\.|[^"])*")' \

View File

@ -56,6 +56,11 @@ Other
======================= Lucene 6.4.0 =======================
API Changes
* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries
to be set to true when splitOnWhitespace is false (and vice-versa).
New features
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
@ -65,6 +70,15 @@ Bug Fixes
* LUCENE-7547: JapaneseTokenizerFactory was failing to close the
dictionary file it opened (Markus via Mike McCandless)
* LUCENE-7562: CompletionFieldsConsumer sometimes throws
NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless)
* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true
when splitOnWhitespace=false (and vice-versa). (Steve Rowe)
* LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
component when preserveOriginal was set to true. (Adrien Grand)
Improvements
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
@ -84,6 +98,9 @@ Improvements
* LUCENE-7524: Added more detailed explanation of how IDF is computed in
ClassicSimilarity and BM25Similarity. (Adrien Grand)
* LUCENE-7564: AnalyzingInfixSuggester should close its IndexWriter by default
at the end of build(). (Steve Rowe)
* LUCENE-7526: Enhanced UnifiedHighlighter's passage relevancy for queries with
wildcards and sometimes just terms. Added shouldPreferPassageRelevancyOverSpeed()
@ -93,6 +110,11 @@ Improvements
* LUCENE-7537: Index time sorting now supports multi-valued sorts
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
* LUCENE-7560: QueryBuilder.createFieldQuery is no longer final,
giving custom query parsers subclassing QueryBuilder more freedom to
control how text is analyzed and converted into a query (Matt Weber
via Mike McCandless)
Other
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
@ -100,6 +122,9 @@ Other
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
be customized. (David Smiley)
Build
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.miscellaneous;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -36,12 +37,14 @@ import org.apache.lucene.analysis.TokenStream;
* &lt;/fieldType&gt;</pre>
*/
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
private static final String PRESERVE_ORIGINAL = "preserveOriginal";
private final boolean preserveOriginal;
/** Creates a new ASCIIFoldingFilterFactory */
public ASCIIFoldingFilterFactory(Map<String,String> args) {
super(args);
preserveOriginal = getBoolean(args, "preserveOriginal", false);
preserveOriginal = getBoolean(args, PRESERVE_ORIGINAL, false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -54,7 +57,17 @@ public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements Mul
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
return this;
if (preserveOriginal) {
// The main use-case for using preserveOriginal is to match regardless of
// case but to give better scores to exact matches. Since most multi-term
// queries return constant scores anyway, the multi-term component only
// emits the folded token
Map<String, String> args = new HashMap<>(getOriginalArgs());
args.remove(PRESERVE_ORIGINAL);
return new ASCIIFoldingFilterFactory(args);
} else {
return this;
}
}
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
 * Verifies what {@link ASCIIFoldingFilterFactory} emits, both directly and
 * through the factory returned by its multi-term component, with and without
 * the {@code preserveOriginal} argument.
 */
public class TestAsciiFoldingFilterFactory extends BaseTokenStreamFactoryTestCase {

  public void testMultiTermAnalysis() throws IOException {
    // Default configuration: only the folded form, for both variants.
    TokenFilterFactory folding = new ASCIIFoldingFilterFactory(Collections.emptyMap());
    assertFoldsTo(folding, "Ete");
    assertFoldsTo(multiTermVariantOf(folding), "Ete");

    // preserveOriginal=true: the regular factory also keeps the input token,
    // while its multi-term variant emits the folded form only.
    TokenFilterFactory preserving =
        new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
    assertFoldsTo(preserving, "Ete", "Été");
    assertFoldsTo(multiTermVariantOf(preserving), "Ete");
  }

  /** Returns the factory that {@code factory} hands out as its multi-term component. */
  private static TokenFilterFactory multiTermVariantOf(TokenFilterFactory factory) {
    return (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
  }

  /** Feeds the single token "Été" through {@code factory} and checks the resulting terms. */
  private static void assertFoldsTo(TokenFilterFactory factory, String... expectedTerms) throws IOException {
    TokenStream input = new CannedTokenStream(new Token("Été", 0, 3));
    TokenStream filtered = factory.create(input);
    assertTokenStreamContents(filtered, expectedTerms);
  }
}

View File

@ -25,13 +25,18 @@ import java.lang.reflect.Modifier;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -62,6 +67,8 @@ import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
@ -165,6 +172,57 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// a test option to not remove temp dir...):
Thread.sleep(100000);
}
// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
/**
 * Writes a small index with an index-time sort on the numeric doc-values field
 * {@code dateDV} (a LONG sort field created with its boolean flag set to
 * {@code true}) into the "sorted" sub-directory of {@code getIndexDir()},
 * then force-merges it down to a single segment.
 *
 * @throws Exception if the index cannot be written, or an AssertionError if a
 *         document's "date" value is not a complete {@code yyyy-MM-dd} date
 */
public void testCreateSortedIndex() throws Exception {
  // A commit is issued half-way through so that more than one flush happens
  // before the final forceMerge. The previous threshold (i == 250) could never
  // be reached by a 50-iteration loop.
  final int numDocs = 50;

  Path indexDir = getIndexDir().resolve("sorted");
  Files.deleteIfExists(indexDir);

  try (Directory dir = newFSDirectory(indexDir)) {
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setNoCFSRatio(1.0);
    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    // TODO: remove randomness
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setMergePolicy(mp);
    conf.setUseCompoundFile(false);
    conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));

    // Resources are closed in reverse order: docs, then writer, then (outer) dir.
    try (IndexWriter writer = new IndexWriter(dir, conf);
         LineFileDocs docs = new LineFileDocs(random())) {
      SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
      parser.setTimeZone(TimeZone.getTimeZone("UTC"));
      ParsePosition position = new ParsePosition(0);
      Field dateDVField = null;
      for (int i = 0; i < numDocs; i++) {
        Document doc = docs.nextDoc();
        String dateString = doc.get("date");

        position.setIndex(0);
        Date date = parser.parse(dateString, position);
        // Reject both an outright parse error and trailing unparsed characters.
        if (position.getErrorIndex() != -1 || position.getIndex() != dateString.length()) {
          throw new AssertionError("failed to parse \"" + dateString + "\" as date");
        }

        // The field is attached only once and its value updated afterwards.
        // NOTE(review): this presumes nextDoc() keeps returning the same
        // Document instance -- confirm against LineFileDocs.
        if (dateDVField == null) {
          dateDVField = new NumericDocValuesField("dateDV", 0L);
          doc.add(dateDVField);
        }
        dateDVField.setLongValue(date.getTime());

        if (i == numDocs / 2) {
          writer.commit();
        }
        writer.addDocument(doc);
      }

      writer.forceMerge(1);
    }
  }
}
private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) throws IOException {
writer.updateNumericDocValue(new Term("id", id), f, value);
@ -1483,6 +1541,30 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
dir.close();
}
}
/**
 * For each listed release, unzips the bundled {@code sorted.<version>.zip}
 * index into a fresh temp directory and checks that it consists of a single
 * leaf whose index sort is the expected {@code dateDV} sort, then runs
 * {@code TestUtil.checkIndex} over it.
 *
 * @throws Exception if an index cannot be unpacked, opened or checked
 */
public void testSortedIndex() throws Exception {
  String[] versions = new String[] {"6.2.0", "6.2.1", "6.3.0"};
  for (String version : versions) {
    Path path = createTempDir("sorted");
    InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream("sorted." + version + ".zip");
    assertNotNull("Sorted index " + version + " not found", resource);
    TestUtil.unzip(resource, path);

    // TODO: more tests
    // try-with-resources so that a failing assertion does not leave the
    // reader or the directory open.
    try (Directory dir = newFSDirectory(path)) {
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        assertEquals(1, reader.leaves().size());
        Sort sort = reader.leaves().get(0).reader().getIndexSort();
        assertNotNull(sort);
        assertEquals("<long: \"dateDV\">!", sort.toString());
      }

      // this will confirm the docs really are sorted (the reader is already closed here):
      TestUtil.checkIndex(dir);
    }
  }
}
static long getValue(BinaryDocValues bdv) throws IOException {
BytesRef term = bdv.binaryValue();

View File

@ -196,7 +196,7 @@ public class QueryBuilder {
* @param quoted true if phrases should be generated when terms occur at more than one position
* @param phraseSlop slop factor for phrase/multiphrase queries
*/
protected final Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
// Use the analyzer to get all the tokens, and then build an appropriate

View File

@ -75,6 +75,9 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
*
* @lucene.internal
*/
// TODO we could make this go away. MemoryIndexOffsetStrategy could simply split and analyze each value into the
// MemoryIndex. TokenStreamOffsetStrategy's hack TokenStreamPostingsEnum could incorporate this logic,
// albeit with less code, less hack.
private static final class MultiValueTokenStream extends TokenFilter {
private final String fieldName;

View File

@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight;
* ellipses between unconnected passages.
*/
public class DefaultPassageFormatter extends PassageFormatter {
/** text that will appear before highlighted terms */
protected final String preTag;
/** text that will appear after highlighted terms */
protected final String postTag;
/** text that will appear between two unconnected passages */
protected final String ellipsis;
/** true if we should escape for html */
protected final boolean escape;
/** text that will appear before highlighted terms */
protected final String preTag;
/** text that will appear after highlighted terms */
protected final String postTag;
/** text that will appear between two unconnected passages */
protected final String ellipsis;
/** true if we should escape for html */
protected final boolean escape;
/**
* Creates a new DefaultPassageFormatter with the default tags.
*/
public DefaultPassageFormatter() {
this("<b>", "</b>", "... ", false);
/**
* Creates a new DefaultPassageFormatter with the default tags.
*/
public DefaultPassageFormatter() {
this("<b>", "</b>", "... ", false);
}
/**
* Creates a new DefaultPassageFormatter with custom tags.
*
* @param preTag text which should appear before a highlighted term.
* @param postTag text which should appear after a highlighted term.
* @param ellipsis text which should be used to connect two unconnected passages.
* @param escape true if text should be html-escaped
*/
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
if (preTag == null || postTag == null || ellipsis == null) {
throw new NullPointerException();
}
this.preTag = preTag;
this.postTag = postTag;
this.ellipsis = ellipsis;
this.escape = escape;
}
/**
* Creates a new DefaultPassageFormatter with custom tags.
* @param preTag text which should appear before a highlighted term.
* @param postTag text which should appear after a highlighted term.
* @param ellipsis text which should be used to connect two unconnected passages.
* @param escape true if text should be html-escaped
*/
public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
if (preTag == null || postTag == null || ellipsis == null) {
throw new NullPointerException();
@Override
public String format(Passage passages[], String content) {
StringBuilder sb = new StringBuilder();
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if its the first one, or if its connected.
if (passage.getStartOffset() > pos && pos > 0) {
sb.append(ellipsis);
}
pos = passage.getStartOffset();
for (int i = 0; i < passage.getNumMatches(); i++) {
int start = passage.getMatchStarts()[i];
int end = passage.getMatchEnds()[i];
// its possible to have overlapping terms
if (start > pos) {
append(sb, content, pos, start);
}
this.preTag = preTag;
this.postTag = postTag;
this.ellipsis = ellipsis;
this.escape = escape;
}
@Override
public String format(Passage passages[], String content) {
StringBuilder sb = new StringBuilder();
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if its the first one, or if its connected.
if (passage.startOffset > pos && pos > 0) {
sb.append(ellipsis);
}
pos = passage.startOffset;
for (int i = 0; i < passage.numMatches; i++) {
int start = passage.matchStarts[i];
int end = passage.matchEnds[i];
// its possible to have overlapping terms
if (start > pos) {
append(sb, content, pos, start);
}
if (end > pos) {
sb.append(preTag);
append(sb, content, Math.max(pos, start), end);
sb.append(postTag);
pos = end;
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
append(sb, content, pos, Math.max(pos, passage.endOffset));
pos = passage.endOffset;
if (end > pos) {
sb.append(preTag);
append(sb, content, Math.max(pos, start), end);
sb.append(postTag);
pos = end;
}
return sb.toString();
}
// its possible a "term" from the analyzer could span a sentence boundary.
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
pos = passage.getEndOffset();
}
return sb.toString();
}
/**
* Appends original text to the response.
* @param dest resulting text, possibly transformed or encoded
* @param content original text content
* @param start index of the first character in content
* @param end index of the character following the last character in content
*/
protected void append(StringBuilder dest, String content, int start, int end) {
if (escape) {
// note: these are the rules from owasp.org
for (int i = start; i < end; i++) {
char ch = content.charAt(i);
switch(ch) {
case '&':
dest.append("&amp;");
break;
case '<':
dest.append("&lt;");
break;
case '>':
dest.append("&gt;");
break;
case '"':
dest.append("&quot;");
break;
case '\'':
dest.append("&#x27;");
break;
case '/':
dest.append("&#x2F;");
break;
default:
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
dest.append(ch);
} else if (ch < 0xff) {
dest.append("&#");
dest.append((int)ch);
dest.append(";");
} else {
dest.append(ch);
}
}
/**
* Appends original text to the response.
*
* @param dest resulting text, possibly transformed or encoded
* @param content original text content
* @param start index of the first character in content
* @param end index of the character following the last character in content
*/
protected void append(StringBuilder dest, String content, int start, int end) {
if (escape) {
// note: these are the rules from owasp.org
for (int i = start; i < end; i++) {
char ch = content.charAt(i);
switch (ch) {
case '&':
dest.append("&amp;");
break;
case '<':
dest.append("&lt;");
break;
case '>':
dest.append("&gt;");
break;
case '"':
dest.append("&quot;");
break;
case '\'':
dest.append("&#x27;");
break;
case '/':
dest.append("&#x2F;");
break;
default:
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
dest.append(ch);
} else if (ch < 0xff) {
dest.append("&#");
dest.append((int) ch);
dest.append(";");
} else {
dest.append(ch);
}
} else {
dest.append(content, start, end);
}
}
} else {
dest.append(content, start, end);
}
}
}

View File

@ -117,9 +117,9 @@ public class FieldHighlighter {
break;
}
Passage passage = new Passage();
passage.score = Float.NaN;
passage.startOffset = pos;
passage.endOffset = next;
passage.setScore(Float.NaN);
passage.setStartOffset(pos);
passage.setEndOffset(next);
passages.add(passage);
pos = next;
}
@ -145,12 +145,12 @@ public class FieldHighlighter {
offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
if (left.score < right.score) {
if (left.getScore() < right.getScore()) {
return -1;
} else if (left.score > right.score) {
} else if (left.getScore() > right.getScore()) {
return 1;
} else {
return left.startOffset - right.startOffset;
return left.getStartOffset() - right.getStartOffset();
}
});
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
@ -170,12 +170,12 @@ public class FieldHighlighter {
continue;
}
// See if this term should be part of a new passage.
if (start >= passage.endOffset) {
if (passage.startOffset >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
if (start >= passage.getEndOffset()) {
if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
// finalize passage
passage.score *= scorer.norm(passage.startOffset);
passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
// new sentence: first add 'passage' to queue
if (passageQueue.size() == maxPassages && passage.score < passageQueue.peek().score) {
if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
passage.reset(); // can't compete, just reset it
} else {
passageQueue.offer(passage);
@ -192,8 +192,8 @@ public class FieldHighlighter {
break;
}
// advance breakIterator
passage.startOffset = Math.max(breakIterator.preceding(start + 1), 0);
passage.endOffset = Math.min(breakIterator.following(start), contentLength);
passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
}
// Add this term to the passage.
int tf = 0;
@ -209,12 +209,12 @@ public class FieldHighlighter {
off.nextPosition();
start = off.startOffset();
end = off.endOffset();
if (start >= passage.endOffset || end > contentLength) { // it's beyond this passage
if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
offsetsEnumQueue.offer(off);
break;
}
}
passage.score += off.weight * scorer.tf(tf, passage.endOffset - passage.startOffset);
passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
}
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
@ -222,7 +222,7 @@ public class FieldHighlighter {
p.sort();
}
// sort in ascending order
Arrays.sort(passages, (left, right) -> left.startOffset - right.startOffset);
Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset());
return passages;
}

View File

@ -66,9 +66,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
}
BytesRef getTerm() throws IOException {
// the dp.getPayload thing is a hack -- see MultiTermHighlighting
return term != null ? term : postingsEnum.getPayload();
// We don't deepcopy() because in this hack we know we don't have to.
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
}
boolean hasMorePositions() throws IOException {
@ -91,7 +90,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
@Override
public void close() throws IOException {
if (postingsEnum instanceof Closeable) { // the one in MultiTermHighlighting is.
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.
if (postingsEnum instanceof Closeable) {
((Closeable) postingsEnum).close();
}
}

View File

@ -23,139 +23,159 @@ import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Represents a passage (typically a sentence of the document).
* Represents a passage (typically a sentence of the document).
* <p>
* A passage contains {@link #getNumMatches} highlights from the query,
* and the offsets and query terms that correspond with each match.
*
* @lucene.experimental
*/
public final class Passage {
int startOffset = -1;
int endOffset = -1;
float score = 0.0f;
public class Passage {
private int startOffset = -1;
private int endOffset = -1;
private float score = 0.0f;
int matchStarts[] = new int[8];
int matchEnds[] = new int[8];
BytesRef matchTerms[] = new BytesRef[8];
int numMatches = 0;
private int[] matchStarts = new int[8];
private int[] matchEnds = new int[8];
private BytesRef[] matchTerms = new BytesRef[8];
private int numMatches = 0;
public void addMatch(int startOffset, int endOffset, BytesRef term) {
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
if (numMatches == matchStarts.length) {
int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
int newMatchStarts[] = new int[newLength];
int newMatchEnds[] = new int[newLength];
BytesRef newMatchTerms[] = new BytesRef[newLength];
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
matchStarts = newMatchStarts;
matchEnds = newMatchEnds;
matchTerms = newMatchTerms;
}
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
matchStarts[numMatches] = startOffset;
matchEnds[numMatches] = endOffset;
matchTerms[numMatches] = term;
numMatches++;
/** @lucene.internal */
public void addMatch(int startOffset, int endOffset, BytesRef term) {
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
if (numMatches == matchStarts.length) {
int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
int newMatchStarts[] = new int[newLength];
int newMatchEnds[] = new int[newLength];
BytesRef newMatchTerms[] = new BytesRef[newLength];
System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
matchStarts = newMatchStarts;
matchEnds = newMatchEnds;
matchTerms = newMatchTerms;
}
assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
matchStarts[numMatches] = startOffset;
matchEnds[numMatches] = endOffset;
matchTerms[numMatches] = term;
numMatches++;
}
void sort() {
final int starts[] = matchStarts;
final int ends[] = matchEnds;
final BytesRef terms[] = matchTerms;
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
int temp = starts[i];
starts[i] = starts[j];
starts[j] = temp;
/** @lucene.internal */
public void sort() {
final int starts[] = matchStarts;
final int ends[] = matchEnds;
final BytesRef terms[] = matchTerms;
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
int temp = starts[i];
starts[i] = starts[j];
starts[j] = temp;
temp = ends[i];
ends[i] = ends[j];
ends[j] = temp;
temp = ends[i];
ends[i] = ends[j];
ends[j] = temp;
BytesRef tempTerm = terms[i];
terms[i] = terms[j];
terms[j] = tempTerm;
}
BytesRef tempTerm = terms[i];
terms[i] = terms[j];
terms[j] = tempTerm;
}
@Override
protected int compare(int i, int j) {
return Integer.compare(starts[i], starts[j]);
}
@Override
protected int compare(int i, int j) {
return Integer.compare(starts[i], starts[j]);
}
}.sort(0, numMatches);
}
}.sort(0, numMatches);
}
void reset() {
startOffset = endOffset = -1;
score = 0.0f;
numMatches = 0;
}
/** @lucene.internal */
public void reset() {
startOffset = endOffset = -1;
score = 0.0f;
numMatches = 0;
}
/**
* Start offset of this passage.
* @return start index (inclusive) of the passage in the
* original content: always &gt;= 0.
*/
public int getStartOffset() {
return startOffset;
}
/**
* Start offset of this passage.
*
* @return start index (inclusive) of the passage in the
* original content: always &gt;= 0.
*/
public int getStartOffset() {
return startOffset;
}
/**
* End offset of this passage.
* @return end index (exclusive) of the passage in the
* original content: always &gt;= {@link #getStartOffset()}
*/
public int getEndOffset() {
return endOffset;
}
/**
* End offset of this passage.
*
* @return end index (exclusive) of the passage in the
* original content: always &gt;= {@link #getStartOffset()}
*/
public int getEndOffset() {
return endOffset;
}
/**
* Passage's score.
*/
public float getScore() {
return score;
}
/**
* Passage's score.
*/
public float getScore() {
return score;
}
/**
* Number of term matches available in
* {@link #getMatchStarts}, {@link #getMatchEnds},
* {@link #getMatchTerms}
*/
public int getNumMatches() {
return numMatches;
}
/**
* Number of term matches available in
* {@link #getMatchStarts}, {@link #getMatchEnds},
* {@link #getMatchTerms}
*/
public int getNumMatches() {
return numMatches;
}
/**
* Start offsets of the term matches, in increasing order.
* <p>
* Only {@link #getNumMatches} are valid. Note that these
* offsets are absolute (not relative to {@link #getStartOffset()}).
*/
public int[] getMatchStarts() {
return matchStarts;
}
/**
* Start offsets of the term matches, in increasing order.
* <p>
* Only {@link #getNumMatches} are valid. Note that these
* offsets are absolute (not relative to {@link #getStartOffset()}).
*/
public int[] getMatchStarts() {
return matchStarts;
}
/**
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
* <p>
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
* Analyzer produced a term which spans a passage boundary.
*/
public int[] getMatchEnds() {
return matchEnds;
}
/**
* End offsets of the term matches, corresponding with {@link #getMatchStarts}.
* <p>
* Only {@link #getNumMatches} are valid. Note that its possible that an end offset
* could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
* Analyzer produced a term which spans a passage boundary.
*/
public int[] getMatchEnds() {
return matchEnds;
}
/**
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
* <p>
* Only {@link #getNumMatches()} are valid.
*/
public BytesRef[] getMatchTerms() {
return matchTerms;
}
/**
* BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
* <p>
* Only {@link #getNumMatches()} are valid.
*/
public BytesRef[] getMatchTerms() {
return matchTerms;
}
/** @lucene.internal */
public void setStartOffset(int startOffset) {
this.startOffset = startOffset;
}
/** @lucene.internal */
public void setEndOffset(int endOffset) {
this.endOffset = endOffset;
}
/** @lucene.internal */
public void setScore(float score) {
this.score = score;
}
}

View File

@ -69,10 +69,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum));
}
// but this would have a performance cost for likely little gain in the user experience, it
// would only serve to make this method less bogus.
// instead, we always return freq() = Integer.MAX_VALUE and let the highlighter terminate based on offset...
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl?
// See class javadocs.
// TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl? See TODOs in OffsetsEnum.
private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable {
TokenStream stream; // becomes null when closed
final CharacterRunAutomaton[] matchers;
@ -134,6 +132,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
return currentEndOffset;
}
// TOTAL HACK; used in OffsetsEnum.getTerm()
@Override
public BytesRef getPayload() throws IOException {
if (matchDescriptions[currentMatch] == null) {

View File

@ -697,13 +697,13 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if it's the first one, or if it's connected.
if (passage.startOffset > pos && pos > 0) {
if (passage.getStartOffset() > pos && pos > 0) {
sb.append("... ");
}
pos = passage.startOffset;
for (int i = 0; i < passage.numMatches; i++) {
int start = passage.matchStarts[i];
int end = passage.matchEnds[i];
pos = passage.getStartOffset();
for (int i = 0; i < passage.getNumMatches(); i++) {
int start = passage.getMatchStarts()[i];
int end = passage.getMatchEnds()[i];
// it's possible to have overlapping terms
if (start > pos) {
sb.append(content, pos, start);
@ -719,8 +719,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
}
}
// it's possible that a "term" from the analyzer could span a sentence boundary.
sb.append(content, pos, Math.max(pos, passage.endOffset));
pos = passage.endOffset;
sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
pos = passage.getEndOffset();
}
return sb.toString();
}

View File

@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
init(f, a);
}
/**
 * Controls whether phrase queries are generated automatically when the analyzer
 * returns more than one term for a piece of whitespace-delimited text.
 * <p>
 * Pass {@code true} to enable automatic generation (NOTE: this behavior may not be
 * suitable for all languages); pass {@code false} if phrase queries should only be
 * generated for text surrounded by double quotes.
 * <p>
 * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
 * is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param value the new setting
 * @throws IllegalArgumentException if {@code value} is true while splitOnWhitespace is false
 */
@Override
public void setAutoGeneratePhraseQueries(boolean value) {
  // Reject the one disallowed combination before touching any state.
  final boolean conflictsWithSplitSetting = value && !splitOnWhitespace;
  if (conflictsWithSplitSetting) {
    throw new IllegalArgumentException(
        "setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
  }
  autoGeneratePhraseQueries = value;
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
/**
 * Sets whether query text is split on whitespace before it is analyzed.
 * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
 * <p>
 * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
 * is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param splitOnWhitespace the new setting
 * @throws IllegalArgumentException if {@code splitOnWhitespace} is false while
 *         {@code getAutoGeneratePhraseQueries()} returns true
 */
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
  // The getter is only consulted when splitting is being switched off.
  if (!splitOnWhitespace && getAutoGeneratePhraseQueries()) {
    throw new IllegalArgumentException(
        "setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
  }
  this.splitOnWhitespace = splitOnWhitespace;
}
@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
finally { jj_save(2, xla); }
}
// Lookahead helper. The jj_* naming suggests this is JavaCC-generated code, in which case
// changes belong in the grammar file rather than here -- TODO confirm.
// Returns true as soon as any step below reports true, and false once every step has passed.
private boolean jj_3R_3() {
// Step 1: scan a TERM token.
if (jj_scan_token(TERM)) return true;
// Step 2: semantic check -- token 1 must be a TERM and the kind of token 2 must satisfy
// allowedPostMultiTerm. jj_lookingAhead is raised only for the duration of the getToken calls.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// Step 3: jj_3R_7 is attempted once unconditionally ...
if (jj_3R_7()) return true;
// ... and then repeatedly; when an attempt reports true, the scan position is restored to
// where that attempt began and the loop ends.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
// Lookahead helper with nothing to check: always returns false.
private boolean jj_3R_6() {
return false;
}
// Lookahead helper: scans a STAR token followed by a COLON token.
// Returns true as soon as either jj_scan_token call returns true, false otherwise.
private boolean jj_3R_5() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
private boolean jj_3R_4() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
return false;
}
// Lookahead helper. The jj_* naming suggests this is JavaCC-generated code, in which case
// changes belong in the grammar file rather than here -- TODO confirm.
// Returns true as soon as any step below reports true, and false once every step has passed.
private boolean jj_3R_3() {
// Step 1: scan a TERM token.
if (jj_scan_token(TERM)) return true;
// Step 2: semantic check -- token 1 must be a TERM and the kind of token 2 must satisfy
// allowedPostMultiTerm. jj_lookingAhead is raised only for the duration of the getToken calls.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// Step 3: jj_3R_7 is attempted once unconditionally ...
if (jj_3R_7()) return true;
// ... and then repeatedly; when an attempt reports true, the scan position is restored to
// where that attempt began and the loop ends.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
// Lookahead helper with nothing to check: always returns false.
private boolean jj_3R_6() {
return false;
}
// Lookahead helper: scans a STAR token followed by a COLON token.
// Returns true as soon as either jj_scan_token call returns true, false otherwise.
private boolean jj_3R_5() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */

View File

@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase {
init(f, a);
}
/**
 * Controls whether phrase queries are generated automatically when the analyzer
 * returns more than one term for a piece of whitespace-delimited text.
 * <p>
 * Pass {@code true} to enable automatic generation (NOTE: this behavior may not be
 * suitable for all languages); pass {@code false} if phrase queries should only be
 * generated for text surrounded by double quotes.
 * <p>
 * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
 * is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param value the new setting
 * @throws IllegalArgumentException if {@code value} is true while splitOnWhitespace is false
 */
@Override
public void setAutoGeneratePhraseQueries(boolean value) {
  // Reject the one disallowed combination before touching any state.
  final boolean conflictsWithSplitSetting = value && !splitOnWhitespace;
  if (conflictsWithSplitSetting) {
    throw new IllegalArgumentException(
        "setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
  }
  autoGeneratePhraseQueries = value;
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase {
/**
 * Sets whether query text is split on whitespace before it is analyzed.
 * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
 * <p>
 * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
 * is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param splitOnWhitespace the new setting
 * @throws IllegalArgumentException if {@code splitOnWhitespace} is false while
 *         {@code getAutoGeneratePhraseQueries()} returns true
 */
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
  // The getter is only consulted when splitting is being switched off.
  if (!splitOnWhitespace && getAutoGeneratePhraseQueries()) {
    throw new IllegalArgumentException(
        "setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
  }
  this.splitOnWhitespace = splitOnWhitespace;
}

View File

@ -144,7 +144,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
* Set to false if phrase queries should only be generated when
* surrounded by double quotes.
*/
public final void setAutoGeneratePhraseQueries(boolean value) {
public void setAutoGeneratePhraseQueries(boolean value) {
this.autoGeneratePhraseQueries = value;
}

View File

@ -840,6 +840,20 @@ public class TestQueryParser extends QueryParserTestBase {
assertTrue(isAHit(qp.parse("เ??"), s, analyzer));
}
/**
 * LUCENE-7533: the combination splitOnWhitespace=false and autoGeneratePhraseQueries=true must be
 * rejected with an IllegalArgumentException, whichever of the two setters is called last.
 */
public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() {
  // Case 1: splitting is switched off first, then auto-generation is requested.
  final QueryParser splitDisabledFirst = new QueryParser(FIELD, new MockAnalyzer(random()));
  expectThrows(IllegalArgumentException.class, () -> {
    splitDisabledFirst.setSplitOnWhitespace(false);
    splitDisabledFirst.setAutoGeneratePhraseQueries(true);
  });

  // Case 2: auto-generation is enabled while splitting is on, then splitting is switched off.
  final QueryParser autoGenerateFirst = new QueryParser(FIELD, new MockAnalyzer(random()));
  expectThrows(IllegalArgumentException.class, () -> {
    autoGenerateFirst.setSplitOnWhitespace(true);
    autoGenerateFirst.setAutoGeneratePhraseQueries(true);
    autoGenerateFirst.setSplitOnWhitespace(false);
  });
}
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
Directory ramDir = newDirectory();

View File

@ -38,6 +38,7 @@ import org.apache.lucene.index.Term;
//import org.apache.lucene.queryparser.classic.ParseException;
//import org.apache.lucene.queryparser.classic.QueryParser;
//import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
//import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
@ -328,6 +329,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
PhraseQuery expected = new PhraseQuery("field", "", "");
CommonQueryParserConfiguration qp = getParserConfig(analyzer);
if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method
((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533
}
setAutoGeneratePhraseQueries(qp, true);
assertEquals(expected, getQuery("中国",qp));
}

View File

@ -56,7 +56,7 @@ public abstract class PrimaryNode extends Node {
// Current NRT segment infos, incRef'd with IndexWriter.deleter:
private SegmentInfos curInfos;
final IndexWriter writer;
protected final IndexWriter writer;
// IncRef'd state of the last published NRT point; when a replica comes asking, we give it this as the current NRT point:
private CopyState copyState;

View File

@ -129,9 +129,10 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
private final boolean highlight;
private final boolean commitOnBuild;
private final boolean closeIndexWriterOnBuild;
/** Used for ongoing NRT additions/updates. */
private IndexWriter writer;
protected IndexWriter writer;
/** {@link IndexSearcher} used for lookups. */
protected SearcherManager searcherMgr;
@ -146,6 +147,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
/** Default highlighting option. */
public static final boolean DEFAULT_HIGHLIGHT = true;
/** Default option to close the IndexWriter once the index has been built. */
protected final static boolean DEFAULT_CLOSE_INDEXWRITER_ON_BUILD = true;
/** How we sort the postings and search results. */
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
@ -198,8 +202,34 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
*
*/
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
boolean commitOnBuild,
boolean commitOnBuild,
boolean allTermsRequired, boolean highlight) throws IOException {
this(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild, allTermsRequired, highlight,
DEFAULT_CLOSE_INDEXWRITER_ON_BUILD);
}
/** Create a new instance, loading from a previously built
* AnalyzingInfixSuggester directory, if it exists. This directory must be
* private to the infix suggester (i.e., not an external
* Lucene index). Note that {@link #close}
* will also close the provided directory.
*
* @param minPrefixChars Minimum number of leading characters
* before PrefixQuery is used (default 4).
* Prefixes shorter than this are indexed as character
* ngrams (increasing index size but making lookups
* faster).
*
* @param commitOnBuild Call commit after the index has finished building. This would persist the
* suggester index to disk and future instances of this suggester can use this pre-built dictionary.
*
* @param allTermsRequired All terms in the suggest query must be matched.
* @param highlight Highlight suggest query in suggestions.
* @param closeIndexWriterOnBuild If true, the IndexWriter will be closed after the index has finished building.
*/
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
boolean commitOnBuild, boolean allTermsRequired,
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
if (minPrefixChars < 0) {
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
@ -212,6 +242,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
this.commitOnBuild = commitOnBuild;
this.allTermsRequired = allTermsRequired;
this.highlight = highlight;
this.closeIndexWriterOnBuild = closeIndexWriterOnBuild;
if (DirectoryReader.indexExists(dir)) {
// Already built; open it:
@ -276,15 +307,22 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
if (commitOnBuild) {
if (commitOnBuild || closeIndexWriterOnBuild) {
commit();
}
searcherMgr = new SearcherManager(writer, null);
success = true;
} finally {
if (success == false && writer != null) {
writer.rollback();
writer = null;
if (success) {
if (closeIndexWriterOnBuild) {
writer.close();
writer = null;
}
} else { // failure
if (writer != null) {
writer.rollback();
writer = null;
}
}
}
}
@ -294,9 +332,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
* @see IndexWriter#commit */
public void commit() throws IOException {
if (writer == null) {
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
if (searcherMgr == null || closeIndexWriterOnBuild == false) {
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
}
// else no-op: writer was committed and closed after the index was built, so commit is unnecessary
} else {
writer.commit();
}
writer.commit();
}
private Analyzer getGramAnalyzer() {
@ -321,13 +363,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
private synchronized void ensureOpen() throws IOException {
if (writer == null) {
if (searcherMgr != null) {
searcherMgr.close();
searcherMgr = null;
if (DirectoryReader.indexExists(dir)) {
// Already built; open it:
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
} else {
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
}
writer = new IndexWriter(dir,
getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
SearcherManager oldSearcherMgr = searcherMgr;
searcherMgr = new SearcherManager(writer, null);
if (oldSearcherMgr != null) {
oldSearcherMgr.close();
}
}
}
@ -382,7 +428,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
if (searcherMgr == null) {
throw new IllegalStateException("suggester was not built");
}
searcherMgr.maybeRefreshBlocking();
if (writer != null) {
searcherMgr.maybeRefreshBlocking();
}
// else no-op: writer was committed and closed after the index was built
// and before searchMgr was constructed, so refresh is unnecessary
}
/**
@ -791,9 +841,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
if (writer != null) {
writer.close();
dir.close();
writer = null;
}
if (dir != null) {
dir.close();
}
}
@Override

View File

@ -86,6 +86,10 @@ final class CompletionFieldsConsumer extends FieldsConsumer {
for (String field : fields) {
CompletionTermWriter termWriter = new CompletionTermWriter();
Terms terms = fields.terms(field);
if (terms == null) {
// this can happen from ghost fields, where the incoming Fields iterator claims a field exists but it does not
continue;
}
TermsEnum termsEnum = terms.iterator();
// write terms

View File

@ -35,11 +35,14 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@ -1334,4 +1337,112 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
suggester.close();
}
/**
 * Checks the state of a suggester built with closeIndexWriterOnBuild = true: after build() the
 * IndexWriter field is null, the SearcherManager field is non-null, and refreshing that
 * SearcherManager throws AlreadyClosedException.
 */
public void testCloseIndexWriterOnBuild() throws Exception {
  // Local subclass whose only job is to expose the writer and searcherMgr fields.
  class MyAnalyzingInfixSuggester extends AnalyzingInfixSuggester {
    public MyAnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                                     int minPrefixChars, boolean commitOnBuild, boolean allTermsRequired,
                                     boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
      super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
          allTermsRequired, highlight, closeIndexWriterOnBuild);
    }

    public IndexWriter getIndexWriter() {
      return writer;
    }

    public SearcherManager getSearcherManager() {
      return searcherMgr;
    }
  }

  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  final boolean commitOnBuild = false;
  final boolean closeIndexWriterOnBuild = true;
  MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(
      newDirectory(), analyzer, analyzer, 3, commitOnBuild,
      AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED,
      AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT,
      closeIndexWriterOnBuild);

  suggester.build(new InputArrayIterator(sharedInputs));

  // The writer has been dropped, but a searcher manager is still in place ...
  assertNull(suggester.getIndexWriter());
  assertNotNull(suggester.getSearcherManager());
  // ... and that manager's IndexWriter reference is closed, so a refresh attempt must fail.
  expectThrows(AlreadyClosedException.class, () -> suggester.getSearcherManager().maybeRefreshBlocking());

  suggester.close();
  analyzer.close();
}
/** commit() directly after build() must succeed for every option combination. */
public void testCommitAfterBuild() throws Exception {
  SuggesterOperation buildThenCommit = s -> {
    s.build(new InputArrayIterator(sharedInputs));
    s.commit();
  };
  performOperationWithAllOptionCombinations(buildThenCommit);
}
/** refresh() directly after build() must succeed for every option combination. */
public void testRefreshAfterBuild() throws Exception {
  SuggesterOperation buildThenRefresh = s -> {
    s.build(new InputArrayIterator(sharedInputs));
    s.refresh();
  };
  performOperationWithAllOptionCombinations(buildThenRefresh);
}
/** commit() on a suggester that was never built must throw IllegalStateException. */
public void testDisallowCommitBeforeBuild() throws Exception {
  SuggesterOperation commitWithoutBuild = s -> {
    expectThrows(IllegalStateException.class, () -> s.commit());
  };
  performOperationWithAllOptionCombinations(commitWithoutBuild);
}
/** refresh() on a suggester that was never built must throw IllegalStateException. */
public void testDisallowRefreshBeforeBuild() throws Exception {
  SuggesterOperation refreshWithoutBuild = s -> {
    expectThrows(IllegalStateException.class, () -> s.refresh());
  };
  performOperationWithAllOptionCombinations(refreshWithoutBuild);
}
private Input sharedInputs[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
/**
 * A single action to run against a suggester; used as a lambda target by the tests that call
 * {@code performOperationWithAllOptionCombinations}.
 */
@FunctionalInterface
private interface SuggesterOperation {
  /**
   * Runs the action.
   *
   * @param suggester the suggester to act on
   * @throws Exception whatever the action throws; it is propagated to the caller
   */
  void operate(AnalyzingInfixSuggester suggester) throws Exception;
}
/**
 * Perform the given operation on suggesters constructed with all combinations of options
 * commitOnBuild and closeIndexWriterOnBuild, including defaults.
 * <p>
 * Each suggester, and the analyzer they share, is closed even when the operation throws, so a
 * failing operation does not additionally leave resources open.
 *
 * @param operation the action to run once against each freshly constructed suggester
 * @throws Exception whatever the operation, a constructor, or a close() call throws
 */
private void performOperationWithAllOptionCombinations(SuggesterOperation operation) throws Exception {
  try (Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    // Every option left at its default.
    try (AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a)) {
      operation.operate(suggester);
    }

    // Five-argument constructor: first false, then true (same order as before).
    for (boolean commitOnBuild : new boolean[] {false, true}) {
      try (AnalyzingInfixSuggester suggester =
               new AnalyzingInfixSuggester(newDirectory(), a, a, 3, commitOnBuild)) {
        operation.operate(suggester);
      }
    }

    // Eight-argument constructor: (true, true), (true, false), (false, true), (false, false).
    for (boolean commitOnBuild : new boolean[] {true, false}) {
      for (boolean closeIndexWriterOnBuild : new boolean[] {true, false}) {
        try (AnalyzingInfixSuggester suggester =
                 new AnalyzingInfixSuggester(newDirectory(), a, a, 3, commitOnBuild,
                     AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED,
                     AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT,
                     closeIndexWriterOnBuild)) {
          operation.operate(suggester);
        }
      }
    }
  }
}
}

View File

@ -24,9 +24,12 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedNumericDocValues;
@ -38,7 +41,6 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;
@ -112,7 +114,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
dir.close();
}
@Test
public void testSimple() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
@ -141,7 +142,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}
@Test
public void testMostlyFilteredOutDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
@ -188,7 +188,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}
@Test
public void testDocFiltering() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
@ -230,7 +229,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}
@Test
public void testAnalyzerWithoutPreservePosAndSep() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, false);
@ -254,7 +252,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}
@Test
public void testAnalyzerWithSepAndNoPreservePos() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, true, false);
@ -278,7 +275,6 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}
@Test
public void testAnalyzerWithPreservePosAndNoSep() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, true);
@ -302,4 +298,43 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
iw.close();
}
/**
 * Regression test for "ghost" fields: a field name that is still reported during iteration although
 * no remaining document has it. After the sequence below, suggesting on "suggest_field" must give
 * zero hits and suggesting on "suggest_field2" must still return "apples".
 * The exact order of commits, deletes and force merges is what produces the ghost field.
 */
public void testGhostField() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field", "suggest_field2", "suggest_field3"));
// first segment: doc 0 is the only document carrying "suggest_field"
Document document = new Document();
document.add(new StringField("id", "0", Field.Store.NO));
document.add(new SuggestField("suggest_field", "apples", 3));
iw.addDocument(document);
// need another document so whole segment isn't deleted
iw.addDocument(new Document());
iw.commit();
// second commit: doc 1 carries "suggest_field2" only
document = new Document();
document.add(new StringField("id", "1", Field.Store.NO));
document.add(new SuggestField("suggest_field2", "apples", 3));
iw.addDocument(document);
iw.commit();
// delete the only document that had "suggest_field"
iw.deleteDocuments(new Term("id", "0"));
// first force merge is OK
iw.forceMerge(1);
// second force merge causes MultiFields to include "suggest_field" in its iteration,
// yet a null Terms is returned (no documents have this field anymore)
iw.addDocument(new Document());
iw.forceMerge(1);
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
// the ghost field yields no suggestions ...
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
assertEquals(0, indexSearcher.suggest(query, 3).totalHits);
// ... while the surviving field still answers
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3));
reader.close();
iw.close();
}
}

View File

@ -40,6 +40,8 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
@ -312,6 +314,49 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
dir.close();
}
/**
 * Tests that level 2 ghost fields still work: after the commit / delete / double force-merge
 * sequence below, a plain term query on the surviving document must still find it.
 * The exact order of commits, deletes and force merges is what produces the ghost field.
 */
public void testLevel2Ghosts() throws Exception {
  Directory dir = newDirectory();

  // The writer is configured with a null analyzer; only StringFields are indexed below.
  IndexWriterConfig iwc = newIndexWriterConfig(null);
  iwc.setCodec(getCodec());
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter iw = new IndexWriter(dir, iwc);

  // first segment: doc 0 is the only document carrying "suggest_field"
  Document document = new Document();
  document.add(new StringField("id", "0", Field.Store.NO));
  document.add(new StringField("suggest_field", "apples", Field.Store.NO));
  iw.addDocument(document);
  // need another document so whole segment isn't deleted
  iw.addDocument(new Document());
  iw.commit();

  // second commit: doc 1 carries "suggest_field2" only
  document = new Document();
  document.add(new StringField("id", "1", Field.Store.NO));
  document.add(new StringField("suggest_field2", "apples", Field.Store.NO));
  iw.addDocument(document);
  iw.commit();

  // delete the only document that had "suggest_field"
  iw.deleteDocuments(new Term("id", "0"));
  // first force merge creates a level 1 ghost field
  iw.forceMerge(1);

  // second force merge creates a level 2 ghost field, causing MultiFields to include
  // "suggest_field" in its iteration, yet a null Terms is returned (no documents have
  // this field anymore)
  iw.addDocument(new Document());
  iw.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(iw);
  IndexSearcher indexSearcher = new IndexSearcher(reader);

  assertEquals(1, indexSearcher.count(new TermQuery(new Term("id", "1"))));

  reader.close();
  iw.close();
  dir.close();
}
private static class TermFreqs {
long totalTermFreq;
int docFreq;

View File

@ -120,6 +120,9 @@ New Features
* SOLR-9077: Streaming expressions should support collection alias (Kevin Risden)
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
(Gregory Chanan, Hrishikesh Gadre via yonik)
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
@ -128,6 +131,9 @@ Optimizations
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
(Jonny Marks via Christine Poerschke)
* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
comparator and only invalidate leafComparator. (John Call via yonik)
Bug Fixes
----------------------
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
@ -183,6 +189,10 @@ Other Changes
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
(Christine Poerschke, Noble Paul)
* SOLR-9597: Add setReadOnly(String ...) to ConnectionImpl (Kevin Risden)
* SOLR-9609: Change hard-coded keysize from 512 to 1024 (Jeremy Martini via Erick Erickson)
================== 6.3.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@ -615,9 +625,6 @@ New Features
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
(Doug Turnbull, David Smiley)
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
(Gregory Chanan)
* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)
* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.

View File

@ -2645,16 +2645,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
try {
FileUtils.deleteDirectory(dataDir);
} catch (IOException e) {
SolrException.log(log, "Failed to delete data dir for unloaded core:" + cd.getName()
+ " dir:" + dataDir.getAbsolutePath());
log.error("Failed to delete data dir for unloaded core: {} dir: {}", cd.getName(), dataDir.getAbsolutePath(), e);
}
}
if (deleteInstanceDir) {
try {
FileUtils.deleteDirectory(cd.getInstanceDir().toFile());
} catch (IOException e) {
SolrException.log(log, "Failed to delete instance dir for unloaded core:" + cd.getName()
+ " dir:" + cd.getInstanceDir());
log.error("Failed to delete instance dir for unloaded core: {} dir: {}", cd.getName(), cd.getInstanceDir(), e);
}
}
}

View File

@ -616,7 +616,7 @@ public class QueryComponent extends SearchComponent
// :TODO: would be simpler to always serialize every position of SortField[]
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;
FieldComparator<?> comparator = null;
FieldComparator<?> comparator = sortField.getComparator(1,0);
LeafFieldComparator leafComparator = null;
Object[] vals = new Object[nDocs];
@ -633,13 +633,13 @@ public class QueryComponent extends SearchComponent
idx = ReaderUtil.subIndex(doc, leaves);
currentLeaf = leaves.get(idx);
if (idx != lastIdx) {
// we switched segments. invalidate comparator.
comparator = null;
// we switched segments. invalidate leafComparator.
lastIdx = idx;
leafComparator = null;
}
}
if (comparator == null) {
comparator = sortField.getComparator(1,0);
if (leafComparator == null) {
leafComparator = comparator.getLeafComparator(currentLeaf);
}

View File

@ -285,6 +285,10 @@ public final class CryptoKeys {
private final PrivateKey privateKey;
private final SecureRandom random = new SecureRandom();
// If this ever comes back to haunt us see the discussion at
// SOLR-9609 for background and code allowing this to go
// into security.json
private static final int DEFAULT_KEYPAIR_LENGTH = 1024;
public RSAKeyPair() {
KeyPairGenerator keyGen = null;
@ -293,7 +297,7 @@ public final class CryptoKeys {
} catch (NoSuchAlgorithmException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
keyGen.initialize(512);
keyGen.initialize(DEFAULT_KEYPAIR_LENGTH);
java.security.KeyPair keyPair = keyGen.genKeyPair();
privateKey = keyPair.getPrivate();
publicKey = keyPair.getPublic();

View File

@ -114,7 +114,7 @@ public class BlockDirectoryTest extends SolrTestCaseJ4 {
if (random().nextBoolean()) {
Metrics metrics = new Metrics();
int blockSize = 8192;
int slabSize = blockSize * 32768;
int slabSize = blockSize * 16384;
long totalMemory = 1 * slabSize;
BlockCache blockCache = new BlockCache(metrics, true, totalMemory, slabSize, blockSize);
BlockDirectoryCache cache = new BlockDirectoryCache(blockCache, "/collection1", metrics, true);

View File

@ -155,6 +155,15 @@ class ConnectionImpl implements Connection {
}
/*
 * When using OpenLink ODBC-JDBC bridge on Windows, it runs the method ConnectionImpl.setReadOnly(String ...).
 * The spec says that setReadOnly(boolean ...) is required. This causes the ODBC-JDBC bridge to fail on Windows.
 * OpenLink case: http://support.openlinksw.com/support/techupdate.vsp?c=21881
 *
 * This String overload has an empty body: the argument is ignored and no state is changed.
 */
public void setReadOnly(String readOnly) throws SQLException {
// Intentionally empty.
}
@Override
public boolean isReadOnly() throws SQLException {
return true;

View File

@ -886,7 +886,7 @@ public class JavaBinCodec implements PushWriter {
daos.writeByte(NULL);
return true;
} else if (val instanceof CharSequence) {
writeStr((String) val);
writeStr((CharSequence) val);
return true;
} else if (val instanceof Number) {