Merge branch 'apache-https-master' into jira/solr-8593

This commit is contained in:
Kevin Risden 2016-11-10 16:15:06 -05:00
commit c3400e8a2e
123 changed files with 3681 additions and 1496 deletions

View File

@@ -55,7 +55,30 @@ Other
 * LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)

 ======================= Lucene 6.4.0 =======================
-(No Changes)
+New features
+
+* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
+
+Improvements
+
+* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
+  PhraseQuery or MultiPhraseQuery when the word automaton is simple
+  (Mike McCandless)
+
+* LUCENE-7431: Allow a certain amount of overlap to be specified between the include
+  and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
+  (Marc Morissette via David Smiley)
+
+* LUCENE-7544: UnifiedHighlighter: add extension points for handling custom queries.
+  (Michael Braun, David Smiley)
+
+* LUCENE-7538: Asking IndexWriter to store a too-massive text field
+  now throws IllegalArgumentException instead of a cryptic exception
+  that closes your IndexWriter (Steve Chen via Mike McCandless)
+
+* LUCENE-7524: Added more detailed explanation of how IDF is computed in
+  ClassicSimilarity and BM25Similarity. (Adrien Grand)

 ======================= Lucene 6.3.0 =======================

View File

@@ -27,7 +27,7 @@
 <dependencies>
   <dependency org="mecab" name="mecab-ipadic" rev="${/mecab/mecab-ipadic}" conf="ipadic">
-    <artifact name="ipadic" type=".tar.gz" url="http://mecab.googlecode.com/files/mecab-ipadic-2.7.0-20070801.tar.gz"/>
+    <artifact name="ipadic" type=".tar.gz" url="http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz"/>
   </dependency>
   <dependency org="mecab" name="mecab-naist-jdic" rev="${/mecab/mecab-naist-jdic}" conf="naist">
     <artifact name="mecab-naist-jdic" type=".tar.gz" url="http://sourceforge.jp/frs/redir.php?m=iij&amp;f=/naist-jdic/53500/mecab-naist-jdic-0.6.3b-20111013.tar.gz"/>

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.OutputStreamDataOutput;
-import org.apache.lucene.util.BitUtil;

 public final class ConnectionCostsWriter {

View File

@@ -33,12 +33,10 @@ import java.util.Comparator;
 import java.util.List;

 import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
-import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.packed.PackedInts;

 import com.ibm.icu.text.Normalizer2;

@@ -133,7 +131,7 @@ public class TokenInfoDictionaryBuilder {
     System.out.println(" encode...");

     PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
-    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, PackedInts.DEFAULT, true, 15);
+    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, 15);
     IntsRefBuilder scratch = new IntsRefBuilder();
     long ord = -1; // first ord will be 0
     String lastValue = null;

View File

@@ -231,7 +231,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     "6.2.0-cfs",
     "6.2.0-nocfs",
     "6.2.1-cfs",
-    "6.2.1-nocfs"
+    "6.2.1-nocfs",
+    "6.3.0-cfs",
+    "6.3.0-nocfs"
   };

   final String[] unsupportedNames = {

View File

@@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.packed.PackedInts;

 /*
   TODO:
@@ -363,8 +362,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
       final Builder<Output> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
                                                          0, 0, true, false, Integer.MAX_VALUE,
-                                                         FST_OUTPUTS, false,
-                                                         PackedInts.COMPACT, true, 15);
+                                                         FST_OUTPUTS, true, 15);
       //if (DEBUG) {
       //  System.out.println("  compile index for prefix=" + prefix);
       //}

View File

@@ -81,9 +81,6 @@ import org.apache.lucene.util.packed.PackedInts;
 // loads itself in ram?
 public final class MemoryPostingsFormat extends PostingsFormat {

-  private final boolean doPackFST;
-  private final float acceptableOverheadRatio;
-
   public MemoryPostingsFormat() {
     this(false, PackedInts.DEFAULT);
   }
@@ -97,13 +94,11 @@ public final class MemoryPostingsFormat extends PostingsFormat {
    */
   public MemoryPostingsFormat(boolean doPackFST, float acceptableOverheadRatio) {
     super("Memory");
-    this.doPackFST = doPackFST;
-    this.acceptableOverheadRatio = acceptableOverheadRatio;
   }

   @Override
   public String toString() {
-    return "PostingsFormat(name=" + getName() + " doPackFST= " + doPackFST + ")";
+    return "PostingsFormat(name=" + getName() + ")";
   }

   private final static class TermsWriter {
@@ -111,16 +106,12 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     private final FieldInfo field;
     private final Builder<BytesRef> builder;
     private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
-    private final boolean doPackFST;
-    private final float acceptableOverheadRatio;
     private int termCount;

-    public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
+    public TermsWriter(IndexOutput out, FieldInfo field) {
       this.out = out;
       this.field = field;
-      this.doPackFST = doPackFST;
-      this.acceptableOverheadRatio = acceptableOverheadRatio;
-      builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doPackFST, acceptableOverheadRatio, true, 15);
+      builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
     }

     private class PostingsWriter {
@@ -307,8 +298,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
       TermsEnum termsEnum = terms.iterator();

       FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
-      TermsWriter termsWriter = new TermsWriter(out, fieldInfo,
-                                                doPackFST, acceptableOverheadRatio);
+      TermsWriter termsWriter = new TermsWriter(out, fieldInfo);

       FixedBitSet docsSeen = new FixedBitSet(state.segmentInfo.maxDoc());
       long sumTotalTermFreq = 0;

View File

@@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.packed.PackedInts;

 /*
   TODO:
@@ -456,8 +455,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
       final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
       final Builder<BytesRef> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
                                                            0, 0, true, false, Integer.MAX_VALUE,
-                                                           outputs, false,
-                                                           PackedInts.COMPACT, true, 15);
+                                                           outputs, true, 15);
       //if (DEBUG) {
       //  System.out.println("  compile index for prefix=" + prefix);
       //}

View File

@@ -64,7 +64,7 @@ public final class GrowableByteArrayDataOutput extends DataOutput {
   @Override
   public void writeString(String string) throws IOException {
-    int maxLen = string.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
+    int maxLen = UnicodeUtil.maxUTF8Length(string.length());
     if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) {
       // string is small enough that we don't need to save memory by falling back to double-pass approach
       // this is just an optimized writeString() that re-uses scratchBytes.

View File

@@ -24,11 +24,9 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
@@ -48,7 +46,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   private final Counter iwBytesUsed;
   private final PackedLongValues.Builder lengths;
-  private FixedBitSet docsWithField;
+  private DocsWithFieldSet docsWithField;
   private final FieldInfo fieldInfo;
   private long bytesUsed;
   private int lastDocID = -1;
@@ -60,7 +58,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     this.bytesOut = bytes.getDataOutput();
     this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
     this.iwBytesUsed = iwBytesUsed;
-    this.docsWithField = new FixedBitSet(64);
+    this.docsWithField = new DocsWithFieldSet();
     this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
@@ -84,8 +82,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
       // Should never happen!
       throw new RuntimeException(ioe);
     }
-    docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
-    docsWithField.set(docID);
+    docsWithField.add(docID);
     updateBytesUsed();

     lastDocID = docID;
@@ -112,7 +109,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
         if (fieldInfoIn != fieldInfo) {
           throw new IllegalArgumentException("wrong fieldInfo");
         }
-        return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField);
+        return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField.iterator());
       }
     });
   }
@@ -124,12 +121,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     final DocIdSetIterator docsWithField;
     final DataInput bytesIterator;

-    BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, FixedBitSet docsWithFields) {
+    BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, DocIdSetIterator docsWithFields) {
       this.value = new BytesRefBuilder();
       this.value.grow(maxLength);
       this.lengthsIterator = lengths.iterator();
       this.bytesIterator = bytesIterator;
-      this.docsWithField = new BitSetIterator(docsWithFields, lengths.size());
+      this.docsWithField = docsWithFields;
     }

     @Override

View File

@@ -430,6 +430,10 @@ final class DefaultIndexingChain extends DocConsumer {
         fp = getOrAddField(fieldName, fieldType, false);
       }
       if (fieldType.stored()) {
+        String value = field.stringValue();
+        if (value != null && value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
+          throw new IllegalArgumentException("stored field \"" + field.name() + "\" is too large (" + value.length() + " characters) to store");
+        }
         try {
           storedFieldsWriter.writeField(fp.fieldInfo, field);
         } catch (Throwable th) {

View File

@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/** Accumulator for documents that have a value for a field. This is optimized
* for the case that all documents have a value. */
final class DocsWithFieldSet extends DocIdSet {
private static long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DocsWithFieldSet.class);
private FixedBitSet set;
private int cost = 0;
private int lastDocId = -1;
void add(int docID) {
if (docID <= lastDocId) {
throw new IllegalArgumentException("Out of order doc ids: last=" + lastDocId + ", next=" + docID);
}
if (set != null) {
set = FixedBitSet.ensureCapacity(set, docID);
set.set(docID);
} else if (docID != cost) {
// migrate to a sparse encoding using a bit set
set = new FixedBitSet(docID + 1);
set.set(0, cost);
set.set(docID);
}
lastDocId = docID;
cost++;
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + (set == null ? 0 : set.ramBytesUsed());
}
@Override
public DocIdSetIterator iterator() {
return set != null ? new BitSetIterator(set, cost) : DocIdSetIterator.all(cost);
}
}
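
The new DocsWithFieldSet above is the heart of this patch: as long as doc IDs arrive densely (0, 1, 2, ...) it allocates nothing and its iterator is simply DocIdSetIterator.all(cost); only the first gap triggers the FixedBitSet fallback. A minimal sketch of the two paths (the class is package-private to org.apache.lucene.index, so this is illustrative rather than client code):

// Dense path: no FixedBitSet is ever allocated.
DocsWithFieldSet dense = new DocsWithFieldSet();
dense.add(0);
dense.add(1);
dense.add(2);
DocIdSetIterator it = dense.iterator(); // DocIdSetIterator.all(3)

// Sparse path: the first gap (docID 5 != cost 1) migrates to a FixedBitSet.
DocsWithFieldSet sparse = new DocsWithFieldSet();
sparse.add(0);
sparse.add(5);
DocIdSetIterator it2 = sparse.iterator(); // BitSetIterator over {0, 5}, cost = 2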

View File

@@ -62,6 +62,7 @@ import org.apache.lucene.store.MergeInfo;
 import org.apache.lucene.store.RateLimitedIndexOutput;
 import org.apache.lucene.store.TrackingDirectoryWrapper;
 import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CloseableThreadLocal;
@@ -70,6 +71,7 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.Version;

 /**
@@ -258,6 +260,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * IndexWriterConfig#setInfoStream(InfoStream)}).
    */
   public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;
+
+  /**
+   * Maximum length string for a stored field.
+   */
+  public final static int MAX_STORED_STRING_LENGTH = ArrayUtil.MAX_ARRAY_LENGTH / UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;

   // when unrecoverable disaster strikes, we populate this with the reason that we had to close IndexWriter
   volatile Throwable tragedy;
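
With MAX_UTF8_BYTES_PER_CHAR equal to 3, MAX_STORED_STRING_LENGTH works out to roughly ArrayUtil.MAX_ARRAY_LENGTH / 3, about 715 million characters. A hedged sketch of the new LUCENE-7538 behavior (assumes an open IndexWriter named writer; actually allocating such a string needs several GB of heap, so this is illustrative only):

Document doc = new Document();
char[] chars = new char[IndexWriter.MAX_STORED_STRING_LENGTH + 1];
Arrays.fill(chars, 'a');
doc.add(new StoredField("content", new String(chars)));
try {
  writer.addDocument(doc);
} catch (IllegalArgumentException expected) {
  // thrown by DefaultIndexingChain before anything is written;
  // the writer stays open, unlike the old tragedy-style failure
}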

View File

@@ -22,9 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
@@ -32,7 +30,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
  * segment flushes. */
 class NormValuesWriter {

-  private FixedBitSet docsWithField;
+  private DocsWithFieldSet docsWithField;
   private PackedLongValues.Builder pending;
   private final Counter iwBytesUsed;
   private long bytesUsed;
@@ -40,7 +38,7 @@ class NormValuesWriter {
   private int lastDocID = -1;

   public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
-    docsWithField = new FixedBitSet(64);
+    docsWithField = new DocsWithFieldSet();
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
     bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
     this.fieldInfo = fieldInfo;
@@ -54,8 +52,7 @@ class NormValuesWriter {
     }

     pending.add(value);
-    docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
-    docsWithField.set(docID);
+    docsWithField.add(docID);

     updateBytesUsed();
@@ -82,7 +79,7 @@ class NormValuesWriter {
           if (fieldInfo != NormValuesWriter.this.fieldInfo) {
             throw new IllegalArgumentException("wrong fieldInfo");
           }
-          return new BufferedNorms(values, docsWithField);
+          return new BufferedNorms(values, docsWithField.iterator());
         }

         @Override
@@ -108,9 +105,9 @@ class NormValuesWriter {
     final DocIdSetIterator docsWithField;
     private long value;

-    BufferedNorms(PackedLongValues values, FixedBitSet docsWithFields) {
+    BufferedNorms(PackedLongValues values, DocIdSetIterator docsWithFields) {
       this.iter = values.iterator();
-      this.docsWithField = new BitSetIterator(docsWithFields, values.size());
+      this.docsWithField = docsWithFields;
     }

     @Override

View File

@@ -21,9 +21,7 @@ import java.io.IOException;

 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
@@ -34,13 +32,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
   private PackedLongValues.Builder pending;
   private final Counter iwBytesUsed;
   private long bytesUsed;
-  private FixedBitSet docsWithField;
+  private DocsWithFieldSet docsWithField;
   private final FieldInfo fieldInfo;
   private int lastDocID = -1;

   public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
-    docsWithField = new FixedBitSet(64);
+    docsWithField = new DocsWithFieldSet();
     bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
     this.fieldInfo = fieldInfo;
     this.iwBytesUsed = iwBytesUsed;
@@ -53,8 +51,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
     }

     pending.add(value);
-    docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
-    docsWithField.set(docID);
+    docsWithField.add(docID);

     updateBytesUsed();
@@ -83,7 +80,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
           if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
             throw new IllegalArgumentException("wrong fieldInfo");
           }
-          return new BufferedNumericDocValues(values, docsWithField);
+          return new BufferedNumericDocValues(values, docsWithField.iterator());
         }
       });
   }
@@ -94,9 +91,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
     final DocIdSetIterator docsWithField;
     private long value;

-    BufferedNumericDocValues(PackedLongValues values, FixedBitSet docsWithFields) {
+    BufferedNumericDocValues(PackedLongValues values, DocIdSetIterator docsWithFields) {
       this.iter = values.iterator();
-      this.docsWithField = new BitSetIterator(docsWithFields, values.size());
+      this.docsWithField = docsWithFields;
     }

     @Override

View File

@@ -22,13 +22,11 @@ import java.io.IOException;

 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
@@ -37,7 +35,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
 class SortedDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
   private PackedLongValues.Builder pending;
-  private FixedBitSet docsWithField;
+  private DocsWithFieldSet docsWithField;
   private final Counter iwBytesUsed;
   private long bytesUsed; // this currently only tracks differences in 'pending'
   private final FieldInfo fieldInfo;
@@ -52,7 +50,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
         BytesRefHash.DEFAULT_CAPACITY,
         new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
-    docsWithField = new FixedBitSet(64);
+    docsWithField = new DocsWithFieldSet();
     bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
@@ -69,8 +67,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
     }
     addOneValue(value);
-    docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
-    docsWithField.set(docID);
+    docsWithField.add(docID);

     lastDocID = docID;
   }
@@ -121,7 +118,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
           if (fieldInfoIn != fieldInfo) {
             throw new IllegalArgumentException("wrong fieldInfo");
           }
-          return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField);
+          return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField.iterator());
         }
       });
   }
@@ -136,13 +133,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
     final PackedLongValues.Iterator iter;
     final DocIdSetIterator docsWithField;

-    public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, FixedBitSet docsWithField) {
+    public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, DocIdSetIterator docsWithField) {
       this.hash = hash;
       this.valueCount = valueCount;
       this.sortedValues = sortedValues;
       this.iter = docToOrd.iterator();
       this.ordMap = ordMap;
-      this.docsWithField = new BitSetIterator(docsWithField, docToOrd.size());
+      this.docsWithField = docsWithField;
     }

     @Override

View File

@@ -23,9 +23,7 @@ import java.util.Arrays;

 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
@@ -34,7 +32,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
 class SortedNumericDocValuesWriter extends DocValuesWriter {
   private PackedLongValues.Builder pending; // stream of all values
   private PackedLongValues.Builder pendingCounts; // count of values per doc
-  private FixedBitSet docsWithField;
+  private DocsWithFieldSet docsWithField;
   private final Counter iwBytesUsed;
   private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
   private final FieldInfo fieldInfo;
@@ -47,7 +45,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
     this.iwBytesUsed = iwBytesUsed;
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
     pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
-    docsWithField = new FixedBitSet(64);
+    docsWithField = new DocsWithFieldSet();
     bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
     iwBytesUsed.addAndGet(bytesUsed);
   }
@@ -76,8 +74,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
     pendingCounts.add(currentUpto);
     currentUpto = 0;
-    docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
-    docsWithField.set(currentDoc);
+    docsWithField.add(currentDoc);
   }

   @Override
@@ -112,7 +109,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
           if (fieldInfoIn != fieldInfo) {
             throw new IllegalArgumentException("wrong fieldInfo");
           }
-          return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField);
+          return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField.iterator());
         }
       });
   }
@@ -124,10 +121,10 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
     private int valueCount;
     private int valueUpto;

-    public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, FixedBitSet docsWithField) {
+    public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, DocIdSetIterator docsWithField) {
       valuesIter = values.iterator();
       valueCountsIter = valueCounts.iterator();
-      this.docsWithField = new BitSetIterator(docsWithField, values.size());
+      this.docsWithField = docsWithField;
     }

     @Override

View File

@@ -24,13 +24,11 @@ import java.util.Arrays;

 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
@@ -40,7 +38,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
   private PackedLongValues.Builder pending; // stream of all termIDs
   private PackedLongValues.Builder pendingCounts; // termIDs per doc
-  private FixedBitSet docsWithField;
+  private DocsWithFieldSet docsWithField;
   private final Counter iwBytesUsed;
   private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
   private final FieldInfo fieldInfo;
@@ -59,7 +57,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
         new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
     pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
-    docsWithField = new FixedBitSet(64);
+    docsWithField = new DocsWithFieldSet();
     bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
@@ -103,8 +101,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
     pendingCounts.add(count);
     maxCount = Math.max(maxCount, count);
     currentUpto = 0;
-    docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
-    docsWithField.set(currentDoc);
+    docsWithField.add(currentDoc);
   }

   @Override
@@ -158,7 +155,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
           if (fieldInfoIn != fieldInfo) {
             throw new IllegalArgumentException("wrong fieldInfo");
           }
-          return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField);
+          return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator());
         }
       });
   }
@@ -176,14 +173,14 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
     private int ordCount;
     private int ordUpto;

-    public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, FixedBitSet docsWithField) {
+    public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, DocIdSetIterator docsWithField) {
       this.currentDoc = new int[maxCount];
       this.sortedValues = sortedValues;
       this.ordMap = ordMap;
       this.hash = hash;
       this.ordsIter = ords.iterator();
       this.ordCountsIter = ordCounts.iterator();
-      this.docsWithField = new BitSetIterator(docsWithField, ordCounts.size());
+      this.docsWithField = docsWithField;
     }

     @Override

View File

@@ -175,7 +175,9 @@ public class BM25Similarity extends Similarity {
     final long df = termStats.docFreq();
     final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
     final float idf = idf(df, docCount);
-    return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
+    return Explanation.match(idf, "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
+        Explanation.match(df, "docFreq"),
+        Explanation.match(docCount, "docCount"));
   }

   /**
@@ -192,16 +194,14 @@ public class BM25Similarity extends Similarity {
    * for each term.
    */
   public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
-    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
-    float idf = 0.0f;
+    double idf = 0d; // sum into a double before casting into a float
     List<Explanation> details = new ArrayList<>();
     for (final TermStatistics stat : termStats ) {
-      final long df = stat.docFreq();
-      final float termIdf = idf(df, docCount);
-      details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
-      idf += termIdf;
+      Explanation idfExplain = idfExplain(collectionStats, stat);
+      details.add(idfExplain);
+      idf += idfExplain.getValue();
     }
-    return Explanation.match(idf, "idf(), sum of:", details);
+    return Explanation.match((float) idf, "idf(), sum of:", details);
   }

   @Override
@@ -303,7 +303,7 @@ public class BM25Similarity extends Similarity {
       subs.add(Explanation.match(0, "parameter b (norms omitted for field)"));
       return Explanation.match(
           (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
-          "tfNorm, computed from:", subs);
+          "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1) from:", subs);
     } else {
       byte norm;
       if (norms.advanceExact(doc)) {
@@ -317,7 +317,7 @@
       subs.add(Explanation.match(doclen, "fieldLength"));
       return Explanation.match(
           (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)),
-          "tfNorm, computed from:", subs);
+          "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:", subs);
     }
   }
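
In math form, the formulas that the new explanation strings spell out are:

\mathrm{idf} = \log\left(1 + \frac{\mathrm{docCount} - \mathrm{docFreq} + 0.5}{\mathrm{docFreq} + 0.5}\right)
\qquad
\mathrm{tfNorm} = \frac{\mathrm{freq}\cdot(k_1+1)}{\mathrm{freq} + k_1\cdot\left(1 - b + b\cdot\frac{\mathrm{fieldLength}}{\mathrm{avgFieldLength}}\right)}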

View File

@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.similarities;
import java.io.IOException;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;
/**
* Simple similarity that gives terms a score that is equal to their query
* boost. This similarity is typically used with disabled norms since neither
* document statistics nor index statistics are used for scoring. That said,
* if norms are enabled, they will be computed the same way as
* {@link SimilarityBase} and {@link BM25Similarity} with
* {@link SimilarityBase#setDiscountOverlaps(boolean) discounted overlaps}
* so that the {@link Similarity} can be changed after the index has been
* created.
*/
public class BooleanSimilarity extends Similarity {
private static final Similarity BM25_SIM = new BM25Similarity();
/** Sole constructor */
public BooleanSimilarity() {}
@Override
public long computeNorm(FieldInvertState state) {
return BM25_SIM.computeNorm(state);
}
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new BooleanWeight(boost);
}
private static class BooleanWeight extends SimWeight {
final float boost;
BooleanWeight(float boost) {
this.boost = boost;
}
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
final float boost = ((BooleanWeight) weight).boost;
return new SimScorer() {
@Override
public float score(int doc, float freq) throws IOException {
return boost;
}
@Override
public Explanation explain(int doc, Explanation freq) throws IOException {
Explanation queryBoostExpl = Explanation.match(boost, "query boost");
return Explanation.match(
queryBoostExpl.getValue(),
"score(" + getClass().getSimpleName() + ", doc=" + doc + "), computed from:",
queryBoostExpl);
}
@Override
public float computeSlopFactor(int distance) {
return 1f;
}
@Override
public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
return 1f;
}
};
}
}
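
A minimal usage sketch for the new BooleanSimilarity (assumes an open IndexReader named reader; the same similarity should be set at index time and search time so norms stay consistent):

IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
config.setSimilarity(new BooleanSimilarity());
// ... add documents with an IndexWriter built from this config ...

IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(new BooleanSimilarity());
TopDocs hits = searcher.search(new TermQuery(new Term("title", "lucene")), 10);
// every hit scores exactly the query boost (1.0 by default),
// independent of term frequency and document statistics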

View File

@@ -18,6 +18,9 @@ package org.apache.lucene.search.similarities;

 import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SmallFloat;
@@ -121,6 +124,16 @@ public class ClassicSimilarity extends TFIDFSimilarity {
     return 1;
   }

+  @Override
+  public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
+    final long df = termStats.docFreq();
+    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
+    final float idf = idf(df, docCount);
+    return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
+        Explanation.match(df, "docFreq"),
+        Explanation.match(docCount, "docCount"));
+  }
+
   /** Implemented as <code>log((docCount+1)/(docFreq+1)) + 1</code>. */
   @Override
   public float idf(long docFreq, long docCount) {

View File

@@ -484,16 +484,14 @@ public abstract class TFIDFSimilarity extends Similarity {
    * for each term.
    */
   public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
-    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
-    float idf = 0.0f;
+    double idf = 0d; // sum into a double before casting into a float
     List<Explanation> subs = new ArrayList<>();
     for (final TermStatistics stat : termStats ) {
-      final long df = stat.docFreq();
-      final float termIdf = idf(df, docCount);
-      subs.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
-      idf += termIdf;
+      Explanation idfExplain = idfExplain(collectionStats, stat);
+      subs.add(idfExplain);
+      idf += idfExplain.getValue();
     }
-    return Explanation.match(idf, "idf(), sum of:", subs);
+    return Explanation.match((float) idf, "idf(), sum of:", subs);
   }

   /** Computes a score factor based on a term's document frequency (the number
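
The switch from a float accumulator to double in both idfExplain implementations avoids drift when many per-term idf values are summed; the cast to float happens once at the end. A small standalone illustration of the effect (the exact printed values depend on the addend, but the direction of the error is typical):

float f = 0f;
double d = 0d;
for (int i = 0; i < 1_000_000; i++) {
  f += 0.1f;   // float accumulator: rounding error compounds as the sum grows
  d += 0.1f;   // same addends, double accumulator
}
System.out.println(f);         // roughly 100958.34
System.out.println((float) d); // 100000.0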

View File

@@ -49,19 +49,23 @@ public final class SpanNotQuery extends SpanQuery {
   /** Construct a SpanNotQuery matching spans from <code>include</code> which
    * have no overlap with spans from <code>exclude</code> within
-   * <code>dist</code> tokens of <code>include</code>. */
+   * <code>dist</code> tokens of <code>include</code>. Inversely, a negative
+   * <code>dist</code> value may be used to specify a certain amount of allowable
+   * overlap. */
   public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
     this(include, exclude, dist, dist);
   }

   /** Construct a SpanNotQuery matching spans from <code>include</code> which
    * have no overlap with spans from <code>exclude</code> within
-   * <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
+   * <code>pre</code> tokens before or <code>post</code> tokens of
+   * <code>include</code>. Inversely, negative values for <code>pre</code> and/or
+   * <code>post</code> allow a certain amount of overlap to occur. */
   public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
     this.include = Objects.requireNonNull(include);
     this.exclude = Objects.requireNonNull(exclude);
-    this.pre = (pre >=0) ? pre : 0;
-    this.post = (post >= 0) ? post : 0;
+    this.pre = pre;
+    this.post = post;

     if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))
       throw new IllegalArgumentException("Clauses must have same field.");
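
A usage sketch of the LUCENE-7431 change (field and term names are made up): negative pre/post values now pass through unchanged instead of being clamped to 0, so the exclusion can tolerate partial overlap:

SpanQuery include = new SpanNearQuery(new SpanQuery[] {
    new SpanTermQuery(new Term("body", "quick")),
    new SpanTermQuery(new Term("body", "fox"))}, 1, true);
SpanQuery exclude = new SpanTermQuery(new Term("body", "brown"));

// pre = post = -1: an exclude span may overlap the include span by up to
// one token without rejecting the match; 0 would mean no overlap at all.
SpanNotQuery query = new SpanNotQuery(include, exclude, -1, -1);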

View File

@@ -84,7 +84,7 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
    * unicode text, with no unpaired surrogates.
    */
   public BytesRef(CharSequence text) {
-    this(new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * text.length()]);
+    this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
     length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
   }

View File

@@ -143,7 +143,7 @@
    * represent the provided text.
    */
   public void copyChars(CharSequence text, int off, int len) {
-    grow(len * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
+    grow(UnicodeUtil.maxUTF8Length(len));
     ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
   }

@@ -152,7 +152,7 @@
    * represent the provided text.
    */
   public void copyChars(char[] text, int off, int len) {
-    grow(len * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
+    grow(UnicodeUtil.maxUTF8Length(len));
     ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
   }

View File

@@ -613,6 +613,11 @@
     return out_offset;
   }

+  /** Returns the maximum number of utf8 bytes required to encode a utf16 (e.g., java char[], String) */
+  public static int maxUTF8Length(int utf16Length) {
+    return Math.multiplyExact(utf16Length, MAX_UTF8_BYTES_PER_CHAR);
+  }
+
   /**
    * Utility method for {@link #UTF8toUTF16(byte[], int, int, char[])}
    * @see #UTF8toUTF16(byte[], int, int, char[])
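
The point of routing every len * MAX_UTF8_BYTES_PER_CHAR computation (GrowableByteArrayDataOutput, BytesRef, BytesRefBuilder above) through this helper is overflow safety: with MAX_UTF8_BYTES_PER_CHAR == 3, the plain multiplication wraps around for lengths above Integer.MAX_VALUE / 3, silently producing a negative buffer size, whereas Math.multiplyExact throws. A small sketch:

int len = Integer.MAX_VALUE / 3 + 1;                     // 715,827,883 chars
int unsafe = len * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;  // wraps to -2147483647
int safe = UnicodeUtil.maxUTF8Length(len);               // throws ArithmeticException instead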

View File

@@ -23,7 +23,6 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
-import org.apache.lucene.util.packed.PackedInts;

 // TODO: could we somehow stream an FST to disk while we
 // build it?
@@ -70,10 +69,6 @@ public class Builder<T> {
   private final IntsRefBuilder lastInput = new IntsRefBuilder();

-  // for packing
-  private final boolean doPackFST;
-  private final float acceptableOverheadRatio;
-
   // NOTE: cutting this over to ArrayList instead loses ~6%
   // in build performance on 9.8M Wikipedia terms; so we
   // left this as an array:
@@ -99,11 +94,10 @@ public class Builder<T> {
   /**
    * Instantiates an FST/FSA builder without any pruning. A shortcut
    * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
-   * boolean, int, Outputs, boolean, float,
-   * boolean, int)} with pruning options turned off.
+   * boolean, int, Outputs, boolean, int)} with pruning options turned off.
    */
   public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
-    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, false, PackedInts.COMPACT, true, 15);
+    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
   }

   /**
@@ -143,11 +137,6 @@ public class Builder<T> {
    * FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
    * singleton output object.
    *
-   * @param doPackFST Pass true to create a packed FST.
-   *
-   * @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
-   *    is only relevant when doPackFST is true. @see PackedInts#getMutable(int, int, float)
-   *
    * @param allowArrayArcs Pass false to disable the array arc optimization
    *    while building the FST; this will make the resulting
    *    FST smaller but slower to traverse.
@@ -159,16 +148,13 @@ public class Builder<T> {
    */
   public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
                  boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
-                 boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
-                 int bytesPageBits) {
+                 boolean allowArrayArcs, int bytesPageBits) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;
     this.doShareNonSingletonNodes = doShareNonSingletonNodes;
     this.shareMaxTailLength = shareMaxTailLength;
-    this.doPackFST = doPackFST;
-    this.acceptableOverheadRatio = acceptableOverheadRatio;
     this.allowArrayArcs = allowArrayArcs;
-    fst = new FST<>(inputType, outputs, doPackFST, acceptableOverheadRatio, bytesPageBits);
+    fst = new FST<>(inputType, outputs, bytesPageBits);
     bytes = fst.bytes;
     assert bytes != null;
     if (doShareSuffix) {
@@ -496,12 +482,8 @@ public class Builder<T> {
     //if (DEBUG) System.out.println("  builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
     fst.finish(compileNode(root, lastInput.length()).node);

-    if (doPackFST) {
-      return fst.pack(this, 3, Math.max(10, (int) (getNodeCount()/4)), acceptableOverheadRatio);
-    } else {
-      return fst;
-    }
+    return fst;
   }

   private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException {
     for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
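
After the removal of doPackFST and acceptableOverheadRatio, the long constructor is down to nine arguments, and most callers go through the two-argument shortcut. A minimal build with the simplified API (inputs must be added in sorted order; the calls throw IOException, so this belongs in a method that declares it):

PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
builder.add(Util.toIntsRef(new BytesRef("cat"), scratch), 5L);
builder.add(Util.toIntsRef(new BytesRef("dog"), scratch), 7L);
FST<Long> fst = builder.finish();                // no separate pack() step anymore
Long value = Util.get(fst, new BytesRef("dog")); // 7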

View File

@@ -24,13 +24,9 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;

 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
@@ -38,13 +34,9 @@ import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.GrowableWriter;
-import org.apache.lucene.util.packed.PackedInts;

 // TODO: break this into WritableFST and ReadOnlyFST.. then
 // we can have subclasses of ReadOnlyFST to handle the
@@ -90,14 +82,6 @@ public final class FST<T> implements Accountable {
   static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;

-  // Arcs are stored as fixed-size (per entry) array, so
-  // that we can find an arc using binary search. We do
-  // this when number of arcs is > NUM_ARCS_ARRAY:
-
-  // If set, the target node is delta coded vs current
-  // position:
-  private static final int BIT_TARGET_DELTA = 1 << 6;
-
   // We use this as a marker (because this one flag is
   // illegal by itself ...):
   private static final byte ARCS_AS_FIXED_ARRAY = BIT_ARC_HAS_FINAL_OUTPUT;
@@ -137,7 +121,9 @@ public final class FST<T> implements Accountable {
   /** Don't store arcWithOutputCount anymore */
   private static final int VERSION_NO_NODE_ARC_COUNTS = 5;

-  private static final int VERSION_CURRENT = VERSION_NO_NODE_ARC_COUNTS;
+  private static final int VERSION_PACKED_REMOVED = 6;
+
+  private static final int VERSION_CURRENT = VERSION_PACKED_REMOVED;

   // Never serialized; just used to represent the virtual
   // final node w/ no arcs:
@@ -168,9 +154,6 @@ public final class FST<T> implements Accountable {
   public final Outputs<T> outputs;

-  private final boolean packed;
-  private PackedInts.Reader nodeRefToAddress;
-
   private Arc<T> cachedRootArcs[];

   /** Represents a single arc. */
@@ -273,18 +256,11 @@ public final class FST<T> implements Accountable {
     return (flags & bit) != 0;
   }

-  private GrowableWriter nodeAddress;
-
-  // TODO: we could be smarter here, and prune periodically
-  // as we go; high in-count nodes will "usually" become
-  // clear early on:
-  private GrowableWriter inCounts;
-
   private final int version;

   // make a new empty FST, for building; Builder invokes
   // this ctor
-  FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio, int bytesPageBits) {
+  FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
     this.inputType = inputType;
     this.outputs = outputs;
     version = VERSION_CURRENT;
@@ -293,17 +269,8 @@ public final class FST<T> implements Accountable {
     // pad: ensure no node gets address 0 which is reserved to mean
     // the stop state w/ no arcs
     bytes.writeByte((byte) 0);
-    if (willPackFST) {
-      nodeAddress = new GrowableWriter(15, 8, acceptableOverheadRatio);
-      inCounts = new GrowableWriter(1, 8, acceptableOverheadRatio);
-    } else {
-      nodeAddress = null;
-      inCounts = null;
-    }
     emptyOutput = null;
-    packed = false;
-    nodeRefToAddress = null;
   }

   public static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28;
@ -324,8 +291,12 @@ public final class FST<T> implements Accountable {
// NOTE: only reads most recent format; we don't have // NOTE: only reads most recent format; we don't have
// back-compat promise for FSTs (they are experimental): // back-compat promise for FSTs (they are experimental):
version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_NO_NODE_ARC_COUNTS); version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_CURRENT);
packed = in.readByte() == 1; if (version < VERSION_PACKED_REMOVED) {
if (in.readByte() == 1) {
throw new CorruptIndexException("Cannot read packed FSTs anymore", in);
}
}
if (in.readByte() == 1) { if (in.readByte() == 1) {
// accepts empty string // accepts empty string
// 1 KB blocks: // 1 KB blocks:
@ -334,18 +305,13 @@ public final class FST<T> implements Accountable {
emptyBytes.copyBytes(in, numBytes); emptyBytes.copyBytes(in, numBytes);
// De-serialize empty-string output: // De-serialize empty-string output:
BytesReader reader; BytesReader reader = emptyBytes.getReverseReader();
if (packed) {
reader = emptyBytes.getForwardReader();
} else {
reader = emptyBytes.getReverseReader();
// NoOutputs uses 0 bytes when writing its output, // NoOutputs uses 0 bytes when writing its output,
// so we have to check here else BytesStore gets // so we have to check here else BytesStore gets
// angry: // angry:
if (numBytes > 0) { if (numBytes > 0) {
reader.setPosition(numBytes-1); reader.setPosition(numBytes-1);
} }
}
emptyOutput = outputs.readFinalOutput(reader); emptyOutput = outputs.readFinalOutput(reader);
} else { } else {
emptyOutput = null; emptyOutput = null;
@ -364,11 +330,6 @@ public final class FST<T> implements Accountable {
default: default:
throw new IllegalStateException("invalid input type " + t); throw new IllegalStateException("invalid input type " + t);
} }
if (packed) {
nodeRefToAddress = PackedInts.getReader(in);
} else {
nodeRefToAddress = null;
}
startNode = in.readVLong(); startNode = in.readVLong();
if (version < VERSION_NO_NODE_ARC_COUNTS) { if (version < VERSION_NO_NODE_ARC_COUNTS) {
in.readVLong(); in.readVLong();
@ -424,31 +385,13 @@ public final class FST<T> implements Accountable {
} else { } else {
size += bytes.ramBytesUsed(); size += bytes.ramBytesUsed();
} }
if (packed) {
size += nodeRefToAddress.ramBytesUsed();
} else if (nodeAddress != null) {
size += nodeAddress.ramBytesUsed();
size += inCounts.ramBytesUsed();
}
size += cachedArcsBytesUsed; size += cachedArcsBytesUsed;
return size; return size;
} }
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
if (packed) {
resources.add(Accountables.namedAccountable("node ref to address", nodeRefToAddress));
} else if (nodeAddress != null) {
resources.add(Accountables.namedAccountable("node addresses", nodeAddress));
resources.add(Accountables.namedAccountable("in counts", inCounts));
}
return resources;
}
@Override @Override
public String toString() { public String toString() {
return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs + ",packed=" + packed; return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs;
} }
void finish(long newStartNode) throws IOException { void finish(long newStartNode) throws IOException {
@ -464,16 +407,6 @@ public final class FST<T> implements Accountable {
cacheRootArcs(); cacheRootArcs();
} }
private long getNodeAddress(long node) {
if (nodeAddress != null) {
// Deref
return nodeAddress.get((int) node);
} else {
// Straight
return node;
}
}
// Optionally caches first 128 labels // Optionally caches first 128 labels
@SuppressWarnings({"rawtypes","unchecked"}) @SuppressWarnings({"rawtypes","unchecked"})
private void cacheRootArcs() throws IOException { private void cacheRootArcs() throws IOException {
@ -527,18 +460,7 @@ public final class FST<T> implements Accountable {
if (startNode == -1) { if (startNode == -1) {
throw new IllegalStateException("call finish first"); throw new IllegalStateException("call finish first");
} }
if (nodeAddress != null) {
throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
}
if (packed && !(nodeRefToAddress instanceof PackedInts.Mutable)) {
throw new IllegalStateException("cannot save a FST which has been loaded from disk ");
}
CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT); CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
if (packed) {
out.writeByte((byte) 1);
} else {
out.writeByte((byte) 0);
}
// TODO: really we should encode this as an arc, arriving // TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here: // to the root node, instead of special casing here:
if (emptyOutput != null) { if (emptyOutput != null) {
@ -552,7 +474,6 @@ public final class FST<T> implements Accountable {
byte[] emptyOutputBytes = new byte[(int) ros.getFilePointer()]; byte[] emptyOutputBytes = new byte[(int) ros.getFilePointer()];
ros.writeTo(emptyOutputBytes, 0); ros.writeTo(emptyOutputBytes, 0);
if (!packed) {
// reverse // reverse
final int stopAt = emptyOutputBytes.length/2; final int stopAt = emptyOutputBytes.length/2;
int upto = 0; int upto = 0;
@ -562,7 +483,6 @@ public final class FST<T> implements Accountable {
emptyOutputBytes[emptyOutputBytes.length-upto-1] = b; emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
upto++; upto++;
} }
}
out.writeVInt(emptyOutputBytes.length); out.writeVInt(emptyOutputBytes.length);
out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length); out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
} else { } else {
@ -577,9 +497,6 @@ public final class FST<T> implements Accountable {
t = 2; t = 2;
} }
out.writeByte(t); out.writeByte(t);
if (packed) {
((PackedInts.Mutable) nodeRefToAddress).save(out);
}
out.writeVLong(startNode); out.writeVLong(startNode);
if (bytes != null) { if (bytes != null) {
long numBytes = bytes.getPosition(); long numBytes = bytes.getPosition();
@ -705,8 +622,6 @@ public final class FST<T> implements Accountable {
if (!targetHasArcs) { if (!targetHasArcs) {
flags += BIT_STOP_NODE; flags += BIT_STOP_NODE;
} else if (inCounts != null) {
inCounts.set((int) target.node, inCounts.get((int) target.node) + 1);
} }
if (arc.output != NO_OUTPUT) { if (arc.output != NO_OUTPUT) {
@ -810,30 +725,8 @@ public final class FST<T> implements Accountable {
builder.bytes.reverse(startAddress, thisNodeAddress); builder.bytes.reverse(startAddress, thisNodeAddress);
// PackedInts uses int as the index, so we cannot handle
// > 2.1B nodes when packing:
if (nodeAddress != null && builder.nodeCount == Integer.MAX_VALUE) {
throw new IllegalStateException("cannot create a packed FST with more than 2.1 billion nodes");
}
builder.nodeCount++; builder.nodeCount++;
final long node; return thisNodeAddress;
if (nodeAddress != null) {
// Nodes are addressed by 1+ord:
if ((int) builder.nodeCount == nodeAddress.size()) {
nodeAddress = nodeAddress.resize(ArrayUtil.oversize(nodeAddress.size() + 1, nodeAddress.getBitsPerValue()));
inCounts = inCounts.resize(ArrayUtil.oversize(inCounts.size() + 1, inCounts.getBitsPerValue()));
}
nodeAddress.set((int) builder.nodeCount, thisNodeAddress);
// System.out.println(" write nodeAddress[" + nodeCount + "] = " + endAddress);
node = builder.nodeCount;
} else {
node = thisNodeAddress;
}
//System.out.println(" ret node=" + node + " address=" + thisNodeAddress + " nodeAddress=" + nodeAddress);
return node;
} }
/** Fills virtual 'start' arc, ie, an empty incoming arc to /** Fills virtual 'start' arc, ie, an empty incoming arc to
@ -876,13 +769,13 @@ public final class FST<T> implements Accountable {
arc.flags = BIT_LAST_ARC; arc.flags = BIT_LAST_ARC;
return arc; return arc;
} else { } else {
in.setPosition(getNodeAddress(follow.target)); in.setPosition(follow.target);
arc.node = follow.target; arc.node = follow.target;
final byte b = in.readByte(); final byte b = in.readByte();
if (b == ARCS_AS_FIXED_ARRAY) { if (b == ARCS_AS_FIXED_ARRAY) {
// array: jump straight to end // array: jump straight to end
arc.numArcs = in.readVInt(); arc.numArcs = in.readVInt();
if (packed || version >= VERSION_VINT_TARGET) { if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt(); arc.bytesPerArc = in.readVInt();
} else { } else {
arc.bytesPerArc = in.readInt(); arc.bytesPerArc = in.readInt();
@ -906,8 +799,6 @@ public final class FST<T> implements Accountable {
} }
if (arc.flag(BIT_STOP_NODE)) { if (arc.flag(BIT_STOP_NODE)) {
} else if (arc.flag(BIT_TARGET_NEXT)) { } else if (arc.flag(BIT_TARGET_NEXT)) {
} else if (packed) {
in.readVLong();
} else { } else {
readUnpackedNodeTarget(in); readUnpackedNodeTarget(in);
} }
@ -964,7 +855,7 @@ public final class FST<T> implements Accountable {
} }
public Arc<T> readFirstRealTargetArc(long node, Arc<T> arc, final BytesReader in) throws IOException { public Arc<T> readFirstRealTargetArc(long node, Arc<T> arc, final BytesReader in) throws IOException {
final long address = getNodeAddress(node); final long address = node;
in.setPosition(address); in.setPosition(address);
//System.out.println(" readFirstRealTargtArc address=" //System.out.println(" readFirstRealTargtArc address="
//+ address); //+ address);
@ -975,7 +866,7 @@ public final class FST<T> implements Accountable {
//System.out.println(" fixedArray"); //System.out.println(" fixedArray");
// this is first arc in a fixed-array // this is first arc in a fixed-array
arc.numArcs = in.readVInt(); arc.numArcs = in.readVInt();
if (packed || version >= VERSION_VINT_TARGET) { if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt(); arc.bytesPerArc = in.readVInt();
} else { } else {
arc.bytesPerArc = in.readInt(); arc.bytesPerArc = in.readInt();
@ -1002,7 +893,7 @@ public final class FST<T> implements Accountable {
if (!targetHasArcs(follow)) { if (!targetHasArcs(follow)) {
return false; return false;
} else { } else {
in.setPosition(getNodeAddress(follow.target)); in.setPosition(follow.target);
return in.readByte() == ARCS_AS_FIXED_ARRAY; return in.readByte() == ARCS_AS_FIXED_ARRAY;
} }
} }
@ -1029,7 +920,7 @@ public final class FST<T> implements Accountable {
//System.out.println(" nextArc fake " + //System.out.println(" nextArc fake " +
//arc.nextArc); //arc.nextArc);
long pos = getNodeAddress(arc.nextArc); long pos = arc.nextArc;
in.setPosition(pos); in.setPosition(pos);
final byte b = in.readByte(); final byte b = in.readByte();
@ -1038,7 +929,7 @@ public final class FST<T> implements Accountable {
in.readVInt(); in.readVInt();
// Skip bytesPerArc: // Skip bytesPerArc:
if (packed || version >= VERSION_VINT_TARGET) { if (version >= VERSION_VINT_TARGET) {
in.readVInt(); in.readVInt();
} else { } else {
in.readInt(); in.readInt();
@ -1107,7 +998,6 @@ public final class FST<T> implements Accountable {
arc.nextArc = in.getPosition(); arc.nextArc = in.getPosition();
// TODO: would be nice to make this lazy -- maybe // TODO: would be nice to make this lazy -- maybe
// caller doesn't need the target and is scanning arcs... // caller doesn't need the target and is scanning arcs...
if (nodeAddress == null) {
if (!arc.flag(BIT_LAST_ARC)) { if (!arc.flag(BIT_LAST_ARC)) {
if (arc.bytesPerArc == 0) { if (arc.bytesPerArc == 0) {
// must scan // must scan
@ -1118,30 +1008,8 @@ public final class FST<T> implements Accountable {
} }
} }
arc.target = in.getPosition(); arc.target = in.getPosition();
} else {
arc.target = arc.node - 1;
assert arc.target > 0;
}
} else {
if (packed) {
final long pos = in.getPosition();
final long code = in.readVLong();
if (arc.flag(BIT_TARGET_DELTA)) {
// Address is delta-coded from current address:
arc.target = pos + code;
//System.out.println(" delta pos=" + pos + " delta=" + code + " target=" + arc.target);
} else if (code < nodeRefToAddress.size()) {
// Deref
arc.target = nodeRefToAddress.get((int) code);
//System.out.println(" deref code=" + code + " target=" + arc.target);
} else {
// Absolute
arc.target = code;
//System.out.println(" abs code=" + code);
}
} else { } else {
arc.target = readUnpackedNodeTarget(in); arc.target = readUnpackedNodeTarget(in);
}
arc.nextArc = in.getPosition(); arc.nextArc = in.getPosition();
} }
return arc; return arc;
@ -1228,7 +1096,7 @@ public final class FST<T> implements Accountable {
return null; return null;
} }
in.setPosition(getNodeAddress(follow.target)); in.setPosition(follow.target);
arc.node = follow.target; arc.node = follow.target;
@ -1237,7 +1105,7 @@ public final class FST<T> implements Accountable {
if (in.readByte() == ARCS_AS_FIXED_ARRAY) { if (in.readByte() == ARCS_AS_FIXED_ARRAY) {
// Arcs are full array; do binary search: // Arcs are full array; do binary search:
arc.numArcs = in.readVInt(); arc.numArcs = in.readVInt();
if (packed || version >= VERSION_VINT_TARGET) { if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt(); arc.bytesPerArc = in.readVInt();
} else { } else {
arc.bytesPerArc = in.readInt(); arc.bytesPerArc = in.readInt();
@ -1303,12 +1171,8 @@ public final class FST<T> implements Accountable {
} }
if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) { if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) {
if (packed) {
in.readVLong();
} else {
readUnpackedNodeTarget(in); readUnpackedNodeTarget(in);
} }
}
if (flag(flags, BIT_LAST_ARC)) { if (flag(flags, BIT_LAST_ARC)) {
return; return;
@ -1340,20 +1204,12 @@ public final class FST<T> implements Accountable {
/** Returns a {@link BytesReader} for this FST, positioned at /** Returns a {@link BytesReader} for this FST, positioned at
* position 0. */ * position 0. */
public BytesReader getBytesReader() { public BytesReader getBytesReader() {
if (packed) {
if (bytesArray != null) {
return new ForwardBytesReader(bytesArray);
} else {
return bytes.getForwardReader();
}
} else {
if (bytesArray != null) { if (bytesArray != null) {
return new ReverseBytesReader(bytesArray); return new ReverseBytesReader(bytesArray);
} else { } else {
return bytes.getReverseReader(); return bytes.getReverseReader();
} }
} }
}
/** Reads bytes stored in an FST. */ /** Reads bytes stored in an FST. */
public static abstract class BytesReader extends DataInput { public static abstract class BytesReader extends DataInput {
@ -1476,395 +1332,4 @@ public final class FST<T> implements Accountable {
} }
*/ */
// Creates a packed FST
private FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
version = VERSION_CURRENT;
packed = true;
this.inputType = inputType;
bytesArray = null;
bytes = new BytesStore(bytesPageBits);
this.outputs = outputs;
}
/** Expert: creates an FST by packing this one. This
* process requires substantial additional RAM (currently
* up to ~8 bytes per node depending on
* <code>acceptableOverheadRatio</code>), but then should
* produce a smaller FST.
*
* <p>The implementation of this method uses ideas from
* <a target="_blank" href="http://www.cs.put.poznan.pl/dweiss/site/publications/download/fsacomp.pdf">Smaller Representation of Finite State Automata</a>,
* which describes techniques to reduce the size of a FST.
* However, this is not a strict implementation of the
* algorithms described in this paper.
*/
FST<T> pack(Builder<T> builder, int minInCountDeref, int maxDerefNodes, float acceptableOverheadRatio) throws IOException {
// NOTE: maxDerefNodes is intentionally int: we cannot
// support > 2.1B deref nodes
// TODO: other things to try
// - renumber the nodes to get more next / better locality?
// - allow multiple input labels on an arc, so
// singular chain of inputs can take one arc (on
// wikipedia terms this could save another ~6%)
// - in the ord case, the output '1' is presumably
// very common (after NO_OUTPUT)... maybe use a bit
// for it..?
// - use spare bits in flags.... for top few labels /
// outputs / targets
if (nodeAddress == null) {
throw new IllegalArgumentException("this FST was not built with willPackFST=true");
}
T NO_OUTPUT = outputs.getNoOutput();
Arc<T> arc = new Arc<>();
final BytesReader r = getBytesReader();
final int topN = Math.min(maxDerefNodes, inCounts.size());
// Find top nodes with highest number of incoming arcs:
NodeQueue q = new NodeQueue(topN);
// TODO: we could use more RAM efficient selection algo here...
NodeAndInCount bottom = null;
for(int node=0; node<inCounts.size(); node++) {
if (inCounts.get(node) >= minInCountDeref) {
if (bottom == null) {
q.add(new NodeAndInCount(node, (int) inCounts.get(node)));
if (q.size() == topN) {
bottom = q.top();
}
} else if (inCounts.get(node) > bottom.count) {
q.insertWithOverflow(new NodeAndInCount(node, (int) inCounts.get(node)));
}
}
}
// Free up RAM:
inCounts = null;
final Map<Integer,Integer> topNodeMap = new HashMap<>();
for(int downTo=q.size()-1;downTo>=0;downTo--) {
NodeAndInCount n = q.pop();
topNodeMap.put(n.node, downTo);
//System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
}
// +1 because node ords start at 1 (0 is reserved as stop node):
final GrowableWriter newNodeAddress = new GrowableWriter(
PackedInts.bitsRequired(builder.bytes.getPosition()), (int) (1 + builder.nodeCount), acceptableOverheadRatio);
// Fill initial coarse guess:
for(int node=1;node<=builder.nodeCount;node++) {
newNodeAddress.set(node, 1 + builder.bytes.getPosition() - nodeAddress.get(node));
}
int absCount;
int deltaCount;
int topCount;
int nextCount;
FST<T> fst;
// Iterate until we converge:
while(true) {
//System.out.println("\nITER");
boolean changed = false;
// for assert:
boolean negDelta = false;
fst = new FST<>(inputType, outputs, builder.bytes.getBlockBits());
final BytesStore writer = fst.bytes;
// Skip 0 byte since 0 is reserved target:
writer.writeByte((byte) 0);
absCount = deltaCount = topCount = nextCount = 0;
int changedCount = 0;
long addressError = 0;
//int totWasted = 0;
// Since we re-reverse the bytes, we now write the
// nodes backwards, so that BIT_TARGET_NEXT is
// unchanged:
for(int node=(int) builder.nodeCount;node>=1;node--) {
final long address = writer.getPosition();
//System.out.println(" node: " + node + " address=" + address);
if (address != newNodeAddress.get(node)) {
addressError = address - newNodeAddress.get(node);
//System.out.println(" change: " + (address - newNodeAddress[node]));
changed = true;
newNodeAddress.set(node, address);
changedCount++;
}
int nodeArcCount = 0;
int bytesPerArc = 0;
boolean retry = false;
// for assert:
boolean anyNegDelta = false;
// Retry loop: possibly iterate more than once, if
// this is an array'd node and bytesPerArc changes:
writeNode:
while(true) { // retry writing this node
//System.out.println(" cycle: retry");
readFirstRealTargetArc(node, arc, r);
final boolean useArcArray = arc.bytesPerArc != 0;
if (useArcArray) {
// Write false first arc:
if (bytesPerArc == 0) {
bytesPerArc = arc.bytesPerArc;
}
writer.writeByte(ARCS_AS_FIXED_ARRAY);
writer.writeVInt(arc.numArcs);
writer.writeVInt(bytesPerArc);
//System.out.println("node " + node + ": " + arc.numArcs + " arcs");
}
int maxBytesPerArc = 0;
//int wasted = 0;
while(true) { // iterate over all arcs for this node
//System.out.println(" cycle next arc");
final long arcStartPos = writer.getPosition();
nodeArcCount++;
byte flags = 0;
if (arc.isLast()) {
flags += BIT_LAST_ARC;
}
/*
if (!useArcArray && nodeUpto < nodes.length-1 && arc.target == nodes[nodeUpto+1]) {
flags += BIT_TARGET_NEXT;
}
*/
if (!useArcArray && node != 1 && arc.target == node-1) {
flags += BIT_TARGET_NEXT;
if (!retry) {
nextCount++;
}
}
if (arc.isFinal()) {
flags += BIT_FINAL_ARC;
if (arc.nextFinalOutput != NO_OUTPUT) {
flags += BIT_ARC_HAS_FINAL_OUTPUT;
}
} else {
assert arc.nextFinalOutput == NO_OUTPUT;
}
if (!targetHasArcs(arc)) {
flags += BIT_STOP_NODE;
}
if (arc.output != NO_OUTPUT) {
flags += BIT_ARC_HAS_OUTPUT;
}
final long absPtr;
final boolean doWriteTarget = targetHasArcs(arc) && (flags & BIT_TARGET_NEXT) == 0;
if (doWriteTarget) {
final Integer ptr = topNodeMap.get(arc.target);
if (ptr != null) {
absPtr = ptr;
} else {
absPtr = topNodeMap.size() + newNodeAddress.get((int) arc.target) + addressError;
}
long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition() - 2;
if (delta < 0) {
//System.out.println("neg: " + delta);
anyNegDelta = true;
delta = 0;
}
if (delta < absPtr) {
flags |= BIT_TARGET_DELTA;
}
} else {
absPtr = 0;
}
assert flags != ARCS_AS_FIXED_ARRAY;
writer.writeByte(flags);
fst.writeLabel(writer, arc.label);
if (arc.output != NO_OUTPUT) {
outputs.write(arc.output, writer);
}
if (arc.nextFinalOutput != NO_OUTPUT) {
outputs.writeFinalOutput(arc.nextFinalOutput, writer);
}
if (doWriteTarget) {
long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition();
if (delta < 0) {
anyNegDelta = true;
//System.out.println("neg: " + delta);
delta = 0;
}
if (flag(flags, BIT_TARGET_DELTA)) {
//System.out.println(" delta");
writer.writeVLong(delta);
if (!retry) {
deltaCount++;
}
} else {
/*
if (ptr != null) {
System.out.println(" deref");
} else {
System.out.println(" abs");
}
*/
writer.writeVLong(absPtr);
if (!retry) {
if (absPtr >= topNodeMap.size()) {
absCount++;
} else {
topCount++;
}
}
}
}
if (useArcArray) {
final int arcBytes = (int) (writer.getPosition() - arcStartPos);
//System.out.println(" " + arcBytes + " bytes");
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
// NOTE: this may in fact go "backwards", if
// somehow (rarely, possibly never) we use
// more bytesPerArc in this rewrite than the
// incoming FST did... but in this case we
// will retry (below) so it's OK to overwrite
// bytes:
//wasted += bytesPerArc - arcBytes;
writer.skipBytes((int) (arcStartPos + bytesPerArc - writer.getPosition()));
}
if (arc.isLast()) {
break;
}
readNextRealArc(arc, r);
}
if (useArcArray) {
if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
// converged
//System.out.println(" bba=" + bytesPerArc + " wasted=" + wasted);
//totWasted += wasted;
break;
}
} else {
break;
}
//System.out.println(" retry this node maxBytesPerArc=" + maxBytesPerArc + " vs " + bytesPerArc);
// Retry:
bytesPerArc = maxBytesPerArc;
writer.truncate(address);
nodeArcCount = 0;
retry = true;
anyNegDelta = false;
}
negDelta |= anyNegDelta;
}
if (!changed) {
// We don't renumber the nodes (just reverse their
// order) so nodes should only point forward to
// other nodes because we only produce acyclic FSTs
// w/ nodes only pointing "forwards":
assert !negDelta;
//System.out.println("TOT wasted=" + totWasted);
// Converged!
break;
}
}
long maxAddress = 0;
for (long key : topNodeMap.keySet()) {
maxAddress = Math.max(maxAddress, newNodeAddress.get((int) key));
}
PackedInts.Mutable nodeRefToAddressIn = PackedInts.getMutable(topNodeMap.size(),
PackedInts.bitsRequired(maxAddress), acceptableOverheadRatio);
for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
nodeRefToAddressIn.set(ent.getValue(), newNodeAddress.get(ent.getKey()));
}
fst.nodeRefToAddress = nodeRefToAddressIn;
fst.startNode = newNodeAddress.get((int) startNode);
//System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
if (emptyOutput != null) {
fst.setEmptyOutput(emptyOutput);
}
fst.bytes.finish();
fst.cacheRootArcs();
//final int size = fst.sizeInBytes();
//System.out.println("nextCount=" + nextCount + " topCount=" + topCount + " deltaCount=" + deltaCount + " absCount=" + absCount);
return fst;
}
private static class NodeAndInCount implements Comparable<NodeAndInCount> {
final int node;
final int count;
public NodeAndInCount(int node, int count) {
this.node = node;
this.count = count;
}
@Override
public int compareTo(NodeAndInCount other) {
if (count > other.count) {
return 1;
} else if (count < other.count) {
return -1;
} else {
// Tie-break: smaller node compares as greater than
return other.node - node;
}
}
}
private static class NodeQueue extends PriorityQueue<NodeAndInCount> {
public NodeQueue(int topN) {
super(topN, false);
}
@Override
public boolean lessThan(NodeAndInCount a, NodeAndInCount b) {
final int cmp = a.compareTo(b);
assert cmp != 0;
return cmp < 0;
}
}
} }
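The read-side gate introduced above is compact but easy to misread, so here is a standalone restatement as a sketch. FILE_FORMAT_NAME and the version constants are private in FST, so the literal values below are assumptions, not the library's published API:

```java
import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.DataInput;

public class FstHeaderCheck {
  // Stand-ins for FST's private constants; the exact values are assumptions.
  static final int VERSION_PACKED = 3;
  static final int VERSION_PACKED_REMOVED = 6;

  static int checkFstVersion(DataInput in) throws IOException {
    int version = CodecUtil.checkHeader(in, "FST", VERSION_PACKED, VERSION_PACKED_REMOVED);
    if (version < VERSION_PACKED_REMOVED) {
      // Pre-removal files still serialize the packed flag byte; a set flag
      // is now a hard error instead of selecting a forward reader.
      if (in.readByte() == 1) {
        throw new CorruptIndexException("Cannot read packed FSTs anymore", in);
      }
    }
    return version;
  }
}
```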

View File

@ -24,7 +24,6 @@
* <li>Fast and low memory overhead construction of the minimal FST * <li>Fast and low memory overhead construction of the minimal FST
* (but inputs must be provided in sorted order)</li> * (but inputs must be provided in sorted order)</li>
* <li>Low object overhead and quick deserialization (byte[] representation)</li> * <li>Low object overhead and quick deserialization (byte[] representation)</li>
* <li>Optional two-pass compression: {@link org.apache.lucene.util.fst.FST#pack FST.pack()}</li>
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the * <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the
* outputs are in sorted order (e.g., ordinals or file pointers)</li> * outputs are in sorted order (e.g., ordinals or file pointers)</li>
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation</li> * <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation</li>

View File

@ -37,7 +37,7 @@ public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
// create a small string such that the single pass approach is used // create a small string such that the single pass approach is used
int length = TestUtil.nextInt(random(), 1, minSizeForDoublePass - 1); int length = TestUtil.nextInt(random(), 1, minSizeForDoublePass - 1);
String unicode = TestUtil.randomFixedByteLengthUnicodeString(random(), length); String unicode = TestUtil.randomFixedByteLengthUnicodeString(random(), length);
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR]; byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8); int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8); GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
@ -61,7 +61,7 @@ public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
int num = atLeast(100); int num = atLeast(100);
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
String unicode = TestUtil.randomRealisticUnicodeString(random(), minSizeForDoublePass, 10 * minSizeForDoublePass); String unicode = TestUtil.randomRealisticUnicodeString(random(), minSizeForDoublePass, 10 * minSizeForDoublePass);
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR]; byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8); int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8); GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
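Both hunks in this file swap a hand-rolled worst-case multiply for UnicodeUtil.maxUTF8Length. A minimal sketch of the sizing pattern; the overflow-checking behavior attributed to maxUTF8Length here is an assumption, not something this diff confirms:

```java
import org.apache.lucene.util.UnicodeUtil;

public class MaxUtf8LengthExample {
  public static void main(String[] args) {
    String s = "sized example";
    // Assumed behavior: maxUTF8Length applies the per-char UTF-8 bound with
    // an overflow check, so absurd lengths fail fast instead of wrapping
    // negative and under-allocating the scratch buffer.
    byte[] scratch = new byte[UnicodeUtil.maxUTF8Length(s.length())];
    int len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch);
    System.out.println(len + " UTF-8 bytes for " + s.length() + " UTF-16 chars");
  }
}
```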

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestDocsWithFieldSet extends LuceneTestCase {
public void testDense() throws IOException {
DocsWithFieldSet set = new DocsWithFieldSet();
DocIdSetIterator it = set.iterator();
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
set.add(0);
it = set.iterator();
assertEquals(0, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
long ramBytesUsed = set.ramBytesUsed();
for (int i = 1; i < 1000; ++i) {
set.add(i);
}
assertEquals(ramBytesUsed, set.ramBytesUsed());
it = set.iterator();
for (int i = 0; i < 1000; ++i) {
assertEquals(i, it.nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
}
public void testSparse() throws IOException {
DocsWithFieldSet set = new DocsWithFieldSet();
int doc = random().nextInt(10000);
set.add(doc);
DocIdSetIterator it = set.iterator();
assertEquals(doc, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
int doc2 = doc + TestUtil.nextInt(random(), 1, 100);
set.add(doc2);
it = set.iterator();
assertEquals(doc, it.nextDoc());
assertEquals(doc2, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
}
public void testDenseThenSparse() throws IOException {
int denseCount = random().nextInt(10000);
int nextDoc = denseCount + random().nextInt(10000);
DocsWithFieldSet set = new DocsWithFieldSet();
for (int i = 0; i < denseCount; ++i) {
set.add(i);
}
set.add(nextDoc);
DocIdSetIterator it = set.iterator();
for (int i = 0; i < denseCount; ++i) {
assertEquals(i, it.nextDoc());
}
assertEquals(nextDoc, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
}
}
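DocsWithFieldSet, which this new test exercises, is package-private in org.apache.lucene.index, so any usage sketch has to live in that package. A minimal sketch under that assumption, mirroring the add-in-order/iterate pattern the tests rely on:

```java
package org.apache.lucene.index; // DocsWithFieldSet is package-private

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

public class DocsWithFieldSetDemo {
  public static void main(String[] args) throws IOException {
    DocsWithFieldSet set = new DocsWithFieldSet();
    // Doc IDs must be added in increasing order, as in the tests above:
    set.add(0);
    set.add(3);
    set.add(42);
    DocIdSetIterator it = set.iterator();
    for (int d = it.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = it.nextDoc()) {
      System.out.println(d);
    }
  }
}
```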

View File

@ -97,6 +97,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
public class TestIndexWriter extends LuceneTestCase { public class TestIndexWriter extends LuceneTestCase {
@ -2768,5 +2769,34 @@ public class TestIndexWriter extends LuceneTestCase {
dir.close(); dir.close();
} }
@Ignore("requires running tests with biggish heap")
public void testMassiveField() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
final IndexWriter w = new IndexWriter(dir, iwc);
StringBuilder b = new StringBuilder();
while (b.length() <= IndexWriter.MAX_STORED_STRING_LENGTH) {
b.append("x ");
}
final Document doc = new Document();
//doc.add(new TextField("big", b.toString(), Field.Store.YES));
doc.add(new StoredField("big", b.toString()));
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
assertEquals("stored field \"big\" is too large (" + b.length() + " characters) to store", e.getMessage());
// make sure writer is still usable:
Document doc2 = new Document();
doc2.add(new StringField("id", "foo", Field.Store.YES));
w.addDocument(doc2);
DirectoryReader r = DirectoryReader.open(w);
assertEquals(1, r.numDocs());
r.close();
w.close();
dir.close();
}
} }

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.similarities;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestBooleanSimilarity extends LuceneTestCase {
public void testTermScoreIsEqualToBoost() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
newIndexWriterConfig());
Document doc = new Document();
doc.add(new StringField("foo", "bar", Store.NO));
doc.add(new StringField("foo", "baz", Store.NO));
w.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "bar", Store.NO));
doc.add(new StringField("foo", "bar", Store.NO));
w.addDocument(doc);
DirectoryReader reader = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new BooleanSimilarity());
TopDocs topDocs = searcher.search(new TermQuery(new Term("foo", "bar")), 2);
assertEquals(2, topDocs.totalHits);
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
assertEquals(1f, topDocs.scoreDocs[1].score, 0f);
topDocs = searcher.search(new TermQuery(new Term("foo", "baz")), 1);
assertEquals(1, topDocs.totalHits);
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
topDocs = searcher.search(new BoostQuery(new TermQuery(new Term("foo", "baz")), 3f), 1);
assertEquals(1, topDocs.totalHits);
assertEquals(3f, topDocs.scoreDocs[0].score, 0f);
reader.close();
dir.close();
}
public void testPhraseScoreIsEqualToBoost() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
Document doc = new Document();
doc.add(new TextField("foo", "bar baz quux", Store.NO));
w.addDocument(doc);
DirectoryReader reader = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new BooleanSimilarity());
PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");
TopDocs topDocs = searcher.search(query, 2);
assertEquals(1, topDocs.totalHits);
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
topDocs = searcher.search(new BoostQuery(query, 7), 2);
assertEquals(1, topDocs.totalHits);
assertEquals(7f, topDocs.scoreDocs[0].score, 0f);
reader.close();
dir.close();
}
public void testSameNormsAsBM25() {
BooleanSimilarity sim1 = new BooleanSimilarity();
BM25Similarity sim2 = new BM25Similarity();
sim2.setDiscountOverlaps(true);
for (int iter = 0; iter < 100; ++iter) {
final int length = TestUtil.nextInt(random(), 1, 100);
final int position = random().nextInt(length);
final int numOverlaps = random().nextInt(50);
final float boost = random().nextFloat() * 10;
FieldInvertState state = new FieldInvertState("foo", position, length, numOverlaps, 100, boost);
assertEquals(
sim2.computeNorm(state),
sim1.computeNorm(state),
0f);
}
}
}

View File

@ -274,16 +274,38 @@ public class TestBasics extends LuceneTestCase {
assertTrue(searcher.explain(query, 849).getValue() > 0.0f); assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
} }
public void testSpanNotWindowNeg() throws Exception { public void testSpanNotWindowNegPost() throws Exception {
//test handling of invalid window < 0 //test handling of invalid window < 0
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one"); SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
SpanQuery or = spanOrQuery("field", "forty"); SpanQuery or = spanOrQuery("field", "forty");
SpanQuery query = spanNotQuery(near, or); SpanQuery query = spanNotQuery(near, or, 0, -1);
checkHits(query, new int[] checkHits(query, new int[]
{801, 821, 831, 851, 861, 871, 881, 891, {801, 821, 831, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891}); 1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
query = spanNotQuery(near, or, 0, -2);
checkHits(query, new int[]
{801, 821, 831, 841, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
}
public void testSpanNotWindowNegPre() throws Exception {
//test handling of invalid window < 0
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
SpanQuery or = spanOrQuery("field", "forty");
SpanQuery query = spanNotQuery(near, or, -2, 0);
checkHits(query, new int[]
{801, 821, 831, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
query = spanNotQuery(near, or, -3, 0);
checkHits(query, new int[]
{801, 821, 831, 841, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
assertTrue(searcher.explain(query, 801).getValue() > 0.0f); assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
assertTrue(searcher.explain(query, 891).getValue() > 0.0f); assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
} }
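The negative pre/post windows exercised above can be built directly with the public SpanNotQuery constructor. A minimal sketch, assuming the four-argument constructor these tests call through their spanNotQuery helper:

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class SpanNotOverlapExample {
  // Mirrors testSpanNotWindowNegPost: a negative post lets the exclude span
  // reach that many positions back into the include span before a hit is
  // disqualified, i.e. a bounded amount of overlap is tolerated.
  static SpanQuery eightOneNotForty() {
    SpanQuery include = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term("field", "eight")),
        new SpanTermQuery(new Term("field", "one"))}, 4, true);
    SpanQuery exclude = new SpanTermQuery(new Term("field", "forty"));
    return new SpanNotQuery(include, exclude, 0, -2);
  }
}
```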

View File

@ -99,7 +99,6 @@ public class TestSpans extends LuceneTestCase {
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx", "s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx",
"r1 s11", "r1 s11",
"r1 s21" "r1 s21"
}; };
private void checkHits(Query query, int[] results) throws IOException { private void checkHits(Query query, int[] results) throws IOException {
@ -407,41 +406,53 @@ public class TestSpans extends LuceneTestCase {
} }
public void testSpanNots() throws Throwable{ public void testSpanNots() throws Throwable {
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0); assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", 0, "s2", 0, 0), 0);
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", 0, "s2", 10, 10), 0);
//focus on behind //focus on behind
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0)); assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", 0, "s1", 6, 0));
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0)); assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", 0, "s1", 5, 0));
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0)); assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", 0, "s1", 3, 0));
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0)); assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", 0, "s1", 2, 0));
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0)); assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", 0, "s1", 0, 0));
//focus on both //focus on both
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1)); assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", 0, "s1", 3, 1));
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1)); assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", 0, "s1", 2, 1));
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1)); assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", 0, "s1", 1, 1));
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10)); assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", 0, "s1", 10, 10));
//focus on ahead //focus on ahead
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10)); assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", 0, "s2", 10, 10));
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1)); assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", 0, "s2", 0, 1));
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2)); assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", 0, "s2", 0, 2));
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3)); assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", 0, "s2", 0, 3));
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4)); assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", 0, "s2", 0, 4));
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8)); assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", 0, "s2", 0, 8));
//exclude doesn't exist //exclude doesn't exist
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8)); assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", 0, "s3", 8, 8));
//include doesn't exist //include doesn't exist
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8)); assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", 0, "s1", 8, 8));
// Negative values
assertEquals("SpanNotS2S1NotXXNeg_0_0", 1, spanCount("s2 s1", 10, "xx", 0, 0));
assertEquals("SpanNotS2S1NotXXNeg_1_1", 1, spanCount("s2 s1", 10, "xx", -1, -1));
assertEquals("SpanNotS2S1NotXXNeg_0_2", 2, spanCount("s2 s1", 10, "xx", 0, -2));
assertEquals("SpanNotS2S1NotXXNeg_1_2", 2, spanCount("s2 s1", 10, "xx", -1, -2));
assertEquals("SpanNotS2S1NotXXNeg_2_1", 2, spanCount("s2 s1", 10, "xx", -2, -1));
assertEquals("SpanNotS2S1NotXXNeg_3_1", 2, spanCount("s2 s1", 10, "xx", -3, -1));
assertEquals("SpanNotS2S1NotXXNeg_1_3", 2, spanCount("s2 s1", 10, "xx", -1, -3));
assertEquals("SpanNotS2S1NotXXNeg_2_2", 3, spanCount("s2 s1", 10, "xx", -2, -2));
} }
private int spanCount(String include, String exclude, int pre, int post) throws IOException{
SpanQuery iq = spanTermQuery(field, include); private int spanCount(String include, int slop, String exclude, int pre, int post) throws IOException{
String[] includeTerms = include.split(" +");
SpanQuery iq = includeTerms.length == 1 ? spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms);
SpanQuery eq = spanTermQuery(field, exclude); SpanQuery eq = spanTermQuery(field, exclude);
SpanQuery snq = spanNotQuery(iq, eq, pre, post); SpanQuery snq = spanNotQuery(iq, eq, pre, post);
Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);

View File

@ -111,7 +111,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
int num = atLeast(50000); int num = atLeast(50000);
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
final String s = TestUtil.randomUnicodeString(random()); final String s = TestUtil.randomUnicodeString(random());
final byte[] utf8 = new byte[s.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR]; final byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(s.length())];
final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8); final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8);
assertEquals(s.codePointCount(0, s.length()), assertEquals(s.codePointCount(0, s.length()),
UnicodeUtil.codePointCount(new BytesRef(utf8, 0, utf8Len))); UnicodeUtil.codePointCount(new BytesRef(utf8, 0, utf8Len)));
@ -137,7 +137,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
int num = atLeast(50000); int num = atLeast(50000);
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
final String s = TestUtil.randomUnicodeString(random()); final String s = TestUtil.randomUnicodeString(random());
final byte[] utf8 = new byte[s.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR]; final byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(s.length())];
final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8); final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8);
utf32 = ArrayUtil.grow(utf32, utf8Len); utf32 = ArrayUtil.grow(utf32, utf8Len);
final int utf32Len = UnicodeUtil.UTF8toUTF32(new BytesRef(utf8, 0, utf8Len), utf32); final int utf32Len = UnicodeUtil.UTF8toUTF32(new BytesRef(utf8, 0, utf8Len), utf32);
@ -208,7 +208,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
int num = atLeast(5000); int num = atLeast(5000);
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
String unicode = TestUtil.randomUnicodeString(random()); String unicode = TestUtil.randomUnicodeString(random());
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR]; byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8); int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
assertEquals(len, UnicodeUtil.calcUTF16toUTF8Length(unicode, 0, unicode.length())); assertEquals(len, UnicodeUtil.calcUTF16toUTF8Length(unicode, 0, unicode.length()));
} }

View File

@ -41,7 +41,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
private boolean matches(ByteRunAutomaton a, int code) { private boolean matches(ByteRunAutomaton a, int code) {
char[] chars = Character.toChars(code); char[] chars = Character.toChars(code);
byte[] b = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * chars.length]; byte[] b = new byte[UnicodeUtil.maxUTF8Length(chars.length)];
final int len = UnicodeUtil.UTF16toUTF8(chars, 0, chars.length, b); final int len = UnicodeUtil.UTF16toUTF8(chars, 0, chars.length, b);
return a.run(b, 0, len); return a.run(b, 0, len);
} }

View File

@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TimeUnits; import org.apache.lucene.util.TimeUnits;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Ignore; import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@ -47,16 +46,14 @@ public class Test2BFST extends LuceneTestCase {
Directory dir = new MMapDirectory(createTempDir("2BFST")); Directory dir = new MMapDirectory(createTempDir("2BFST"));
for(int doPackIter=0;doPackIter<2;doPackIter++) { for(int iter=0;iter<1;iter++) {
boolean doPack = doPackIter == 1;
// Build FST w/ NoOutputs and stop when nodeCount > 2.2B // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
if (!doPack) { {
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS"); System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
Outputs<Object> outputs = NoOutputs.getSingleton(); Outputs<Object> outputs = NoOutputs.getSingleton();
Object NO_OUTPUT = outputs.getNoOutput(); Object NO_OUTPUT = outputs.getNoOutput();
final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
doPack, PackedInts.COMPACT, true, 15); true, 15);
int count = 0; int count = 0;
Random r = new Random(seed); Random r = new Random(seed);
@ -135,10 +132,10 @@ public class Test2BFST extends LuceneTestCase {
// Build FST w/ ByteSequenceOutputs and stop when FST // Build FST w/ ByteSequenceOutputs and stop when FST
// size = 3GB // size = 3GB
{ {
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes"); System.out.println("\nTEST: 3 GB size; outputs=bytes");
Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton(); Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
doPack, PackedInts.COMPACT, true, 15); true, 15);
byte[] outputBytes = new byte[20]; byte[] outputBytes = new byte[20];
BytesRef output = new BytesRef(outputBytes); BytesRef output = new BytesRef(outputBytes);
@ -212,10 +209,10 @@ public class Test2BFST extends LuceneTestCase {
// Build FST w/ PositiveIntOutputs and stop when FST // Build FST w/ PositiveIntOutputs and stop when FST
// size = 3GB // size = 3GB
{ {
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long"); System.out.println("\nTEST: 3 GB size; outputs=long");
Outputs<Long> outputs = PositiveIntOutputs.getSingleton(); Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
doPack, PackedInts.COMPACT, true, 15); true, 15);
long output = 1; long output = 1;

View File

@ -76,7 +76,6 @@ import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader; import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PairOutputs.Pair; import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util.Result; import org.apache.lucene.util.fst.Util.Result;
import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.util.fst.FSTTester.getRandomString; import static org.apache.lucene.util.fst.FSTTester.getRandomString;
import static org.apache.lucene.util.fst.FSTTester.simpleRandomString; import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
@ -328,9 +327,7 @@ public class TestFSTs extends LuceneTestCase {
writer.close(); writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final boolean doRewrite = random().nextBoolean(); Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doRewrite, PackedInts.DEFAULT, true, 15);
boolean storeOrd = random().nextBoolean(); boolean storeOrd = random().nextBoolean();
if (VERBOSE) { if (VERBOSE) {
@ -464,16 +461,14 @@ public class TestFSTs extends LuceneTestCase {
private int inputMode; private int inputMode;
private final Outputs<T> outputs; private final Outputs<T> outputs;
private final Builder<T> builder; private final Builder<T> builder;
private final boolean doPack;
public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean doPack, boolean noArcArrays) { public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean noArcArrays) {
this.dirOut = dirOut; this.dirOut = dirOut;
this.wordsFileIn = wordsFileIn; this.wordsFileIn = wordsFileIn;
this.inputMode = inputMode; this.inputMode = inputMode;
this.outputs = outputs; this.outputs = outputs;
this.doPack = doPack;
builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, doPack, PackedInts.DEFAULT, !noArcArrays, 15); builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, !noArcArrays, 15);
} }
protected abstract T getOutput(IntsRef input, int ord) throws IOException; protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@ -622,7 +617,6 @@ public class TestFSTs extends LuceneTestCase {
boolean storeOrds = false; boolean storeOrds = false;
boolean storeDocFreqs = false; boolean storeDocFreqs = false;
boolean verify = true; boolean verify = true;
boolean doPack = false;
boolean noArcArrays = false; boolean noArcArrays = false;
Path wordsFileIn = null; Path wordsFileIn = null;
Path dirOut = null; Path dirOut = null;
@ -647,8 +641,6 @@ public class TestFSTs extends LuceneTestCase {
storeOrds = true; storeOrds = true;
} else if (args[idx].equals("-noverify")) { } else if (args[idx].equals("-noverify")) {
verify = false; verify = false;
} else if (args[idx].equals("-pack")) {
doPack = true;
} else if (args[idx].startsWith("-")) { } else if (args[idx].startsWith("-")) {
System.err.println("Unrecognized option: " + args[idx]); System.err.println("Unrecognized option: " + args[idx]);
System.exit(-1); System.exit(-1);
@ -677,7 +669,7 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
final PairOutputs<Long,Long> outputs = new PairOutputs<>(o1, o2); final PairOutputs<Long,Long> outputs = new PairOutputs<>(o1, o2);
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) { new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
Random rand; Random rand;
@Override @Override
public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) { public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
@ -691,7 +683,7 @@ public class TestFSTs extends LuceneTestCase {
} else if (storeOrds) { } else if (storeOrds) {
// Store only ords // Store only ords
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) { new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
@Override @Override
public Long getOutput(IntsRef input, int ord) { public Long getOutput(IntsRef input, int ord) {
return (long) ord; return (long) ord;
@ -700,7 +692,7 @@ public class TestFSTs extends LuceneTestCase {
} else if (storeDocFreqs) { } else if (storeDocFreqs) {
// Store only docFreq // Store only docFreq
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) { new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
Random rand; Random rand;
@Override @Override
public Long getOutput(IntsRef input, int ord) { public Long getOutput(IntsRef input, int ord) {
@ -714,7 +706,7 @@ public class TestFSTs extends LuceneTestCase {
// Store nothing // Store nothing
final NoOutputs outputs = NoOutputs.getSingleton(); final NoOutputs outputs = NoOutputs.getSingleton();
final Object NO_OUTPUT = outputs.getNoOutput(); final Object NO_OUTPUT = outputs.getNoOutput();
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) { new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
@Override @Override
public Object getOutput(IntsRef input, int ord) { public Object getOutput(IntsRef input, int ord) {
return NO_OUTPUT; return NO_OUTPUT;
@ -1118,7 +1110,7 @@ public class TestFSTs extends LuceneTestCase {
public void testFinalOutputOnEndState() throws Exception { public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, random().nextBoolean(), PackedInts.DEFAULT, true, 15); final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
builder.add(Util.toUTF32("stat", new IntsRefBuilder()), 17L); builder.add(Util.toUTF32("stat", new IntsRefBuilder()), 17L);
builder.add(Util.toUTF32("station", new IntsRefBuilder()), 10L); builder.add(Util.toUTF32("station", new IntsRefBuilder()), 10L);
final FST<Long> fst = builder.finish(); final FST<Long> fst = builder.finish();
@ -1132,8 +1124,7 @@ public class TestFSTs extends LuceneTestCase {
public void testInternalFinalState() throws Exception { public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final boolean willRewrite = random().nextBoolean(); final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, willRewrite, PackedInts.DEFAULT, true, 15);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput()); builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput()); builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
final FST<Long> fst = builder.finish(); final FST<Long> fst = builder.finish();
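
The Builder constructor in these hunks loses its packing arguments (the doPack flag and the PackedInts overhead ratio), leaving nine parameters. As a minimal reference sketch of the trimmed signature (written against the Lucene 6.x util.fst API used above; parameter-name comments are our reading, not code from this patch):

    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1,
        0, 0,              // minSuffixCount1, minSuffixCount2: no pruning
        true, true,        // doShareSuffix, doShareNonSingletonNodes
        Integer.MAX_VALUE, // shareMaxTailLength
        outputs,
        true,              // allowArrayArcs
        15);               // bytesPageBits
    IntsRefBuilder scratch = new IntsRefBuilder();
    builder.add(Util.toIntsRef(new BytesRef("stat"), scratch), 17L);    // inputs must be added in sorted order
    builder.add(Util.toIntsRef(new BytesRef("station"), scratch), 10L);
    FST<Long> fst = builder.finish();
    Long out = Util.get(fst, new BytesRef("station")); // -> 10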

View File

@ -19,8 +19,10 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.FilteringTokenFilter; import org.apache.lucene.analysis.FilteringTokenFilter;
@ -30,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
@ -50,7 +53,9 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
private final LeafReader leafReader; private final LeafReader leafReader;
private final CharacterRunAutomaton preMemIndexFilterAutomaton; private final CharacterRunAutomaton preMemIndexFilterAutomaton;
public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer) { public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
CharacterRunAutomaton[] automata, Analyzer analyzer,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
super(field, extractedTerms, phraseHelper, automata); super(field, extractedTerms, phraseHelper, automata);
this.analyzer = analyzer; this.analyzer = analyzer;
// Automata (Wildcards / MultiTermQuery): // Automata (Wildcards / MultiTermQuery):
@ -68,7 +73,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
// preFilter for MemoryIndex // preFilter for MemoryIndex
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases); preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases,
multiTermQueryRewrite);
} else { } else {
memoryIndex = null; memoryIndex = null;
leafReader = null; leafReader = null;
@ -155,7 +161,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
*/ */
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms, private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
CharacterRunAutomaton[] automata, CharacterRunAutomaton[] automata,
PhraseHelper strictPhrases) { PhraseHelper strictPhrases,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
List<CharacterRunAutomaton> allAutomata = new ArrayList<>(); List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
if (terms.length > 0) { if (terms.length > 0) {
allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms)))); allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
@ -163,7 +170,7 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
Collections.addAll(allAutomata, automata); Collections.addAll(allAutomata, automata);
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) { for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
Collections.addAll(allAutomata, Collections.addAll(allAutomata,
MultiTermHighlighting.extractAutomata(spanQuery, field, true));//true==lookInSpan MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
} }
if (allAutomata.size() == 1) { if (allAutomata.size() == 1) {

View File

@ -20,8 +20,10 @@ import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.function.Function;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -69,34 +71,44 @@ class MultiTermHighlighting {
* Extracts all MultiTermQueries for {@code field}, and returns equivalent * Extracts all MultiTermQueries for {@code field}, and returns equivalent
* automata that will match terms. * automata that will match terms.
*/ */
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan) { public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
Function<Query, Collection<Query>> preRewriteFunc) {
List<CharacterRunAutomaton> list = new ArrayList<>(); List<CharacterRunAutomaton> list = new ArrayList<>();
if (query instanceof BooleanQuery) { Collection<Query> customSubQueries = preRewriteFunc.apply(query);
if (customSubQueries != null) {
for (Query sub : customSubQueries) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) { for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) { if (!clause.isProhibited()) {
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
} }
} }
} else if (query instanceof ConstantScoreQuery) { } else if (query instanceof ConstantScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) { } else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) { for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
} }
} else if (lookInSpan && query instanceof SpanOrQuery) { } else if (lookInSpan && query instanceof SpanOrQuery) {
for (Query sub : ((SpanOrQuery) query).getClauses()) { for (Query sub : ((SpanOrQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
} }
} else if (lookInSpan && query instanceof SpanNearQuery) { } else if (lookInSpan && query instanceof SpanNearQuery) {
for (Query sub : ((SpanNearQuery) query).getClauses()) { for (Query sub : ((SpanNearQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
} }
} else if (lookInSpan && query instanceof SpanNotQuery) { } else if (lookInSpan && query instanceof SpanNotQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) { } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) { } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field, lookInSpan))); list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
lookInSpan, preRewriteFunc)));
} else if (query instanceof AutomatonQuery) { } else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query; final AutomatonQuery aq = (AutomatonQuery) query;
if (aq.getField().equals(field)) { if (aq.getField().equals(field)) {
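
The new preRewriteFunc parameter is consulted before the built-in instanceof chain above: a non-null collection short-circuits the default handling and is recursed into, while null falls through to the default rules. A minimal sketch of a conforming function, where MyCustomWrapperQuery and getWrapped() are hypothetical stand-ins:

    Function<Query, Collection<Query>> preRewriteFunc = query -> {
      if (query instanceof MyCustomWrapperQuery) { // hypothetical custom wrapper
        return Collections.singletonList(((MyCustomWrapperQuery) query).getWrapped());
      }
      return null; // not custom: let extractAutomata apply its default rules
    };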

View File

@ -40,7 +40,7 @@ import java.util.function.Function;
public class PhraseHelper { public class PhraseHelper {
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_", public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
spanQuery -> null, true); spanQuery -> null, query -> null, true);
//TODO it seems this ought to be a general thing on Spans? //TODO it seems this ought to be a general thing on Spans?
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> { private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
@ -69,10 +69,13 @@ public class PhraseHelper {
* {@code rewriteQueryPred} is an extension hook to override the default choice of * {@code rewriteQueryPred} is an extension hook to override the default choice of
* {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten, * {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
* so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten. * so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
* Similarly, {@code preExtractRewriteFunction} is an extension hook for extraction that allows substitute
* queries to be supplied before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is * {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones. * usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
*/ */
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred, public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
Function<Query, Collection<Query>> preExtractRewriteFunction,
boolean ignoreQueriesNeedingRewrite) { boolean ignoreQueriesNeedingRewrite) {
this.fieldName = field; // if null then don't require field match this.fieldName = field; // if null then don't require field match
// filter terms to those we want // filter terms to those we want
@ -98,6 +101,18 @@ public class PhraseHelper {
} }
} }
@Override
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
Collection<Query> newQueriesToExtract = preExtractRewriteFunction.apply(query);
if (newQueriesToExtract != null) {
for (Query newQuery : newQueriesToExtract) {
extract(newQuery, boost, terms);
}
} else {
super.extract(query, boost, terms);
}
}
@Override @Override
protected boolean isQueryUnsupported(Class<? extends Query> clazz) { protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
if (clazz.isAssignableFrom(MultiTermQuery.class)) { if (clazz.isAssignableFrom(MultiTermQuery.class)) {

View File

@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets;
import java.text.BreakIterator; import java.text.BreakIterator;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@ -732,7 +733,8 @@ public class UnifiedHighlighter {
OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata); OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
switch (offsetSource) { switch (offsetSource) {
case ANALYSIS: case ANALYSIS:
return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer()); return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
this::preMultiTermQueryRewrite);
case NONE_NEEDED: case NONE_NEEDED:
return NoOpOffsetStrategy.INSTANCE; return NoOpOffsetStrategy.INSTANCE;
case TERM_VECTORS: case TERM_VECTORS:
@ -776,13 +778,14 @@ public class UnifiedHighlighter {
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES); boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY); boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
return highlightPhrasesStrictly ? return highlightPhrasesStrictly ?
new PhraseHelper(query, field, this::requiresRewrite, !handleMultiTermQuery) : new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
PhraseHelper.NONE; PhraseHelper.NONE;
} }
protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) { protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) {
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY) return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES)) ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
this::preMultiTermQueryRewrite)
: ZERO_LEN_AUTOMATA_ARRAY; : ZERO_LEN_AUTOMATA_ARRAY;
} }
@ -830,6 +833,32 @@ public class UnifiedHighlighter {
return null; return null;
} }
/**
* When highlighting phrases accurately, we may need to handle custom queries that aren't supported in the
* {@link org.apache.lucene.search.highlight.WeightedSpanTermExtractor} as called by the {@code PhraseHelper}.
* Should custom query types be needed, this method should be overridden to return a collection of queries if appropriate,
* or null if nothing to do. If the query is not custom, simply returning null will allow the default rules to apply.
*
* @param query Query to be highlighted
* @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
*/
protected Collection<Query> preSpanQueryRewrite(Query query) {
return null;
}
/**
* When dealing with multi term queries / span queries, we may need to handle custom queries that aren't supported
* by the default automata extraction in {@code MultiTermHighlighting}. This can be overridden to return a collection
* of queries if appropriate, or null if nothing to do. If the query is not custom, simply returning null will allow the
* default rules to apply.
*
* @param query Query to be highlighted
* @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
*/
protected Collection<Query> preMultiTermQueryRewrite(Query query) {
return null;
}
private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) { private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
return new DocIdSetIterator() { return new DocIdSetIterator() {
int idx = -1; int idx = -1;
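
Both hooks are designed to be overridden together in a UnifiedHighlighter subclass. A minimal sketch, where MyQuery and getWrapped() stand in for any custom wrapper type (they are not part of this patch):

    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer) {
      @Override
      protected Collection<Query> preSpanQueryRewrite(Query query) {
        return unwrap(query);
      }

      @Override
      protected Collection<Query> preMultiTermQueryRewrite(Query query) {
        return unwrap(query);
      }

      private Collection<Query> unwrap(Query query) {
        return query instanceof MyQuery // hypothetical wrapper
            ? Collections.singletonList(((MyQuery) query).getWrapped())
            : null; // null means: apply the default rules
      }
    };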

View File

@ -20,6 +20,8 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.List;
import java.util.Objects;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
@ -56,6 +58,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@ -933,4 +936,89 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
ir.close(); ir.close();
} }
public void testCustomSpanQueryHighlighting() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Document doc = new Document();
doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
doc.add(newTextField("id", "id", Field.Store.YES));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected List<Query> preMultiTermQueryRewrite(Query query) {
if (query instanceof MyWrapperSpanQuery) {
return Collections.singletonList(((MyWrapperSpanQuery) query).originalQuery);
}
return null;
}
};
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
SpanMultiTermQueryWrapper<WildcardQuery> wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);
BooleanQuery query = new BooleanQuery.Builder()
.add(wrappedQuery, BooleanClause.Occur.SHOULD)
.build();
int[] docIds = new int[]{docId};
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
assertEquals(1, snippets.length);
assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
ir.close();
}
private static class MyWrapperSpanQuery extends SpanQuery {
private final SpanQuery originalQuery;
private MyWrapperSpanQuery(SpanQuery originalQuery) {
this.originalQuery = Objects.requireNonNull(originalQuery);
}
@Override
public String getField() {
return originalQuery.getField();
}
@Override
public String toString(String field) {
return "(Wrapper[" + originalQuery.toString(field)+"])";
}
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return originalQuery.createWeight(searcher, needsScores, boost);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query newOriginalQuery = originalQuery.rewrite(reader);
if (newOriginalQuery != originalQuery) {
return new MyWrapperSpanQuery((SpanQuery)newOriginalQuery);
}
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
return originalQuery.equals(((MyWrapperSpanQuery)o).originalQuery);
}
@Override
public int hashCode() {
return originalQuery.hashCode();
}
}
} }

View File

@ -17,6 +17,8 @@
package org.apache.lucene.search.uhighlight; package org.apache.lucene.search.uhighlight;
import java.io.IOException; import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
@ -29,14 +31,17 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNearQuery;
@ -401,4 +406,76 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1); Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
assertEquals(content, o); assertEquals(content, o);
} }
public void testPreSpanQueryRewrite() throws IOException {
indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
initReaderSearcherHighlighter();
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Collection<Query> preSpanQueryRewrite(Query query) {
if (query instanceof MyQuery) {
return Collections.singletonList(((MyQuery)query).wrapped);
}
return null;
}
};
highlighter.setHighlightPhrasesStrictly(true);
BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
Query oredTerms = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2)
.add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
.build();
Query proximityBoostingQuery = new MyQuery(oredTerms);
Query totalQuery = bqBuilder
.add(phraseQuery, BooleanClause.Occur.SHOULD)
.add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
.build();
TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets);
}
private static class MyQuery extends Query {
private final Query wrapped;
MyQuery(Query wrapped) {
this.wrapped = wrapped;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return wrapped.createWeight(searcher, needsScores, boost);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query newWrapped = wrapped.rewrite(reader);
if (newWrapped != wrapped) {
return new MyQuery(newWrapped);
}
return this;
}
@Override
public String toString(String field) {
return "[[["+wrapped.toString(field)+"]]]";
}
@Override
public boolean equals(Object obj) {
return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery) obj).wrapped);
}
@Override
public int hashCode() {
return wrapped.hashCode();
}
}
} }

View File

@ -228,7 +228,7 @@ org.bouncycastle.version = 1.45
/org.carrot2.attributes/attributes-binder = 1.3.1 /org.carrot2.attributes/attributes-binder = 1.3.1
/org.carrot2.shaded/carrot2-guava = 18.0 /org.carrot2.shaded/carrot2-guava = 18.0
/org.carrot2/carrot2-mini = 3.12.0 /org.carrot2/carrot2-mini = 3.15.0
org.carrot2.morfologik.version = 2.1.1 org.carrot2.morfologik.version = 2.1.1
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version} /org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}

View File

@ -50,7 +50,6 @@ import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PairOutputs; import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
/* /*
TODO: TODO:
@ -354,8 +353,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
final Builder<Pair<BytesRef,Long>> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, final Builder<Pair<BytesRef,Long>> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE, 0, 0, true, false, Integer.MAX_VALUE,
FST_OUTPUTS, false, FST_OUTPUTS, true, 15);
PackedInts.COMPACT, true, 15);
//if (DEBUG) { //if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix); // System.out.println(" compile index for prefix=" + prefix);
//} //}

View File

@ -23,9 +23,10 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermContext;
@ -34,6 +35,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition; import org.apache.lucene.util.automaton.Transition;
@ -183,6 +185,10 @@ public class TermAutomatonQuery extends Query {
det = Operations.removeDeadStates(Operations.determinize(automaton, det = Operations.removeDeadStates(Operations.determinize(automaton,
maxDeterminizedStates)); maxDeterminizedStates));
if (det.isAccept(0)) {
throw new IllegalStateException("cannot accept the empty string");
}
} }
@Override @Override
@ -396,4 +402,82 @@ public class TermAutomatonQuery extends Query {
return null; return null;
} }
} }
public Query rewrite(IndexReader reader) throws IOException {
if (Operations.isEmpty(det)) {
return new MatchNoDocsQuery();
}
IntsRef single = Operations.getSingleton(det);
if (single != null && single.length == 1) {
return new TermQuery(new Term(field, idToTerm.get(single.ints[single.offset])));
}
// TODO: can PhraseQuery really handle multiple terms at the same position? If so, why do we even have MultiPhraseQuery?
// Try for either PhraseQuery or MultiPhraseQuery, which only works when the automaton is a sausage:
MultiPhraseQuery.Builder mpq = new MultiPhraseQuery.Builder();
PhraseQuery.Builder pq = new PhraseQuery.Builder();
Transition t = new Transition();
int state = 0;
int pos = 0;
query:
while (true) {
int count = det.initTransition(state, t);
if (count == 0) {
if (det.isAccept(state) == false) {
mpq = null;
pq = null;
}
break;
} else if (det.isAccept(state)) {
mpq = null;
pq = null;
break;
}
int dest = -1;
List<Term> terms = new ArrayList<>();
boolean matchesAny = false;
for(int i=0;i<count;i++) {
det.getNextTransition(t);
if (i == 0) {
dest = t.dest;
} else if (dest != t.dest) {
mpq = null;
pq = null;
break query;
}
matchesAny |= anyTermID >= t.min && anyTermID <= t.max;
if (matchesAny == false) {
for(int termID=t.min;termID<=t.max;termID++) {
terms.add(new Term(field, idToTerm.get(termID)));
}
}
}
if (matchesAny == false) {
mpq.add(terms.toArray(new Term[terms.size()]), pos);
if (pq != null) {
if (terms.size() == 1) {
pq.add(terms.get(0), pos);
} else {
pq = null;
}
}
}
state = dest;
pos++;
}
if (pq != null) {
return pq.build();
} else if (mpq != null) {
return mpq.build();
}
// TODO: we could maybe also rewrite to union of PhraseQuery (pull all finite strings) if it's "worth it"?
return this;
}
} }

View File

@ -296,7 +296,6 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
while (scorer instanceof AssertingScorer) { while (scorer instanceof AssertingScorer) {
scorer = ((AssertingScorer) scorer).getIn(); scorer = ((AssertingScorer) scorer).getIn();
} }
assert scorer instanceof TermAutomatonScorer;
} }
@Override @Override
@ -683,7 +682,7 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
w.addDocument(doc); w.addDocument(doc);
doc = new Document(); doc = new Document();
doc.add(newTextField("field", "comes here", Field.Store.NO)); doc.add(newTextField("field", "comes foo", Field.Store.NO));
w.addDocument(doc); w.addDocument(doc);
IndexReader r = w.getReader(); IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r); IndexSearcher s = newSearcher(r);
@ -691,9 +690,11 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
TermAutomatonQuery q = new TermAutomatonQuery("field"); TermAutomatonQuery q = new TermAutomatonQuery("field");
int init = q.createState(); int init = q.createState();
int s1 = q.createState(); int s1 = q.createState();
int s2 = q.createState();
q.addTransition(init, s1, "here"); q.addTransition(init, s1, "here");
q.addTransition(s1, init, "comes"); q.addTransition(s1, s2, "comes");
q.setAccept(init, true); q.addTransition(s2, s1, "here");
q.setAccept(s1, true);
q.finish(); q.finish();
assertEquals(1, s.search(q, 1).totalHits); assertEquals(1, s.search(q, 1).totalHits);
@ -779,8 +780,186 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
// System.out.println("DOT: " + q.toDot()); // System.out.println("DOT: " + q.toDot());
assertEquals(0, s.search(q, 1).totalHits); assertEquals(0, s.search(q, 1).totalHits);
w.close(); IOUtils.close(w, r, dir);
r.close(); }
dir.close();
public void testEmptyString() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
q.setAccept(initState, true);
try {
q.finish();
fail("did not hit exc");
} catch (IllegalStateException ise) {
// expected
}
}
public void testRewriteNoMatch() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
assertTrue(q.rewrite(r) instanceof MatchNoDocsQuery);
IOUtils.close(w, r, dir);
}
public void testRewriteTerm() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
q.addTransition(initState, s1, "foo");
q.setAccept(s1, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof TermQuery);
assertEquals(new Term("field", "foo"), ((TermQuery) rewrite).getTerm());
IOUtils.close(w, r, dir);
}
public void testRewriteSimplePhrase() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
int s2 = q.createState();
q.addTransition(initState, s1, "foo");
q.addTransition(s1, s2, "bar");
q.setAccept(s2, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof PhraseQuery);
Term[] terms = ((PhraseQuery) rewrite).getTerms();
assertEquals(new Term("field", "foo"), terms[0]);
assertEquals(new Term("field", "bar"), terms[1]);
int[] positions = ((PhraseQuery) rewrite).getPositions();
assertEquals(0, positions[0]);
assertEquals(1, positions[1]);
IOUtils.close(w, r, dir);
}
public void testRewritePhraseWithAny() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
int s2 = q.createState();
int s3 = q.createState();
q.addTransition(initState, s1, "foo");
q.addAnyTransition(s1, s2);
q.addTransition(s2, s3, "bar");
q.setAccept(s3, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof PhraseQuery);
Term[] terms = ((PhraseQuery) rewrite).getTerms();
assertEquals(new Term("field", "foo"), terms[0]);
assertEquals(new Term("field", "bar"), terms[1]);
int[] positions = ((PhraseQuery) rewrite).getPositions();
assertEquals(0, positions[0]);
assertEquals(2, positions[1]);
IOUtils.close(w, r, dir);
}
public void testRewriteSimpleMultiPhrase() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
q.addTransition(initState, s1, "foo");
q.addTransition(initState, s1, "bar");
q.setAccept(s1, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof MultiPhraseQuery);
Term[][] terms = ((MultiPhraseQuery) rewrite).getTermArrays();
assertEquals(1, terms.length);
assertEquals(2, terms[0].length);
assertEquals(new Term("field", "foo"), terms[0][0]);
assertEquals(new Term("field", "bar"), terms[0][1]);
int[] positions = ((MultiPhraseQuery) rewrite).getPositions();
assertEquals(1, positions.length);
assertEquals(0, positions[0]);
IOUtils.close(w, r, dir);
}
public void testRewriteMultiPhraseWithAny() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
int s2 = q.createState();
int s3 = q.createState();
q.addTransition(initState, s1, "foo");
q.addTransition(initState, s1, "bar");
q.addAnyTransition(s1, s2);
q.addTransition(s2, s3, "baz");
q.setAccept(s3, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof MultiPhraseQuery);
Term[][] terms = ((MultiPhraseQuery) rewrite).getTermArrays();
assertEquals(2, terms.length);
assertEquals(2, terms[0].length);
assertEquals(new Term("field", "foo"), terms[0][0]);
assertEquals(new Term("field", "bar"), terms[0][1]);
assertEquals(1, terms[1].length);
assertEquals(new Term("field", "baz"), terms[1][0]);
int[] positions = ((MultiPhraseQuery) rewrite).getPositions();
assertEquals(2, positions.length);
assertEquals(0, positions[0]);
assertEquals(2, positions[1]);
IOUtils.close(w, r, dir);
} }
} }

View File

@ -26,7 +26,6 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.*; import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.packed.PackedInts;
/** /**
* Finite state automata based implementation of "autocomplete" functionality. * Finite state automata based implementation of "autocomplete" functionality.
@ -237,8 +236,7 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput(); final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<>( final Builder<Object> builder = new Builder<>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true, FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
shareMaxTailLength, outputs, false, shareMaxTailLength, outputs, true, 15);
PackedInts.DEFAULT, true, 15);
BytesRefBuilder scratch = new BytesRefBuilder(); BytesRefBuilder scratch = new BytesRefBuilder();
BytesRef entry; BytesRef entry;

View File

@ -368,8 +368,9 @@ public class CheckHits {
boolean productOf = descr.endsWith("product of:"); boolean productOf = descr.endsWith("product of:");
boolean sumOf = descr.endsWith("sum of:"); boolean sumOf = descr.endsWith("sum of:");
boolean maxOf = descr.endsWith("max of:"); boolean maxOf = descr.endsWith("max of:");
boolean computedOf = descr.matches(".*, computed as .* from:");
boolean maxTimesOthers = false; boolean maxTimesOthers = false;
if (!(productOf || sumOf || maxOf)) { if (!(productOf || sumOf || maxOf || computedOf)) {
// maybe 'max plus x times others' // maybe 'max plus x times others'
int k1 = descr.indexOf("max plus "); int k1 = descr.indexOf("max plus ");
if (k1>=0) { if (k1>=0) {
@ -387,9 +388,9 @@ public class CheckHits {
// TODO: this is a TERRIBLE assertion!!!! // TODO: this is a TERRIBLE assertion!!!!
Assert.assertTrue( Assert.assertTrue(
q+": multi valued explanation description=\""+descr q+": multi valued explanation description=\""+descr
+"\" must be 'max of plus x times others' or end with 'product of'" +"\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
+" or 'sum of:' or 'max of:' - "+expl, +" or 'sum of:' or 'max of:' - "+expl,
productOf || sumOf || maxOf || maxTimesOthers); productOf || sumOf || maxOf || computedOf || maxTimesOthers);
float sum = 0; float sum = 0;
float product = 1; float product = 1;
float max = 0; float max = 0;
@ -410,7 +411,8 @@ public class CheckHits {
} else if (maxTimesOthers) { } else if (maxTimesOthers) {
combined = max + x * (sum - max); combined = max + x * (sum - max);
} else { } else {
Assert.assertTrue("should never get here!",false); Assert.assertTrue("should never get here!", computedOf);
combined = value;
} }
Assert.assertEquals(q+": actual subDetails combined=="+combined+ Assert.assertEquals(q+": actual subDetails combined=="+combined+
" != value="+value+" Explanation: "+expl, " != value="+value+" Explanation: "+expl,

View File

@ -91,6 +91,7 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
allSims = new ArrayList<>(); allSims = new ArrayList<>();
allSims.add(new ClassicSimilarity()); allSims.add(new ClassicSimilarity());
allSims.add(new BM25Similarity()); allSims.add(new BM25Similarity());
allSims.add(new BooleanSimilarity());
for (BasicModel basicModel : BASIC_MODELS) { for (BasicModel basicModel : BASIC_MODELS) {
for (AfterEffect afterEffect : AFTER_EFFECTS) { for (AfterEffect afterEffect : AFTER_EFFECTS) {
for (Normalization normalization : NORMALIZATIONS) { for (Normalization normalization : NORMALIZATIONS) {

View File

@ -36,7 +36,6 @@ import org.apache.lucene.codecs.lucene70.Lucene70Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.RandomCodec; import org.apache.lucene.index.RandomCodec;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.RandomSimilarity; import org.apache.lucene.search.similarities.RandomSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@ -213,7 +212,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
TimeZone randomTimeZone = randomTimeZone(random()); TimeZone randomTimeZone = randomTimeZone(random());
timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone); timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
TimeZone.setDefault(timeZone); TimeZone.setDefault(timeZone);
similarity = random().nextBoolean() ? new ClassicSimilarity() : new RandomSimilarity(random()); similarity = new RandomSimilarity(random());
// Check codec restrictions once at class level. // Check codec restrictions once at class level.
try { try {

View File

@ -40,7 +40,6 @@ import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.packed.PackedInts;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
@ -273,25 +272,14 @@ public class FSTTester<T> {
System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2); System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
} }
final boolean willRewrite = random.nextBoolean();
final Builder<T> builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, final Builder<T> builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
prune1, prune2, prune1, prune2,
prune1==0 && prune2==0, prune1==0 && prune2==0,
allowRandomSuffixSharing ? random.nextBoolean() : true, allowRandomSuffixSharing ? random.nextBoolean() : true,
allowRandomSuffixSharing ? TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE, allowRandomSuffixSharing ? TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
outputs, outputs,
willRewrite,
PackedInts.DEFAULT,
true, true,
15); 15);
if (LuceneTestCase.VERBOSE) {
if (willRewrite) {
System.out.println("TEST: packed FST");
} else {
System.out.println("TEST: non-packed FST");
}
}
for(InputOutput<T> pair : pairs) { for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) { if (pair.output instanceof List) {
@ -306,7 +294,7 @@ public class FSTTester<T> {
} }
FST<T> fst = builder.finish(); FST<T> fst = builder.finish();
if (random.nextBoolean() && fst != null && !willRewrite) { if (random.nextBoolean() && fst != null) {
IOContext context = LuceneTestCase.newIOContext(random); IOContext context = LuceneTestCase.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context); IndexOutput out = dir.createOutput("fst.bin", context);
fst.save(out); fst.save(out);

View File

@ -70,7 +70,7 @@ Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this r
Versions of Major Components Versions of Major Components
--------------------- ---------------------
Apache Tika 1.13 Apache Tika 1.13
Carrot2 3.12.0 Carrot2 3.15.0
Velocity 1.7 and Velocity Tools 2.0 Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1 Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6 Apache ZooKeeper 3.4.6
@ -81,6 +81,9 @@ Detailed Change List
New Features New Features
---------------------- ----------------------
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
marker. (Dawid Weiss)
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to * SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
any facet command. The filters are applied after any domain change operations. any facet command. The filters are applied after any domain change operations.
Example: { type:terms, field:category, filter:"user:yonik" } Example: { type:terms, field:category, filter:"user:yonik" }
@ -96,11 +99,21 @@ New Features
* SOLR-8542: Adds Solr Learning to Rank (LTR) plugin for reranking results with machine learning models. * SOLR-8542: Adds Solr Learning to Rank (LTR) plugin for reranking results with machine learning models.
(Michael Nilsson, Diego Ceccarelli, Joshua Pantony, Jon Dorando, Naveen Santhapuri, Alessandro Benedetti, David Grohmann, Christine Poerschke) (Michael Nilsson, Diego Ceccarelli, Joshua Pantony, Jon Dorando, Naveen Santhapuri, Alessandro Benedetti, David Grohmann, Christine Poerschke)
* SOLR-9055: Make collection backup/restore extensible. (Hrishikesh Gadre, Varun Thacker, Mark Miller)
* SOLR-9682: JSON Facet API: added "param" query type to facet domain filter specification to obtain
filters via query parameters. (yonik)
* SOLR-9038: Add a command-line tool to manage the snapshots functionality (Hrishikesh Gadre via yonik)
Optimizations Optimizations
---------------------- ----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
filters specified by using those filters as acceptDocs. (yonik) filters specified by using those filters as acceptDocs. (yonik)
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
(Jonny Marks via Christine Poerschke)
Bug Fixes Bug Fixes
---------------------- ----------------------
* SOLR-9701: NPE in export handler when "fl" parameter is omitted. * SOLR-9701: NPE in export handler when "fl" parameter is omitted.
@ -109,15 +122,43 @@ Bug Fixes
* SOLR-9433: SolrCore clean-up logic uses incorrect path to delete dataDir on failure to create a core. * SOLR-9433: SolrCore clean-up logic uses incorrect path to delete dataDir on failure to create a core.
(Evan Sayer, shalin) (Evan Sayer, shalin)
* SOLR-9360: Solr script not properly checking SOLR_PID
(Alessandro Benedetti via Erick Erickson)
* SOLR-9716: RecoveryStrategy sends prep recovery command without setting read time out which can cause
replica recovery to hang indefinitely on network partitions. (Cao Manh Dat, shalin)
* SOLR-9624: In Admin UI, do not attempt to highlight CSV output (Alexandre Rafalovitch)
* SOLR-9005: In files example, add a guard condition to javascript URP script (Alexandre Rafalovitch)
* SOLR-9519: JSON Facet API: don't stop at an empty facet bucket if any sub-facets still have a chance
of matching something due to filter exclusions (which can widen the domain again).
(Michael Sun, yonik)
* SOLR-9740: A bug in macro expansion of multi-valued parameters caused non-expanded values
after the first expanded value in the same multi-valued parameter to be dropped.
(Erik Hatcher, yonik)
Other Changes Other Changes
---------------------- ----------------------
* SOLR-7539: Upgrade the clustering plugin to Carrot2 3.15.0. (Dawid Weiss)
* SOLR-9621: Remove several Guava & Apache Commons calls in favor of java 8 alternatives. * SOLR-9621: Remove several Guava & Apache Commons calls in favor of java 8 alternatives.
(Michael Braun via David Smiley) (Michael Braun via David Smiley)
* SOLR-9720: Refactor Responsewriters to remove dependencies on TupleStream, * SOLR-9720: Refactor Responsewriters to remove dependencies on TupleStream,
Tuple, Explanation (noble) Tuple, Explanation (noble)
* SOLR-9717: Refactor '/export' to not hardcode the JSON output and to use an API (noble)
* SOLR-9739: JavabinCodec implements PushWriter interface (noble)
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
(Christine Poerschke, Noble Paul)
================== 6.3.0 ================== ================== 6.3.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -495,7 +495,7 @@ function solr_pid_by_port() {
# extract the value of the -Djetty.port parameter from a running Solr process # extract the value of the -Djetty.port parameter from a running Solr process
function jetty_port() { function jetty_port() {
SOLR_PID="$1" SOLR_PID="$1"
SOLR_PROC=`ps auxww | grep -w $SOLR_PID | grep start\.jar | grep jetty.port` SOLR_PROC=`ps auxww | grep -w $SOLR_PID | grep start\.jar | grep jetty\.port`
IFS=' ' read -a proc_args <<< "$SOLR_PROC" IFS=' ' read -a proc_args <<< "$SOLR_PROC"
for arg in "${proc_args[@]}" for arg in "${proc_args[@]}"
do do
@ -543,10 +543,10 @@ function get_info() {
done < <(find "$SOLR_PID_DIR" -name "solr-*.pid" -type f) done < <(find "$SOLR_PID_DIR" -name "solr-*.pid" -type f)
else else
# no pid files but check using ps just to be sure # no pid files but check using ps just to be sure
numSolrs=`ps auxww | grep start\.jar | grep solr.solr.home | grep -v grep | wc -l | sed -e 's/^[ \t]*//'` numSolrs=`ps auxww | grep start\.jar | grep solr\.solr\.home | grep -v grep | wc -l | sed -e 's/^[ \t]*//'`
if [ "$numSolrs" != "0" ]; then if [ "$numSolrs" != "0" ]; then
echo -e "\nFound $numSolrs Solr nodes: " echo -e "\nFound $numSolrs Solr nodes: "
PROCESSES=$(ps auxww | grep start\.jar | grep solr.solr.home | grep -v grep | awk '{print $2}' | sort -r) PROCESSES=$(ps auxww | grep start\.jar | grep solr\.solr\.home | grep -v grep | awk '{print $2}' | sort -r)
for ID in $PROCESSES for ID in $PROCESSES
do do
port=`jetty_port "$ID"` port=`jetty_port "$ID"`
@ -1345,7 +1345,7 @@ if [[ "$SCRIPT_CMD" == "start" ]]; then
if [ -z "$SOLR_PID" ]; then if [ -z "$SOLR_PID" ]; then
# not found using the pid file ... but use ps to ensure not found # not found using the pid file ... but use ps to ensure not found
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r` SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
fi fi
if [ "$SOLR_PID" != "" ]; then if [ "$SOLR_PID" != "" ]; then
@ -1358,7 +1358,7 @@ else
SOLR_PID=`solr_pid_by_port "$SOLR_PORT"` SOLR_PID=`solr_pid_by_port "$SOLR_PORT"`
if [ -z "$SOLR_PID" ]; then if [ -z "$SOLR_PID" ]; then
# not found using the pid file ... but use ps to ensure not found # not found using the pid file ... but use ps to ensure not found
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r` SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
fi fi
if [ "$SOLR_PID" != "" ]; then if [ "$SOLR_PID" != "" ]; then
stop_solr "$SOLR_SERVER_DIR" "$SOLR_PORT" "$STOP_KEY" "$SOLR_PID" stop_solr "$SOLR_SERVER_DIR" "$SOLR_PORT" "$STOP_KEY" "$SOLR_PID"
@ -1659,7 +1659,7 @@ function launch_solr() {
exit # subshell! exit # subshell!
fi fi
else else
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r` SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n" echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n"
exit # subshell! exit # subshell!
fi fi
@ -1668,7 +1668,7 @@ function launch_solr() {
else else
echo -e "NOTE: Please install lsof as this script needs it to determine if Solr is listening on port $SOLR_PORT." echo -e "NOTE: Please install lsof as this script needs it to determine if Solr is listening on port $SOLR_PORT."
sleep 10 sleep 10
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r` SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n" echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n"
return; return;
fi fi
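The bin/solr changes above tighten the process-detection patterns in two ways: the dots in jetty.port and solr.solr.home are escaped so they match literal dots rather than any character, and the bare port number is replaced by the full -Djetty.port=$SOLR_PORT argument, so an unrelated process whose command line merely contains the same digits is no longer mistaken for a Solr node. A minimal Java sketch of the over-matching problem (class name and sample command lines are invented for illustration):

    import java.util.regex.Pattern;

    public class GrepEscapeDemo {
        public static void main(String[] args) {
            String solr  = "java -Djetty.port=8983 -jar start.jar";
            // Hypothetical unrelated process whose arguments also contain 8983.
            String other = "java -Dother.service.port=8983 -jar other.jar";

            // Old approach (grep -w $SOLR_PORT): the bare port matches both lines.
            Pattern loose = Pattern.compile("\\b8983\\b");
            System.out.println(loose.matcher(solr).find());   // true
            System.out.println(loose.matcher(other).find());  // true -- false positive

            // New approach: the full, dot-escaped JVM argument matches only Solr.
            // An unescaped '.' would match any character, not just a literal dot.
            Pattern strict = Pattern.compile("-Djetty\\.port=8983");
            System.out.println(strict.matcher(solr).find());  // true
            System.out.println(strict.matcher(other).find()); // false
        }
    }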


@ -19,6 +19,7 @@ package org.apache.solr.handler.clustering;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.Map; import java.util.Map;
@ -44,9 +45,6 @@ import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.collect.Maps;
/** /**
* Provides a plugin for performing cluster analysis. This can either be applied to * Provides a plugin for performing cluster analysis. This can either be applied to
* search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for * search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
@ -68,12 +66,12 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
/** /**
* Declaration-order list of search clustering engines. * Declaration-order list of search clustering engines.
*/ */
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = Maps.newLinkedHashMap(); private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = new LinkedHashMap<>();
/** /**
* Declaration order list of document clustering engines. * Declaration order list of document clustering engines.
*/ */
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = Maps.newLinkedHashMap(); private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = new LinkedHashMap<>();
/** /**
* An unmodifiable view of {@link #searchClusteringEngines}. * An unmodifiable view of {@link #searchClusteringEngines}.
@ -173,7 +171,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
if (engine != null) { if (engine != null) {
checkAvailable(name, engine); checkAvailable(name, engine);
DocListAndSet results = rb.getResults(); DocListAndSet results = rb.getResults();
Map<SolrDocument,Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size()); Map<SolrDocument,Integer> docIds = new HashMap<>(results.docList.size());
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList( SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds); results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req); Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
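The ClusteringComponent change drops Guava's collection factories in favor of plain constructors, which the diamond operator made redundant in Java 7. One subtlety worth noting: Maps.newHashMapWithExpectedSize(n) picks a capacity large enough to hold n entries without rehashing, whereas new HashMap<>(n) treats n as the raw initial capacity, so the table resizes once the size passes n times the 0.75 load factor. A short sketch:

    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class MapFactoryDemo {
        public static void main(String[] args) {
            // Before Java 7:  Map<String, Integer> m = Maps.newLinkedHashMap();
            // With the diamond operator the helper adds nothing:
            Map<String, Integer> engines = new LinkedHashMap<>();
            engines.put("default", 1);

            // new HashMap<>(n) resizes once size > n * 0.75; sizing the capacity
            // up front (as Guava's newHashMapWithExpectedSize does internally)
            // avoids that rehash for an expected n entries.
            int expected = 100;
            Map<String, Integer> docIds = new HashMap<>((int) (expected / 0.75f) + 1);
            System.out.println(engines + ", sized for " + expected + " entries: " + docIds.size());
        }
    }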


@ -58,6 +58,8 @@ import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm; import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode; import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.AttributeNames; import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.shaded.guava.common.base.MoreObjects;
import org.carrot2.shaded.guava.common.base.Strings;
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor; import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor; import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder; import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
@ -69,12 +71,6 @@ import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/** /**
* Search results clustering engine based on Carrot2 clustering algorithms. * Search results clustering engine based on Carrot2 clustering algorithms.
* *
@ -155,7 +151,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
// of this component. This by-name convention lookup is used to simplify configuring algorithms. // of this component. This by-name convention lookup is used to simplify configuring algorithms.
String componentName = initParams.get(ClusteringEngine.ENGINE_NAME); String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'"); log.info("Initializing Clustering Engine '" +
MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
if (!Strings.isNullOrEmpty(componentName)) { if (!Strings.isNullOrEmpty(componentName)) {
IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml"); IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
@ -268,7 +265,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){ protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
SolrParams solrParams = sreq.getParams(); SolrParams solrParams = sreq.getParams();
HashSet<String> fields = Sets.newHashSet(getFieldsForClustering(sreq)); HashSet<String> fields = new HashSet<>(getFieldsForClustering(sreq));
fields.add(idFieldName); fields.add(idFieldName);
fields.add(solrParams.get(CarrotParams.URL_FIELD_NAME, "url")); fields.add(solrParams.get(CarrotParams.URL_FIELD_NAME, "url"));
fields.addAll(getCustomFieldsMap(solrParams).keySet()); fields.addAll(getCustomFieldsMap(solrParams).keySet());
@ -295,7 +292,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
+ " must not be blank."); + " must not be blank.");
} }
final Set<String> fields = Sets.newHashSet(); final Set<String> fields = new HashSet<>();
fields.addAll(Arrays.asList(titleFieldSpec.split("[, ]"))); fields.addAll(Arrays.asList(titleFieldSpec.split("[, ]")));
fields.addAll(Arrays.asList(snippetFieldSpec.split("[, ]"))); fields.addAll(Arrays.asList(snippetFieldSpec.split("[, ]")));
return fields; return fields;
@ -319,7 +316,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
Map<String, String> customFields = getCustomFieldsMap(solrParams); Map<String, String> customFields = getCustomFieldsMap(solrParams);
// Parse language code map string into a map // Parse language code map string into a map
Map<String, String> languageCodeMap = Maps.newHashMap(); Map<String, String> languageCodeMap = new HashMap<>();
if (StringUtils.isNotBlank(languageField)) { if (StringUtils.isNotBlank(languageField)) {
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) { for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":"); final String[] split = pair.split(":");
@ -340,7 +337,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
if (produceSummary) { if (produceSummary) {
highlighter = HighlightComponent.getHighlighter(core); highlighter = HighlightComponent.getHighlighter(core);
if (highlighter != null){ if (highlighter != null){
Map<String, Object> args = Maps.newHashMap(); Map<String, Object> args = new HashMap<>();
snippetFieldAry = snippetFieldSpec.split("[, ]"); snippetFieldAry = snippetFieldSpec.split("[, ]");
args.put(HighlightParams.FIELDS, snippetFieldAry); args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true"); args.put(HighlightParams.HIGHLIGHT, "true");
@ -466,10 +463,10 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
* custom field names. * custom field names.
*/ */
private Map<String, String> getCustomFieldsMap(SolrParams solrParams) { private Map<String, String> getCustomFieldsMap(SolrParams solrParams) {
Map<String, String> customFields = Maps.newHashMap(); Map<String, String> customFields = new HashMap<>();
String [] customFieldsSpec = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME); String [] customFieldsSpec = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME);
if (customFieldsSpec != null) { if (customFieldsSpec != null) {
customFields = Maps.newHashMap(); customFields = new HashMap<>();
for (String customFieldSpec : customFieldsSpec) { for (String customFieldSpec : customFieldsSpec) {
String [] split = customFieldSpec.split(":"); String [] split = customFieldSpec.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) { if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
@ -501,7 +498,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters, private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
SolrParams solrParams) { SolrParams solrParams) {
List<NamedList<Object>> result = Lists.newArrayList(); List<NamedList<Object>> result = new ArrayList<>();
clustersToNamedList(carrotClusters, result, solrParams.getBool( clustersToNamedList(carrotClusters, result, solrParams.getBool(
CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt( CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt(
CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE)); CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
@ -534,7 +531,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Add documents // Add documents
List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments(); List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments();
List<Object> docList = Lists.newArrayList(); List<Object> docList = new ArrayList<>();
cluster.add("docs", docList); cluster.add("docs", docList);
for (Document doc : docs) { for (Document doc : docs) {
docList.add(doc.getField(SOLR_DOCUMENT_ID)); docList.add(doc.getField(SOLR_DOCUMENT_ID));
@ -542,7 +539,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Add subclusters // Add subclusters
if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) { if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) {
List<NamedList<Object>> subclusters = Lists.newArrayList(); List<NamedList<Object>> subclusters = new ArrayList<>();
cluster.add("clusters", subclusters); cluster.add("clusters", subclusters);
clustersToNamedList(outCluster.getSubclusters(), subclusters, clustersToNamedList(outCluster.getSubclusters(), subclusters,
outputSubClusters, maxLabels); outputSubClusters, maxLabels);
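Guava's Objects.firstNonNull was deprecated in favor of MoreObjects.firstNonNull, which is why the engine now calls the copy that Carrot2 ships shaded. The idiom itself needs no library; a plain-Java equivalent as a minimal sketch (class and method names invented):

    public class FirstNonNullDemo {
        // Same contract as MoreObjects.firstNonNull(first, second).
        static <T> T firstNonNull(T first, T second) {
            if (first != null) return first;
            if (second != null) return second;
            throw new NullPointerException("both arguments were null");
        }

        public static void main(String[] args) {
            String componentName = null; // e.g. engine configured without a 'name'
            System.out.println("Initializing Clustering Engine '"
                + firstNonNull(componentName, "<no 'name' attribute>") + "'");
        }
    }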


@ -16,10 +16,10 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set; import java.util.Set;
import com.google.common.collect.ImmutableSet;
/** /**
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration). * Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
* @lucene.experimental * @lucene.experimental
@ -50,7 +50,7 @@ public final class CarrotParams {
*/ */
public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir"; public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of( static final Set<String> CARROT_PARAM_NAMES = new HashSet<>(Arrays.asList(
ALGORITHM, ALGORITHM,
TITLE_FIELD_NAME, TITLE_FIELD_NAME,
@ -66,7 +66,7 @@ public final class CarrotParams {
NUM_DESCRIPTIONS, NUM_DESCRIPTIONS,
OUTPUT_SUB_CLUSTERS, OUTPUT_SUB_CLUSTERS,
RESOURCES_DIR, RESOURCES_DIR,
LANGUAGE_CODE_MAP); LANGUAGE_CODE_MAP));
/** No instances. */ /** No instances. */
private CarrotParams() {} private CarrotParams() {}
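Swapping ImmutableSet.of(...) for new HashSet<>(Arrays.asList(...)) keeps the contents but quietly drops the read-only guarantee: CARROT_PARAM_NAMES is now a mutable set. If that guarantee matters, the JDK can restore it without Guava; a short sketch with illustrative member values:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.HashSet;
    import java.util.Set;

    public class ParamNamesDemo {
        // Wrapping the HashSet keeps the immutability that ImmutableSet.of(...)
        // used to provide; the member strings here are placeholders.
        static final Set<String> PARAM_NAMES = Collections.unmodifiableSet(
            new HashSet<>(Arrays.asList("carrot.algorithm", "carrot.title")));

        public static void main(String[] args) {
            System.out.println(PARAM_NAMES.contains("carrot.algorithm")); // true
            try {
                PARAM_NAMES.add("carrot.other");
            } catch (UnsupportedOperationException e) {
                System.out.println("set is read-only"); // reached
            }
        }
    }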


@ -16,7 +16,9 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.Collection; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
@ -26,6 +28,7 @@ import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.carrot2.core.LanguageCode; import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.Init; import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Processing; import org.carrot2.core.attribute.Processing;
@ -37,9 +40,6 @@ import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable; import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
/** /**
* An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop * An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop
* words from a field's StopFilter to the default stop words used in Carrot2, * words from a field's StopFilter to the default stop words used in Carrot2,
@ -67,7 +67,7 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
/** /**
* A lazily-built cache of stop words per field. * A lazily-built cache of stop words per field.
*/ */
private Multimap<String, CharArraySet> solrStopWords = HashMultimap.create(); private HashMap<String, List<CharArraySet>> solrStopWords = new HashMap<>();
/** /**
* Carrot2's default lexical resources to use in addition to Solr's stop * Carrot2's default lexical resources to use in addition to Solr's stop
@ -79,32 +79,35 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
* Obtains stop words for a field from the associated * Obtains stop words for a field from the associated
* {@link StopFilterFactory}, if any. * {@link StopFilterFactory}, if any.
*/ */
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) { private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
// No need to synchronize here, Carrot2 ensures that instances // No need to synchronize here, Carrot2 ensures that instances
// of this class are not used by multiple threads at a time. // of this class are not used by multiple threads at a time.
synchronized (solrStopWords) {
if (!solrStopWords.containsKey(fieldName)) { if (!solrStopWords.containsKey(fieldName)) {
final Analyzer fieldAnalyzer = core.getLatestSchema().getFieldType(fieldName) solrStopWords.put(fieldName, new ArrayList<>());
.getIndexAnalyzer();
IndexSchema schema = core.getLatestSchema();
final Analyzer fieldAnalyzer = schema.getFieldType(fieldName).getIndexAnalyzer();
if (fieldAnalyzer instanceof TokenizerChain) { if (fieldAnalyzer instanceof TokenizerChain) {
final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer) final TokenFilterFactory[] filterFactories =
.getTokenFilterFactories(); ((TokenizerChain) fieldAnalyzer).getTokenFilterFactories();
for (TokenFilterFactory factory : filterFactories) { for (TokenFilterFactory factory : filterFactories) {
if (factory instanceof StopFilterFactory) { if (factory instanceof StopFilterFactory) {
// StopFilterFactory holds the stop words in a CharArraySet // StopFilterFactory holds the stop words in a CharArraySet
solrStopWords.put(fieldName, CharArraySet stopWords = ((StopFilterFactory) factory).getStopWords();
((StopFilterFactory) factory).getStopWords()); solrStopWords.get(fieldName).add(stopWords);
} }
if (factory instanceof CommonGramsFilterFactory) { if (factory instanceof CommonGramsFilterFactory) {
solrStopWords.put(fieldName, CharArraySet commonWords = ((CommonGramsFilterFactory) factory).getCommonWords();
((CommonGramsFilterFactory) factory) solrStopWords.get(fieldName).add(commonWords);
.getCommonWords());
} }
} }
} }
} }
return solrStopWords.get(fieldName); return solrStopWords.get(fieldName);
} }
}
@Override @Override
public ILexicalData getLexicalData(LanguageCode languageCode) { public ILexicalData getLexicalData(LanguageCode languageCode) {
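The factory's Guava Multimap, which maps one field name to several stop-word sets, becomes an explicit HashMap<String, List<CharArraySet>>: the per-key list the multimap managed implicitly must now be created on first use, and the lazy population is additionally wrapped in a synchronized block. A minimal sketch of the same one-key-to-many-values idiom, using computeIfAbsent as a compact equivalent of the diff's containsKey/put sequence (class and field names invented, String values standing in for CharArraySet):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class MultimapReplacementDemo {
        private final Map<String, List<String>> stopWordsByField = new HashMap<>();

        // Guava's multimap.put(key, value) appends to a per-key collection;
        // with a plain map, the per-key list is created on first use.
        void add(String field, String stopWord) {
            stopWordsByField.computeIfAbsent(field, k -> new ArrayList<>()).add(stopWord);
        }

        public static void main(String[] args) {
            MultimapReplacementDemo demo = new MultimapReplacementDemo();
            demo.add("title", "the");
            demo.add("title", "a");
            System.out.println(demo.stopWordsByField); // {title=[the, a]}
        }
    }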


@ -17,6 +17,9 @@
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -45,9 +48,6 @@ import org.carrot2.core.LanguageCode;
import org.carrot2.util.attribute.AttributeUtils; import org.carrot2.util.attribute.AttributeUtils;
import org.junit.Test; import org.junit.Test;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/** /**
* *
*/ */
@ -211,7 +211,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
// stoplabels.mt, so we're expecting only one cluster with label "online". // stoplabels.mt, so we're expecting only one cluster with label "online".
final List<NamedList<Object>> clusters = checkEngine( final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine(engineName), 1, params); getClusteringEngine(engineName), 1, params);
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online")); assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
} }
@Test @Test
@ -226,7 +226,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
// only one cluster with label "online". // only one cluster with label "online".
final List<NamedList<Object>> clusters = checkEngine( final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine("lexical-resource-check"), 1, params); getClusteringEngine("lexical-resource-check"), 1, params);
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online")); assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
} }
@Test @Test
@ -243,9 +243,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
final List<NamedList<Object>> clusters = checkEngine( final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine("lexical-resource-check"), 2, params); getClusteringEngine("lexical-resource-check"), 2, params);
assertEquals(ImmutableList.of("online"), getLabels(clusters.get(0))); assertEquals(Collections.singletonList("online"), getLabels(clusters.get(0)));
assertEquals(ImmutableList.of("solrownstopword"), assertEquals(Collections.singletonList("solrownstopword"), getLabels(clusters.get(1)));
getLabels(clusters.get(1)));
} }
@Test @Test
@ -395,8 +394,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default"); ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp); Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals( assertEquals(
Lists.newArrayList("stc", "default", "mock"), Arrays.asList("stc", "default", "mock"),
Lists.newArrayList(engines.keySet())); new ArrayList<>(engines.keySet()));
assertEquals( assertEquals(
LingoClusteringAlgorithm.class, LingoClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass()); ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
@ -407,8 +406,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order"); ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp); Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals( assertEquals(
Lists.newArrayList("unavailable", "lingo", "stc", "mock", "default"), Arrays.asList("unavailable", "lingo", "stc", "mock", "default"),
Lists.newArrayList(engines.keySet())); new ArrayList<>(engines.keySet()));
assertEquals( assertEquals(
LingoClusteringAlgorithm.class, LingoClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass()); ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
@ -419,8 +418,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups"); ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp); Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals( assertEquals(
Lists.newArrayList("", "default"), Arrays.asList("", "default"),
Lists.newArrayList(engines.keySet())); new ArrayList<>(engines.keySet()));
assertEquals( assertEquals(
MockClusteringAlgorithm.class, MockClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass()); ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
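These test rewrites are behavior-preserving because assertEquals on lists delegates to List.equals, which compares element by element and ignores the concrete class, so Collections.singletonList and Arrays.asList compare equal to what the Guava factories produced. A two-line check:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class ListEqualityDemo {
        public static void main(String[] args) {
            List<String> a = Collections.singletonList("online");
            List<String> b = new ArrayList<>(Arrays.asList("online"));
            System.out.println(a.equals(b)); // true: same elements, same order
        }
    }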


@ -15,6 +15,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -29,8 +30,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock Carrot2 clustering algorithm that outputs input documents as clusters. * A mock Carrot2 clustering algorithm that outputs input documents as clusters.
* Useful only in tests. * Useful only in tests.
@ -56,7 +55,7 @@ public class EchoClusteringAlgorithm extends ProcessingComponentBase implements
@Override @Override
public void process() throws ProcessingException { public void process() throws ProcessingException {
clusters = Lists.newArrayListWithCapacity(documents.size()); clusters = new ArrayList<>();
for (Document document : documents) { for (Document document : documents) {
final Cluster cluster = new Cluster(); final Cluster cluster = new Cluster();


@ -16,6 +16,7 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -36,8 +37,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock Carrot2 clustering algorithm that outputs stem of each token of each * A mock Carrot2 clustering algorithm that outputs stem of each token of each
* document as a separate cluster. Useful only in tests. * document as a separate cluster. Useful only in tests.
@ -64,7 +63,7 @@ public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase
final AllTokens allTokens = preprocessingContext.allTokens; final AllTokens allTokens = preprocessingContext.allTokens;
final AllWords allWords = preprocessingContext.allWords; final AllWords allWords = preprocessingContext.allWords;
final AllStems allStems = preprocessingContext.allStems; final AllStems allStems = preprocessingContext.allStems;
clusters = Lists.newArrayListWithCapacity(allTokens.image.length); clusters = new ArrayList<>();
for (int i = 0; i < allTokens.image.length; i++) { for (int i = 0; i < allTokens.image.length; i++) {
if (allTokens.wordIndex[i] >= 0) { if (allTokens.wordIndex[i] >= 0) {
clusters.add(new Cluster(new String( clusters.add(new Cluster(new String(


@ -16,6 +16,7 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -33,7 +34,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock Carrot2 clustering algorithm that outputs each token of each document * A mock Carrot2 clustering algorithm that outputs each token of each document
@ -58,8 +58,7 @@ public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase
public void process() throws ProcessingException { public void process() throws ProcessingException {
final PreprocessingContext preprocessingContext = preprocessing.preprocess( final PreprocessingContext preprocessingContext = preprocessing.preprocess(
documents, "", LanguageCode.ENGLISH); documents, "", LanguageCode.ENGLISH);
clusters = Lists clusters = new ArrayList<>();
.newArrayListWithCapacity(preprocessingContext.allTokens.image.length);
for (char[] token : preprocessingContext.allTokens.image) { for (char[] token : preprocessingContext.allTokens.image) {
if (token != null) { if (token != null) {
clusters.add(new Cluster(new String(token))); clusters.add(new Cluster(new String(token)));


@ -16,6 +16,7 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -33,8 +34,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock implementation of Carrot2 clustering algorithm for testing whether the * A mock implementation of Carrot2 clustering algorithm for testing whether the
* customized lexical resource lookup works correctly. This algorithm ignores * customized lexical resource lookup works correctly. This algorithm ignores
@ -60,7 +59,7 @@ public class LexicalResourcesCheckClusteringAlgorithm extends
@Override @Override
public void process() throws ProcessingException { public void process() throws ProcessingException {
clusters = Lists.newArrayList(); clusters = new ArrayList<>();
if (wordsToCheck == null) { if (wordsToCheck == null) {
return; return;
} }


@ -15,13 +15,13 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import com.google.common.collect.Lists;
import org.carrot2.core.*; import org.carrot2.core.*;
import org.carrot2.core.attribute.AttributeNames; import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.attribute.Processing; import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.*; import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.IntRange; import org.carrot2.util.attribute.constraint.IntRange;
import java.util.ArrayList;
import java.util.List; import java.util.List;
@Bindable(prefix = "MockClusteringAlgorithm") @Bindable(prefix = "MockClusteringAlgorithm")
@ -62,7 +62,7 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
@Override @Override
public void process() throws ProcessingException { public void process() throws ProcessingException {
clusters = Lists.newArrayList(); clusters = new ArrayList<>();
if (documents == null) { if (documents == null) {
return; return;
} }
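Throughout these mock algorithms, Lists.newArrayListWithCapacity(n) becomes a bare new ArrayList<>(), which silently discards the capacity hint; the direct JDK equivalent would be new ArrayList<>(n). For short-lived test collections the cost is only a few backing-array growths, as this sketch illustrates (names invented):

    import java.util.ArrayList;
    import java.util.List;

    public class CapacityHintDemo {
        public static void main(String[] args) {
            int expected = 1000;

            // Direct replacement for Lists.newArrayListWithCapacity(expected):
            List<Integer> sized = new ArrayList<>(expected);

            // What the diff uses: functionally identical, but the backing
            // array grows in steps (roughly 1.5x) as elements are added.
            List<Integer> unsized = new ArrayList<>();

            for (int i = 0; i < expected; i++) {
                sized.add(i);
                unsized.add(i);
            }
            System.out.println(sized.equals(unsized)); // true
        }
    }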


@ -26,6 +26,7 @@ import java.util.Map;
import java.util.Optional; import java.util.Optional;
import java.util.Properties; import java.util.Properties;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
@ -35,6 +36,7 @@ import org.apache.solr.common.cloud.Replica.State;
import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
@ -68,31 +70,13 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception { public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr(COLLECTION_PROP); String collectionName = message.getStr(COLLECTION_PROP);
String backupName = message.getStr(NAME); String backupName = message.getStr(NAME);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
String asyncId = message.getStr(ASYNC);
String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY); String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
Optional<CollectionSnapshotMetaData> snapshotMeta = Optional.empty();
if (commitName != null) {
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
if (!snapshotMeta.isPresent()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName
+ " does not exist for collection " + collectionName);
}
if (snapshotMeta.get().getStatus() != SnapshotStatus.Successful) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName
+ " has not completed successfully. The status is " + snapshotMeta.get().getStatus());
}
}
Map<String, String> requestMap = new HashMap<>();
Instant startTime = Instant.now(); Instant startTime = Instant.now();
CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer(); CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer();
BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo)); BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader, collectionName); BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader);
// Backup location // Backup location
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION)); URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
@ -106,51 +90,17 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
// Create a directory to store backup details. // Create a directory to store backup details.
repository.createDirectory(backupPath); repository.createDirectory(backupPath);
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName, String strategy = message.getStr(CollectionAdminParams.INDEX_BACKUP_STRATEGY, CollectionAdminParams.COPY_FILES_STRATEGY);
backupPath); switch (strategy) {
case CollectionAdminParams.COPY_FILES_STRATEGY: {
Collection<String> shardsToConsider = Collections.emptySet(); copyIndexFiles(backupPath, message, results);
if (snapshotMeta.isPresent()) { break;
shardsToConsider = snapshotMeta.get().getShards();
} }
case CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY: {
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) { break;
Replica replica = null;
if (snapshotMeta.isPresent()) {
if (!shardsToConsider.contains(slice.getName())) {
log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.",
slice.getName(), collectionName, snapshotMeta.get().getName());
continue;
}
replica = selectReplicaWithSnapshot(snapshotMeta.get(), slice);
} else {
// Note - Actually this can return a null value when there is no leader for this shard.
replica = slice.getLeader();
if (replica == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + slice.getName() + " of collection " + collectionName);
} }
} }
String coreName = replica.getStr(CORE_NAME_PROP);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
params.set(NAME, slice.getName());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString()); // note: index dir will be here then the "snapshot." + slice name
params.set(CORE_NAME_PROP, coreName);
if (snapshotMeta.isPresent()) {
params.set(CoreAdminParams.COMMIT_NAME, snapshotMeta.get().getName());
}
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
}
log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);
ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
log.info("Starting to backup ZK data for backupName={}", backupName); log.info("Starting to backup ZK data for backupName={}", backupName);
//Download the configs //Download the configs
@ -168,6 +118,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName); properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName);
properties.put(COLL_CONF, configName); properties.put(COLL_CONF, configName);
properties.put(BackupManager.START_TIME_PROP, startTime.toString()); properties.put(BackupManager.START_TIME_PROP, startTime.toString());
properties.put(BackupManager.INDEX_VERSION_PROP, Version.LATEST.toString());
//TODO: Add MD5 of the configset. If during restore the same name configset exists then we can compare checksums to see if they are the same. //TODO: Add MD5 of the configset. If during restore the same name configset exists then we can compare checksums to see if they are the same.
//if they are not the same then we can throw an error or have an 'overwriteConfig' flag //if they are not the same then we can throw an error or have an 'overwriteConfig' flag
//TODO save numDocs for the shardLeader. We can use it to sanity check the restore. //TODO save numDocs for the shardLeader. We can use it to sanity check the restore.
@ -202,4 +153,73 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
return r.get(); return r.get();
} }
private void copyIndexFiles(URI backupPath, ZkNodeProps request, NamedList results) throws Exception {
String collectionName = request.getStr(COLLECTION_PROP);
String backupName = request.getStr(NAME);
String asyncId = request.getStr(ASYNC);
String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
Map<String, String> requestMap = new HashMap<>();
String commitName = request.getStr(CoreAdminParams.COMMIT_NAME);
Optional<CollectionSnapshotMetaData> snapshotMeta = Optional.empty();
if (commitName != null) {
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
if (!snapshotMeta.isPresent()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName
+ " does not exist for collection " + collectionName);
}
if (snapshotMeta.get().getStatus() != SnapshotStatus.Successful) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName
+ " has not completed successfully. The status is " + snapshotMeta.get().getStatus());
}
}
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName,
backupPath);
Collection<String> shardsToConsider = Collections.emptySet();
if (snapshotMeta.isPresent()) {
shardsToConsider = snapshotMeta.get().getShards();
}
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
Replica replica = null;
if (snapshotMeta.isPresent()) {
if (!shardsToConsider.contains(slice.getName())) {
log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.",
slice.getName(), collectionName, snapshotMeta.get().getName());
continue;
}
replica = selectReplicaWithSnapshot(snapshotMeta.get(), slice);
} else {
// Note - Actually this can return a null value when there is no leader for this shard.
replica = slice.getLeader();
if (replica == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + slice.getName() + " of collection " + collectionName);
}
}
String coreName = replica.getStr(CORE_NAME_PROP);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
params.set(NAME, slice.getName());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repoName);
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString()); // note: index dir will be here then the "snapshot." + slice name
params.set(CORE_NAME_PROP, coreName);
if (snapshotMeta.isPresent()) {
params.set(CoreAdminParams.COMMIT_NAME, snapshotMeta.get().getName());
}
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
}
log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);
ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
}
} }
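BackupCmd is largely a reorganization: the per-shard snapshot loop moves verbatim into the new copyIndexFiles method, and call() now dispatches on an index-backup-strategy request parameter, where the copy-files strategy performs the full index backup and the no-index strategy writes only the ZK meta-data and configs. A minimal sketch of that dispatch shape; the literal strategy strings below are assumptions, the real constants live in CollectionAdminParams:

    public class BackupStrategyDemo {
        // Placeholder values; see org.apache.solr.common.params.CollectionAdminParams.
        static final String COPY_FILES_STRATEGY = "copy-files";
        static final String NO_INDEX_BACKUP_STRATEGY = "none";

        static void backup(String strategy) {
            switch (strategy) {
                case COPY_FILES_STRATEGY:
                    System.out.println("back up index files from one replica per shard");
                    break;
                case NO_INDEX_BACKUP_STRATEGY:
                    System.out.println("skip index files; back up ZK meta-data only");
                    break;
                default:
                    throw new IllegalArgumentException("unknown strategy: " + strategy);
            }
        }

        public static void main(String[] args) {
            backup(COPY_FILES_STRATEGY);
            backup(NO_INDEX_BACKUP_STRATEGY);
        }
    }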


@ -19,6 +19,7 @@ package org.apache.solr.cloud;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.net.SocketTimeoutException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@ -572,8 +573,6 @@ public class RecoveryStrategy extends Thread implements Closeable {
private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice) private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice)
throws SolrServerException, IOException, InterruptedException, ExecutionException { throws SolrServerException, IOException, InterruptedException, ExecutionException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) {
client.setConnectionTimeout(30000);
WaitForState prepCmd = new WaitForState(); WaitForState prepCmd = new WaitForState();
prepCmd.setCoreName(leaderCoreName); prepCmd.setCoreName(leaderCoreName);
prepCmd.setNodeName(zkController.getNodeName()); prepCmd.setNodeName(zkController.getNodeName());
@ -585,6 +584,28 @@ public class RecoveryStrategy extends Thread implements Closeable {
if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) { if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) {
prepCmd.setOnlyIfLeaderActive(true); prepCmd.setOnlyIfLeaderActive(true);
} }
final int maxTries = 30;
for (int numTries = 0; numTries < maxTries; numTries++) {
try {
sendPrepRecoveryCmd(leaderBaseUrl, prepCmd);
break;
} catch (ExecutionException e) {
SolrServerException solrException = (SolrServerException) e.getCause();
if (solrException.getRootCause() instanceof SocketTimeoutException && numTries < maxTries) {
LOG.warn("Socket timeout when send prep recovery cmd, retrying.. ");
continue;
}
throw e;
}
}
}
private void sendPrepRecoveryCmd(String leaderBaseUrl, WaitForState prepCmd)
throws SolrServerException, IOException, InterruptedException, ExecutionException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) {
client.setConnectionTimeout(10000);
client.setSoTimeout(10000);
HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd); HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);
prevSendPreRecoveryHttpUriRequest = mrr.httpUriRequest; prevSendPreRecoveryHttpUriRequest = mrr.httpUriRequest;
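The recovery change splits sendPrepRecoveryCmd in two and bounds it with a retry loop: the request now uses 10-second connection and socket timeouts instead of the old 30-second connection timeout, and a SocketTimeoutException root cause is retried up to 30 times while any other failure propagates. (One tentative observation: numTries < maxTries is always true inside the loop body, so a thirtieth consecutive timeout appears to fall out of the loop silently rather than rethrow.) A self-contained sketch of the retry shape, with a stub standing in for the prep-recovery request:

    import java.net.SocketTimeoutException;
    import java.util.concurrent.ExecutionException;

    public class PrepRecoveryRetryDemo {
        static int calls = 0;

        // Stand-in for the prep-recovery request; times out twice, then succeeds.
        static void sendOnce() throws ExecutionException {
            if (++calls < 3) {
                throw new ExecutionException(new SocketTimeoutException("read timed out"));
            }
            System.out.println("prep recovery acknowledged after " + calls + " attempts");
        }

        public static void main(String[] args) throws Exception {
            final int maxTries = 30;
            for (int numTries = 0; numTries < maxTries; numTries++) {
                try {
                    sendOnce();
                    break; // success: stop retrying
                } catch (ExecutionException e) {
                    // Retry only on socket timeouts; anything else propagates.
                    if (e.getCause() instanceof SocketTimeoutException) {
                        System.out.println("socket timeout, retrying...");
                        continue;
                    }
                    throw e;
                }
            }
        }
    }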


@ -87,7 +87,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION)); URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
URI backupPath = repository.resolve(location, backupName); URI backupPath = repository.resolve(location, backupName);
ZkStateReader zkStateReader = ocmh.zkStateReader; ZkStateReader zkStateReader = ocmh.zkStateReader;
BackupManager backupMgr = new BackupManager(repository, zkStateReader, restoreCollectionName); BackupManager backupMgr = new BackupManager(repository, zkStateReader);
Properties properties = backupMgr.readBackupProperties(location, backupName); Properties properties = backupMgr.readBackupProperties(location, backupName);
String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP); String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP);


@ -110,7 +110,6 @@ import org.apache.solr.response.RubyResponseWriter;
import org.apache.solr.response.SchemaXmlResponseWriter; import org.apache.solr.response.SchemaXmlResponseWriter;
import org.apache.solr.response.SmileResponseWriter; import org.apache.solr.response.SmileResponseWriter;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.SortingResponseWriter;
import org.apache.solr.response.XMLResponseWriter; import org.apache.solr.response.XMLResponseWriter;
import org.apache.solr.response.transform.TransformerFactory; import org.apache.solr.response.transform.TransformerFactory;
import org.apache.solr.rest.ManagedResourceStorage; import org.apache.solr.rest.ManagedResourceStorage;
@ -2332,7 +2331,6 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
m.put("raw", new RawResponseWriter()); m.put("raw", new RawResponseWriter());
m.put(CommonParams.JAVABIN, new BinaryResponseWriter()); m.put(CommonParams.JAVABIN, new BinaryResponseWriter());
m.put("csv", new CSVResponseWriter()); m.put("csv", new CSVResponseWriter());
m.put("xsort", new SortingResponseWriter());
m.put("schema.xml", new SchemaXmlResponseWriter()); m.put("schema.xml", new SchemaXmlResponseWriter());
m.put("smile", new SmileResponseWriter()); m.put("smile", new SmileResponseWriter());
m.put(ReplicationHandler.FILE_STREAM, getFileStreamWriter()); m.put(ReplicationHandler.FILE_STREAM, getFileStreamWriter());
@ -2350,13 +2348,22 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
@Override @Override
public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse response) throws IOException { public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse response) throws IOException {
RawWriter rawWriter = (RawWriter) response.getValues().get(ReplicationHandler.FILE_STREAM); RawWriter rawWriter = (RawWriter) response.getValues().get(ReplicationHandler.FILE_STREAM);
if(rawWriter!=null) rawWriter.write(out); if (rawWriter != null) {
rawWriter.write(out);
if (rawWriter instanceof Closeable) ((Closeable) rawWriter).close();
}
} }
@Override @Override
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) { public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
RawWriter rawWriter = (RawWriter) response.getValues().get(ReplicationHandler.FILE_STREAM);
if (rawWriter != null) {
return rawWriter.getContentType();
} else {
return BinaryResponseParser.BINARY_CONTENT_TYPE; return BinaryResponseParser.BINARY_CONTENT_TYPE;
} }
}
}; };
} }
@ -2365,6 +2372,9 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
} }
public interface RawWriter { public interface RawWriter {
default String getContentType() {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
}
void write(OutputStream os) throws IOException ; void write(OutputStream os) throws IOException ;
} }
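Two changes land in SolrCore: the "xsort" registration of SortingResponseWriter is removed, and the FILE_STREAM writer now closes Closeable RawWriters and asks the RawWriter for its content type, which the interface exposes through a new default method. Default methods let an interface grow without breaking existing implementors, and since write remains the only abstract method, RawWriter still works as a lambda target. A sketch (the placeholder content type stands in for BinaryResponseParser.BINARY_CONTENT_TYPE):

    import java.io.IOException;
    import java.io.OutputStream;

    public class DefaultMethodDemo {
        interface RawWriter {
            // Newly added, with a default body: implementations written before
            // this method existed keep compiling and inherit this value.
            default String getContentType() {
                return "application/octet-stream"; // placeholder
            }
            void write(OutputStream os) throws IOException;
        }

        public static void main(String[] args) {
            RawWriter legacy = os -> os.write(new byte[] {1, 2, 3});
            System.out.println(legacy.getContentType()); // inherited default
        }
    }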


@ -68,7 +68,7 @@ public class BackupManager {
protected final ZkStateReader zkStateReader; protected final ZkStateReader zkStateReader;
protected final BackupRepository repository; protected final BackupRepository repository;
public BackupManager(BackupRepository repository, ZkStateReader zkStateReader, String collectionName) { public BackupManager(BackupRepository repository, ZkStateReader zkStateReader) {
this.repository = Objects.requireNonNull(repository); this.repository = Objects.requireNonNull(repository);
this.zkStateReader = Objects.requireNonNull(zkStateReader); this.zkStateReader = Objects.requireNonNull(zkStateReader);
} }
@ -126,6 +126,7 @@ public class BackupManager {
* *
* @param backupLoc The base path used to store the backup data. * @param backupLoc The base path used to store the backup data.
* @param backupId The unique name for the backup. * @param backupId The unique name for the backup.
* @param collectionName The name of the collection whose meta-data is to be returned.
* @return the meta-data information for the backed-up collection. * @return the meta-data information for the backed-up collection.
* @throws IOException in case of errors. * @throws IOException in case of errors.
*/ */
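BackupManager no longer binds to one collection at construction time; the collection name moves into the individual method signatures (hence the new @param above), so a single manager instance can serve BackupCmd and RestoreCmd for any collection. A schematic before/after of that refactoring pattern (class and method names invented):

    public class ConstructorToParameterDemo {
        // Before: the collection is fixed when the manager is created.
        static class Before {
            private final String collectionName;
            Before(String collectionName) { this.collectionName = collectionName; }
            String readMeta() { return "meta for " + collectionName; }
        }

        // After: one instance serves any collection; the name travels with the call.
        static class After {
            String readMeta(String collectionName) { return "meta for " + collectionName; }
        }

        public static void main(String[] args) {
            System.out.println(new Before("techproducts").readMeta());
            System.out.println(new After().readMeta("techproducts"));
        }
    }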


@ -0,0 +1,468 @@
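The file below is entirely new: SolrSnapshotsTool, a command-line utility that creates, deletes, lists, describes, and exports collection-level snapshots by driving the SolrJ collection-admin API. For orientation before the listing, a minimal usage sketch of the same building blocks the tool wraps; the ZooKeeper address, collection, and snapshot names are example values:

    import org.apache.solr.client.solrj.impl.CloudSolrClient;
    import org.apache.solr.client.solrj.request.CollectionAdminRequest;
    import org.apache.solr.client.solrj.response.CollectionAdminResponse;

    public class SnapshotUsageDemo {
        public static void main(String[] args) throws Exception {
            try (CloudSolrClient client =
                     new CloudSolrClient.Builder().withZkHost("zk1:2181").build()) {
                CollectionAdminRequest.CreateSnapshot req =
                    new CollectionAdminRequest.CreateSnapshot("techproducts", "weekly");
                CollectionAdminResponse resp = req.process(client);
                System.out.println("CREATESNAPSHOT status: " + resp.getStatus()); // 0 on success
            }
        }
    }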
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.fs.Path;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
/**
* This class provides utility functions required for Solr snapshots functionality.
*/
public class SolrSnapshotsTool implements Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final DateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z", Locale.getDefault());
private static final String CREATE = "create";
private static final String DELETE = "delete";
private static final String LIST = "list";
private static final String DESCRIBE = "describe";
private static final String PREPARE_FOR_EXPORT = "prepare-snapshot-export";
private static final String EXPORT_SNAPSHOT = "export";
private static final String HELP = "help";
private static final String COLLECTION = "c";
private static final String TEMP_DIR = "t";
private static final String DEST_DIR = "d";
private static final String SOLR_ZK_ENSEMBLE = "z";
private static final String HDFS_PATH_PREFIX = "p";
private static final String BACKUP_REPO_NAME = "r";
private static final String ASYNC_REQ_ID = "i";
private static final List<String> OPTION_HELP_ORDER = Arrays.asList(CREATE, DELETE, LIST, DESCRIBE,
PREPARE_FOR_EXPORT, EXPORT_SNAPSHOT, HELP, SOLR_ZK_ENSEMBLE, COLLECTION, DEST_DIR, BACKUP_REPO_NAME,
ASYNC_REQ_ID, TEMP_DIR, HDFS_PATH_PREFIX);
private final CloudSolrClient solrClient;
public SolrSnapshotsTool(String solrZkEnsemble) {
solrClient = (new CloudSolrClient.Builder()).withZkHost(solrZkEnsemble).build();
}
@Override
public void close() throws IOException {
if (solrClient != null) {
solrClient.close();
}
}
public void createSnapshot(String collectionName, String snapshotName) {
CollectionAdminRequest.CreateSnapshot createSnap = new CollectionAdminRequest.CreateSnapshot(collectionName, snapshotName);
CollectionAdminResponse resp;
try {
resp = createSnap.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The CREATESNAPSHOT request failed. The status code is " + resp.getStatus());
System.out.println("Successfully created snapshot with name " + snapshotName + " for collection " + collectionName);
} catch (Exception e) {
log.error("Failed to create a snapshot with name " + snapshotName + " for collection " + collectionName, e);
System.out.println("Failed to create a snapshot with name " + snapshotName + " for collection " + collectionName
+" due to following error : "+e.getLocalizedMessage());
}
}
public void deleteSnapshot(String collectionName, String snapshotName) {
CollectionAdminRequest.DeleteSnapshot deleteSnap = new CollectionAdminRequest.DeleteSnapshot(collectionName, snapshotName);
CollectionAdminResponse resp;
try {
resp = deleteSnap.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The DELETESNAPSHOT request failed. The status code is " + resp.getStatus());
System.out.println("Successfully deleted snapshot with name " + snapshotName + " for collection " + collectionName);
} catch (Exception e) {
log.error("Failed to delete a snapshot with name " + snapshotName + " for collection " + collectionName, e);
System.out.println("Failed to delete a snapshot with name " + snapshotName + " for collection " + collectionName
+" due to following error : "+e.getLocalizedMessage());
}
}
@SuppressWarnings("rawtypes")
public void listSnapshots(String collectionName) {
CollectionAdminRequest.ListSnapshots listSnaps = new CollectionAdminRequest.ListSnapshots(collectionName);
CollectionAdminResponse resp;
try {
resp = listSnaps.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The LISTSNAPSHOTS request failed. The status code is " + resp.getStatus());
NamedList apiResult = (NamedList) resp.getResponse().get(SolrSnapshotManager.SNAPSHOTS_INFO);
for (int i = 0; i < apiResult.size(); i++) {
System.out.println(apiResult.getName(i));
}
} catch (Exception e) {
log.error("Failed to list snapshots for collection " + collectionName, e);
System.out.println("Failed to list snapshots for collection " + collectionName
+" due to following error : "+e.getLocalizedMessage());
}
}
public void describeSnapshot(String collectionName, String snapshotName) {
try {
Collection<CollectionSnapshotMetaData> snaps = listCollectionSnapshots(collectionName);
for (CollectionSnapshotMetaData m : snaps) {
if (snapshotName.equals(m.getName())) {
System.out.println("Name: " + m.getName());
System.out.println("Status: " + m.getStatus());
System.out.println("Time of creation: " + dateFormat.format(m.getCreationDate()));
System.out.println("Total number of cores with snapshot: " + m.getReplicaSnapshots().size());
System.out.println("-----------------------------------");
for (CoreSnapshotMetaData n : m.getReplicaSnapshots()) {
StringBuilder builder = new StringBuilder();
builder.append("Core [name=");
builder.append(n.getCoreName());
builder.append(", leader=");
builder.append(n.isLeader());
builder.append(", generation=");
builder.append(n.getGenerationNumber());
builder.append(", indexDirPath=");
builder.append(n.getIndexDirPath());
builder.append("]\n");
System.out.println(builder.toString());
}
}
}
} catch (Exception e) {
log.error("Failed to fetch snapshot details", e);
System.out.println("Failed to fetch snapshot details due to following error : " + e.getLocalizedMessage());
}
}
public Map<String, List<String>> getIndexFilesPathForSnapshot(String collectionName, String snapshotName, Optional<String> pathPrefix)
throws SolrServerException, IOException {
Map<String, List<String>> result = new HashMap<>();
Collection<CollectionSnapshotMetaData> snaps = listCollectionSnapshots(collectionName);
Optional<CollectionSnapshotMetaData> meta = Optional.empty();
for (CollectionSnapshotMetaData m : snaps) {
if (snapshotName.equals(m.getName())) {
meta = Optional.of(m);
}
}
if (!meta.isPresent()) {
throw new IllegalArgumentException("The snapshot named " + snapshotName
+ " is not found for collection " + collectionName);
}
DocCollection collectionState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
for (Slice s : collectionState.getSlices()) {
List<CoreSnapshotMetaData> replicaSnaps = meta.get().getReplicaSnapshotsForShard(s.getName());
// Prepare a list of *existing* replicas (since one or more replicas could have been deleted after the snapshot creation).
List<CoreSnapshotMetaData> availableReplicas = new ArrayList<>();
for (CoreSnapshotMetaData m : replicaSnaps) {
if (isReplicaAvailable(s, m.getCoreName())) {
availableReplicas.add(m);
}
}
if (availableReplicas.isEmpty()) {
throw new IllegalArgumentException(
"The snapshot named " + snapshotName + " was not found for shard "
+ s.getName() + " of collection " + collectionName);
}
// Prefer a leader replica (at the time when the snapshot was created).
CoreSnapshotMetaData coreSnap = availableReplicas.get(0);
for (CoreSnapshotMetaData m : availableReplicas) {
if (m.isLeader()) {
coreSnap = m;
}
}
String indexDirPath = coreSnap.getIndexDirPath();
if (pathPrefix.isPresent()) {
// If the path prefix is specified, rebuild the path to the index directory.
Path t = new Path(coreSnap.getIndexDirPath());
indexDirPath = (new Path(pathPrefix.get(), t.toUri().getPath())).toString();
}
List<String> paths = new ArrayList<>();
for (String fileName : coreSnap.getFiles()) {
Path p = new Path(indexDirPath, fileName);
paths.add(p.toString());
}
result.put(s.getName(), paths);
}
return result;
}
public void buildCopyListings(String collectionName, String snapshotName, String localFsPath, Optional<String> pathPrefix)
throws SolrServerException, IOException {
Map<String, List<String>> paths = getIndexFilesPathForSnapshot(collectionName, snapshotName, pathPrefix);
for (Map.Entry<String,List<String>> entry : paths.entrySet()) {
StringBuilder filesBuilder = new StringBuilder();
for (String filePath : entry.getValue()) {
filesBuilder.append(filePath);
filesBuilder.append("\n");
}
String files = filesBuilder.toString().trim();
try (Writer w = new OutputStreamWriter(new FileOutputStream(new File(localFsPath, entry.getKey())), StandardCharsets.UTF_8)) {
w.write(files);
}
}
}
public void backupCollectionMetaData(String collectionName, String snapshotName, String backupLoc) throws SolrServerException, IOException {
// Backup the collection meta-data
CollectionAdminRequest.Backup backup = new CollectionAdminRequest.Backup(collectionName, snapshotName);
backup.setIndexBackupStrategy(CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY);
backup.setLocation(backupLoc);
CollectionAdminResponse resp = backup.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The request failed. The status code is " + resp.getStatus());
}
public void prepareForExport(String collectionName, String snapshotName, String localFsPath, Optional<String> pathPrefix, String destPath) {
try {
buildCopyListings(collectionName, snapshotName, localFsPath, pathPrefix);
System.out.println("Successfully prepared copylisting for the snapshot export.");
} catch (Exception e) {
log.error("Failed to prepare a copylisting for snapshot with name " + snapshotName + " for collection "
+ collectionName, e);
System.out.println("Failed to prepare a copylisting for snapshot with name " + snapshotName + " for collection "
+ collectionName + " due to following error : " + e.getLocalizedMessage());
System.exit(1);
}
try {
backupCollectionMetaData(collectionName, snapshotName, destPath);
System.out.println("Successfully backed up collection meta-data");
} catch (Exception e) {
log.error("Failed to backup collection meta-data for collection " + collectionName, e);
System.out.println("Failed to backup collection meta-data for collection " + collectionName
+ " due to following error : " + e.getLocalizedMessage());
System.exit(1);
}
}
public void exportSnapshot(String collectionName, String snapshotName, String destPath, Optional<String> backupRepo,
Optional<String> asyncReqId) {
try {
CollectionAdminRequest.Backup backup = new CollectionAdminRequest.Backup(collectionName, snapshotName);
backup.setIndexBackupStrategy(CollectionAdminParams.COPY_FILES_STRATEGY);
backup.setLocation(destPath);
if (backupRepo.isPresent()) {
backup.setRepositoryName(backupRepo.get());
}
if (asyncReqId.isPresent()) {
backup.setAsyncId(asyncReqId.get());
}
CollectionAdminResponse resp = backup.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The request failed. The status code is " + resp.getStatus());
} catch (Exception e) {
log.error("Failed to backup collection meta-data for collection " + collectionName, e);
System.out.println("Failed to backup collection meta-data for collection " + collectionName
+ " due to following error : " + e.getLocalizedMessage());
System.exit(1);
}
}
public static void main(String[] args) throws IOException {
CommandLineParser parser = new PosixParser();
Options options = new Options();
options.addOption(null, CREATE, true, "This command will create a snapshot with the specified name.");
options.addOption(null, DELETE, true, "This command will delete a snapshot with the specified name.");
options.addOption(null, LIST, false, "This command will list all the named snapshots for the specified collection.");
options.addOption(null, DESCRIBE, true, "This command will print details for a named snapshot for the specified collection.");
options.addOption(null, PREPARE_FOR_EXPORT, true, "This command will prepare copylistings for the specified snapshot."
+ " This command should be used only if Solr is deployed with Hadoop and the collection index files are stored on a shared"
+ " file-system, e.g. HDFS.");
options.addOption(null, EXPORT_SNAPSHOT, true, "This command will create a backup for the specified snapshot.");
options.addOption(null, HELP, false, "This command will print the help message for the snapshot-related commands.");
options.addOption(TEMP_DIR, true, "This parameter specifies the path of a temporary directory on the local filesystem"
+ " used during the prepare-snapshot-export command.");
options.addOption(DEST_DIR, true, "This parameter specifies the path on the shared file-system (e.g. HDFS) where the snapshot-related"
+ " information should be stored.");
options.addOption(COLLECTION, true, "This parameter specifies the name of the collection to be used during the snapshot operation.");
options.addOption(SOLR_ZK_ENSEMBLE, true, "This parameter specifies the Solr ZooKeeper ensemble address.");
options.addOption(HDFS_PATH_PREFIX, true, "This parameter specifies the HDFS URI prefix to be used"
+ " during snapshot export preparation. This is applicable only if the Solr collection index files are stored on HDFS.");
options.addOption(BACKUP_REPO_NAME, true, "This parameter specifies the name of the backup repository to be used"
+ " during the snapshot export operation.");
options.addOption(ASYNC_REQ_ID, true, "This parameter specifies the async request identifier to be used"
+ " during the snapshot export operation.");
CommandLine cmd = null;
try {
cmd = parser.parse(options, args);
} catch (ParseException e) {
System.out.println(e.getLocalizedMessage());
printHelp(options);
System.exit(1);
}
if (cmd.hasOption(CREATE) || cmd.hasOption(DELETE) || cmd.hasOption(LIST) || cmd.hasOption(DESCRIBE)
|| cmd.hasOption(PREPARE_FOR_EXPORT) || cmd.hasOption(EXPORT_SNAPSHOT)) {
try (SolrSnapshotsTool tool = new SolrSnapshotsTool(cmd.getOptionValue(SOLR_ZK_ENSEMBLE))) {
if (cmd.hasOption(CREATE)) {
String snapshotName = cmd.getOptionValue(CREATE);
String collectionName = cmd.getOptionValue(COLLECTION);
tool.createSnapshot(collectionName, snapshotName);
} else if (cmd.hasOption(DELETE)) {
String snapshotName = cmd.getOptionValue(DELETE);
String collectionName = cmd.getOptionValue(COLLECTION);
tool.deleteSnapshot(collectionName, snapshotName);
} else if (cmd.hasOption(LIST)) {
String collectionName = cmd.getOptionValue(COLLECTION);
tool.listSnapshots(collectionName);
} else if (cmd.hasOption(DESCRIBE)) {
String snapshotName = cmd.getOptionValue(DESCRIBE);
String collectionName = cmd.getOptionValue(COLLECTION);
tool.describeSnapshot(collectionName, snapshotName);
} else if (cmd.hasOption(PREPARE_FOR_EXPORT)) {
String snapshotName = cmd.getOptionValue(PREPARE_FOR_EXPORT);
String collectionName = cmd.getOptionValue(COLLECTION);
String localFsDir = requiredArg(options, cmd, TEMP_DIR);
String hdfsOpDir = requiredArg(options, cmd, DEST_DIR);
Optional<String> pathPrefix = Optional.ofNullable(cmd.getOptionValue(HDFS_PATH_PREFIX));
if (pathPrefix.isPresent()) {
try {
new URI(pathPrefix.get());
} catch (URISyntaxException e) {
System.out.println(
"The specified file system path prefix " + pathPrefix.get()
+ " is invalid. The error is " + e.getLocalizedMessage());
System.exit(1);
}
}
tool.prepareForExport(collectionName, snapshotName, localFsDir, pathPrefix, hdfsOpDir);
} else if (cmd.hasOption(EXPORT_SNAPSHOT)) {
String snapshotName = cmd.getOptionValue(EXPORT_SNAPSHOT);
String collectionName = cmd.getOptionValue(COLLECTION);
String destDir = requiredArg(options, cmd, DEST_DIR);
Optional<String> backupRepo = Optional.ofNullable(cmd.getOptionValue(BACKUP_REPO_NAME));
Optional<String> asyncReqId = Optional.ofNullable(cmd.getOptionValue(ASYNC_REQ_ID));
tool.exportSnapshot(collectionName, snapshotName, destDir, backupRepo, asyncReqId);
}
}
} else if (cmd.hasOption(HELP)) {
printHelp(options);
} else {
System.out.println("Unknown command specified.");
printHelp(options);
}
}
private static String requiredArg(Options options, CommandLine cmd, String optVal) {
if (!cmd.hasOption(optVal)) {
System.out.println("Please specify the value for option " + optVal);
printHelp(options);
System.exit(1);
}
return cmd.getOptionValue(optVal);
}
private static boolean isReplicaAvailable(Slice s, String coreName) {
for (Replica r: s.getReplicas()) {
if (coreName.equals(r.getCoreName())) {
return true;
}
}
return false;
}
private Collection<CollectionSnapshotMetaData> listCollectionSnapshots(String collectionName)
throws SolrServerException, IOException {
CollectionAdminRequest.ListSnapshots listSnapshots = new CollectionAdminRequest.ListSnapshots(collectionName);
CollectionAdminResponse resp = listSnapshots.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0);
NamedList apiResult = (NamedList) resp.getResponse().get(SolrSnapshotManager.SNAPSHOTS_INFO);
Collection<CollectionSnapshotMetaData> result = new ArrayList<>();
for (int i = 0; i < apiResult.size(); i++) {
result.add(new CollectionSnapshotMetaData((NamedList<Object>)apiResult.getVal(i)));
}
return result;
}
private static void printHelp(Options options) {
StringBuilder helpFooter = new StringBuilder();
helpFooter.append("Examples: \n");
helpFooter.append("snapshotscli.sh --create snapshot-1 -c books -z localhost:2181 \n");
helpFooter.append("snapshotscli.sh --list -c books -z localhost:2181 \n");
helpFooter.append("snapshotscli.sh --describe snapshot-1 -c books -z localhost:2181 \n");
helpFooter.append("snapshotscli.sh --export snapshot-1 -c books -z localhost:2181 -b repo -l backupPath -i req_0 \n");
helpFooter.append("snapshotscli.sh --delete snapshot-1 -c books -z localhost:2181 \n");
HelpFormatter formatter = new HelpFormatter();
formatter.setOptionComparator(new OptionComparator<>());
formatter.printHelp("SolrSnapshotsTool", null, options, helpFooter.toString(), false);
}
private static class OptionComparator<T extends Option> implements Comparator<T> {
public int compare(T o1, T o2) {
String s1 = o1.hasLongOpt() ? o1.getLongOpt() : o1.getOpt();
String s2 = o2.hasLongOpt() ? o2.getLongOpt() : o2.getOpt();
return OPTION_HELP_ORDER.indexOf(s1) - OPTION_HELP_ORDER.indexOf(s2);
}
}
}
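For reference, the tool above can also be driven programmatically; main() is a thin CLI wrapper over the public methods. A minimal sketch under stated assumptions: the class lives in org.apache.solr.core.snapshots, ZooKeeper is reachable at localhost:2181, and a collection named books exists (the ensemble address and collection name are placeholders):

import org.apache.solr.core.snapshots.SolrSnapshotsTool;

public class SnapshotToolSketch {
  public static void main(String[] args) throws Exception {
    // The tool is Closeable (it owns the SolrClient built from the ZK ensemble),
    // so try-with-resources mirrors how main() above uses it.
    try (SolrSnapshotsTool tool = new SolrSnapshotsTool("localhost:2181")) {
      tool.createSnapshot("books", "snapshot-1");   // issues CREATESNAPSHOT
      tool.listSnapshots("books");                  // prints the snapshot names
      tool.describeSnapshot("books", "snapshot-1"); // prints per-core details
      tool.deleteSnapshot("books", "snapshot-1");   // issues DELETESNAPSHOT
    }
  }
}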


@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import static org.apache.solr.common.params.CommonParams.JSON;
public class ExportHandler extends SearchHandler {
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
try {
super.handleRequestBody(req, rsp);
} catch (Exception e) {
rsp.setException(e);
}
String wt = req.getParams().get(CommonParams.WT, JSON);
if("xsort".equals(wt)) wt = JSON;
Map<String, String> map = new HashMap<>(1);
map.put(CommonParams.WT, ReplicationHandler.FILE_STREAM);
req.setParams(SolrParams.wrapDefaults(new MapSolrParams(map),req.getParams()));
rsp.add(ReplicationHandler.FILE_STREAM, new ExportWriter(req, rsp, wt));
}
}
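A note on consumption: the wt swap above means the handler streams its response through ExportWriter while the full result set is sorted and exported. A hypothetical client-side sketch; the core name books, the field id, and the /export path are assumptions here, and /export requires sorting on a docValues field and listing only docValues fields in fl:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class ExportClientSketch {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:8983/solr/books/export?q=*:*&fl=id&sort=id+asc");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try (BufferedReader in = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
      String line;
      while ((line = in.readLine()) != null) {
        // The body arrives as one streamed JSON envelope:
        // {"responseHeader":{"status":0},"response":{"numFound":...,"docs":[...]}}
        System.out.println(line);
      }
    }
  }
}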


@ -14,17 +14,21 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.response;
package org.apache.solr.handler;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.nio.charset.StandardCharsets;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiDocValues;
@ -40,11 +44,18 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.JSONResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.BoolField; import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
@ -61,24 +72,65 @@ import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.apache.solr.common.util.Utils.makeMap;
public class SortingResponseWriter implements QueryResponseWriter { public class ExportWriter implements SolrCore.RawWriter, Closeable {
private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private OutputStreamWriter respWriter;
final SolrQueryRequest req;
final SolrQueryResponse res;
FieldWriter[] fieldWriters;
int totalHits = 0;
FixedBitSet[] sets = null;
PushWriter writer;
private String wt;
ExportWriter(SolrQueryRequest req, SolrQueryResponse res, String wt) {
this.req = req;
this.res = res;
this.wt = wt;
public void init(NamedList args) {
/* NOOP */
} }
public String getContentType(SolrQueryRequest req, SolrQueryResponse res) { @Override
return "application/json"; public String getContentType() {
if ("javabin".equals(wt)) {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
} else return "json";
} }
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse res) throws IOException { @Override
Exception e1 = res.getException(); public void close() throws IOException {
if(e1 != null) { if (writer != null) writer.close();
if(!(e1 instanceof IgnoreException)) { if (respWriter != null) {
writeException(e1, writer, false); respWriter.flush();
respWriter.close();
}
}
protected void writeException(Exception e, PushWriter w, boolean log) throws IOException {
w.writeMap(mw -> {
mw.put("responseHeader", singletonMap("status", 400))
.put("response", makeMap(
"numFound", 0,
"docs", singletonList(singletonMap("EXCEPTION", e.getMessage()))));
});
if (log) {
SolrException.log(logger, e);
}
}
public void write(OutputStream os) throws IOException {
respWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8);
writer = JSONResponseWriter.getPushWriter(respWriter, req, res);
Exception exception = res.getException();
if (exception != null) {
if (!(exception instanceof IgnoreException)) {
writeException(exception, writer, false);
} }
return; return;
} }
@ -113,8 +165,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
// You'll have to uncomment the if below to hit the null pointer exception. // You'll have to uncomment the if below to hit the null pointer exception.
// This is such an unusual case (i.e. an empty index) that catching this condition here is probably OK. // This is such an unusual case (i.e. an empty index) that catching this condition here is probably OK.
// This came to light in the very artificial case of indexing a single doc to Cloud. // This came to light in the very artificial case of indexing a single doc to Cloud.
int totalHits = 0;
FixedBitSet[] sets = null;
if (req.getContext().get("totalHits") != null) { if (req.getContext().get("totalHits") != null) {
totalHits = ((Integer)req.getContext().get("totalHits")).intValue(); totalHits = ((Integer)req.getContext().get("totalHits")).intValue();
sets = (FixedBitSet[]) req.getContext().get("export"); sets = (FixedBitSet[]) req.getContext().get("export");
@ -145,8 +195,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
} }
} }
FieldWriter[] fieldWriters = null;
try { try {
fieldWriters = getFieldWriters(fields, req.getSearcher()); fieldWriters = getFieldWriters(fields, req.getSearcher());
} catch (Exception e) { } catch (Exception e) {
@ -154,9 +202,17 @@ public class SortingResponseWriter implements QueryResponseWriter {
return; return;
} }
writer.write("{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":"+totalHits+", \"docs\":["); writer.writeMap(m -> {
m.put("responseHeader", singletonMap("status", 0));
m.put("response", (MapWriter) mw -> {
mw.put("numFound", totalHits);
mw.put("docs", (IteratorWriter) iw -> writeDocs(req, iw, sort));
});
});
}
protected void writeDocs(SolrQueryRequest req, IteratorWriter.ItemWriter writer, Sort sort) throws IOException {
//Write the data. //Write the data.
List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves(); List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort()); SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort());
@ -165,7 +221,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
SortQueue queue = new SortQueue(queueSize, sortDoc); SortQueue queue = new SortQueue(queueSize, sortDoc);
SortDoc[] outDocs = new SortDoc[queueSize]; SortDoc[] outDocs = new SortDoc[queueSize];
boolean commaNeeded = false;
while(count < totalHits) { while(count < totalHits) {
//long begin = System.nanoTime(); //long begin = System.nanoTime();
queue.reset(); queue.reset();
@ -199,12 +254,10 @@ public class SortingResponseWriter implements QueryResponseWriter {
try { try {
for(int i=outDocsIndex; i>=0; --i) { for(int i=outDocsIndex; i>=0; --i) {
SortDoc s = outDocs[i]; SortDoc s = outDocs[i];
if(commaNeeded){writer.write(',');} writer.add((MapWriter) ew -> {
writer.write('{'); writeDoc(s, leaves, ew);
writeDoc(s, leaves, fieldWriters, sets, writer);
writer.write('}');
commaNeeded = true;
s.reset(); s.reset();
});
} }
} catch(Throwable e) { } catch(Throwable e) {
Throwable ex = e; Throwable ex = e;
@ -224,54 +277,24 @@ public class SortingResponseWriter implements QueryResponseWriter {
} }
} }
} }
//System.out.println("Sort Time 2:"+Long.toString(total/1000000));
writer.write("]}}");
writer.flush();
} }
public static class IgnoreException extends IOException {
public void printStackTrace(PrintWriter pw) {
pw.print("Early Client Disconnect");
}
public String getMessage() {
return "Early Client Disconnect";
}
}
protected void writeDoc(SortDoc sortDoc, protected void writeDoc(SortDoc sortDoc,
List<LeafReaderContext> leaves, List<LeafReaderContext> leaves,
FieldWriter[] fieldWriters, EntryWriter ew) throws IOException {
FixedBitSet[] sets,
Writer out) throws IOException{
int ord = sortDoc.ord; int ord = sortDoc.ord;
FixedBitSet set = sets[ord]; FixedBitSet set = sets[ord];
set.clear(sortDoc.docId); set.clear(sortDoc.docId);
LeafReaderContext context = leaves.get(ord); LeafReaderContext context = leaves.get(ord);
int fieldIndex = 0; int fieldIndex = 0;
for(FieldWriter fieldWriter : fieldWriters) { for (FieldWriter fieldWriter : fieldWriters) {
if(fieldWriter.write(sortDoc.docId, context.reader(), out, fieldIndex)){ if (fieldWriter.write(sortDoc.docId, context.reader(), ew, fieldIndex)) {
++fieldIndex; ++fieldIndex;
} }
} }
} }
protected void writeException(Exception e, Writer out, boolean log) throws IOException{
out.write("{\"responseHeader\": {\"status\": 400}, \"response\":{\"numFound\":0, \"docs\":[");
out.write("{\"EXCEPTION\":\"");
writeStr(e.getMessage(), out);
out.write("\"}");
out.write("]}}");
out.flush();
if(log) {
SolrException.log(logger, e);
}
}
protected FieldWriter[] getFieldWriters(String[] fields, SolrIndexSearcher searcher) throws IOException { protected FieldWriter[] getFieldWriters(String[] fields, SolrIndexSearcher searcher) throws IOException {
IndexSchema schema = searcher.getSchema(); IndexSchema schema = searcher.getSchema();
FieldWriter[] writers = new FieldWriter[fields.length]; FieldWriter[] writers = new FieldWriter[fields.length];
@ -291,50 +314,49 @@ public class SortingResponseWriter implements QueryResponseWriter {
boolean multiValued = schemaField.multiValued(); boolean multiValued = schemaField.multiValued();
FieldType fieldType = schemaField.getType(); FieldType fieldType = schemaField.getType();
if(fieldType instanceof TrieIntField) { if (fieldType instanceof TrieIntField) {
if(multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else { } else {
writers[i] = new IntFieldWriter(field); writers[i] = new IntFieldWriter(field);
} }
} else if (fieldType instanceof TrieLongField) { } else if (fieldType instanceof TrieLongField) {
if(multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else { } else {
writers[i] = new LongFieldWriter(field); writers[i] = new LongFieldWriter(field);
} }
} else if (fieldType instanceof TrieFloatField) { } else if (fieldType instanceof TrieFloatField) {
if(multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else { } else {
writers[i] = new FloatFieldWriter(field); writers[i] = new FloatFieldWriter(field);
} }
} else if(fieldType instanceof TrieDoubleField) { } else if (fieldType instanceof TrieDoubleField) {
if(multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else { } else {
writers[i] = new DoubleFieldWriter(field); writers[i] = new DoubleFieldWriter(field);
} }
} else if(fieldType instanceof StrField) { } else if (fieldType instanceof StrField) {
if(multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, false); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
} else { } else {
writers[i] = new StringFieldWriter(field, fieldType); writers[i] = new StringFieldWriter(field, fieldType);
} }
} else if (fieldType instanceof TrieDateField) { } else if (fieldType instanceof TrieDateField) {
if (multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, false); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
} else { } else {
writers[i] = new DateFieldWriter(field); writers[i] = new DateFieldWriter(field);
} }
} else if(fieldType instanceof BoolField) { } else if (fieldType instanceof BoolField) {
if(multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else { } else {
writers[i] = new BoolFieldWriter(field, fieldType); writers[i] = new BoolFieldWriter(field, fieldType);
} }
} } else {
else {
throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean"); throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean");
} }
} }
@ -399,7 +421,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
// we can just use the existing StringValue here. // we can just use the existing StringValue here.
LeafReader reader = searcher.getSlowAtomicReader(); LeafReader reader = searcher.getSlowAtomicReader();
SortedDocValues vals = reader.getSortedDocValues(field); SortedDocValues vals = reader.getSortedDocValues(field);
if(reverse) { if (reverse) {
sortValues[i] = new StringValue(vals, field, new IntDesc()); sortValues[i] = new StringValue(vals, field, new IntDesc());
} else { } else {
sortValues[i] = new StringValue(vals, field, new IntAsc()); sortValues[i] = new StringValue(vals, field, new IntAsc());
@ -439,7 +461,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
private void populate() { private void populate() {
Object[] heap = getHeapArray(); Object[] heap = getHeapArray();
cache = new SortDoc[heap.length]; cache = new SortDoc[heap.length];
for(int i=1; i<heap.length; i++) { for (int i = 1; i < heap.length; i++) {
cache[i] = heap[i] = proto.copy(); cache[i] = heap[i] = proto.copy();
} }
size = maxSize; size = maxSize;
@ -470,7 +492,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord; this.ord = context.ord;
for(SortValue value : sortValues) { for (SortValue value : sortValues) {
value.setNextReader(context); value.setNextReader(context);
} }
} }
@ -1295,7 +1317,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} }
protected abstract class FieldWriter { protected abstract class FieldWriter {
public abstract boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException; public abstract boolean write(int docId, LeafReader reader, EntryWriter out, int fieldIndex) throws IOException;
} }
class IntFieldWriter extends FieldWriter { class IntFieldWriter extends FieldWriter {
@ -1305,7 +1327,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field; this.field = field;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException { public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field); NumericDocValues vals = DocValues.getNumeric(reader, this.field);
int val; int val;
if (vals.advance(docId) == docId) { if (vals.advance(docId) == docId) {
@ -1313,14 +1335,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else { } else {
val = 0; val = 0;
} }
if(fieldIndex>0) { ew.put(this.field, val);
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Integer.toString(val));
return true; return true;
} }
} }
@ -1328,57 +1343,31 @@ public class SortingResponseWriter implements QueryResponseWriter {
class MultiFieldWriter extends FieldWriter { class MultiFieldWriter extends FieldWriter {
private String field; private String field;
private FieldType fieldType; private FieldType fieldType;
private SchemaField schemaField;
private boolean numeric; private boolean numeric;
private CharsRefBuilder cref = new CharsRefBuilder(); private CharsRefBuilder cref = new CharsRefBuilder();
public MultiFieldWriter(String field, FieldType fieldType, boolean numeric) { public MultiFieldWriter(String field, FieldType fieldType, SchemaField schemaField, boolean numeric) {
this.field = field; this.field = field;
this.fieldType = fieldType; this.fieldType = fieldType;
this.schemaField = schemaField;
this.numeric = numeric; this.numeric = numeric;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter out, int fieldIndex) throws IOException {
SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field); SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field);
List<Long> ords; if (vals.advance(docId) != docId) return false;
if (vals.advance(docId) == docId) { out.put(this.field,
ords = new ArrayList(); (IteratorWriter) w -> {
long o = -1; long o;
while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
ords.add(o); BytesRef ref = vals.lookupOrd(o);
}
assert ords.size() > 0;
} else {
return false;
}
if(fieldIndex>0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write('[');
int v = 0;
for(long ord : ords) {
BytesRef ref = vals.lookupOrd(ord);
fieldType.indexedToReadable(ref, cref); fieldType.indexedToReadable(ref, cref);
if(v > 0) { IndexableField f = fieldType.createField(schemaField, cref.toString(), 1.0f);
out.write(','); if (f == null) w.add(cref.toString());
else w.add(fieldType.toObject(f));
} }
});
if(!numeric) {
out.write('"');
}
writeStr(cref.toString(), out);
if(!numeric) {
out.write('"');
}
++v;
}
out.write("]");
return true; return true;
} }
} }
@ -1390,7 +1379,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field; this.field = field;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException { public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field); NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val; long val;
if (vals.advance(docId) == docId) { if (vals.advance(docId) == docId) {
@ -1398,14 +1387,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else { } else {
val = 0; val = 0;
} }
if(fieldIndex > 0) { ew.put(field, val);
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Long.toString(val));
return true; return true;
} }
} }
@ -1417,7 +1399,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field; this.field = field;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException { public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field); NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val; long val;
if (vals.advance(docId) == docId) { if (vals.advance(docId) == docId) {
@ -1425,17 +1407,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else { } else {
val = 0; val = 0;
} }
ew.put(this.field, new Date(val));
if (fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write('"');
writeStr(new Date(val).toInstant().toString(), out);
out.write('"');
return true; return true;
} }
} }
@ -1450,7 +1422,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.fieldType = fieldType; this.fieldType = fieldType;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException { public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
SortedDocValues vals = DocValues.getSorted(reader, this.field); SortedDocValues vals = DocValues.getSorted(reader, this.field);
if (vals.advance(docId) != docId) { if (vals.advance(docId) != docId) {
return false; return false;
@ -1459,17 +1431,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
BytesRef ref = vals.lookupOrd(ord); BytesRef ref = vals.lookupOrd(ord);
fieldType.indexedToReadable(ref, cref); fieldType.indexedToReadable(ref, cref);
ew.put(this.field, "true".equals(cref.toString()));
if (fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
//out.write('"');
writeStr(cref.toString(), out);
//out.write('"');
return true; return true;
} }
} }
@ -1481,7 +1443,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field; this.field = field;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException { public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field); NumericDocValues vals = DocValues.getNumeric(reader, this.field);
int val; int val;
if (vals.advance(docId) == docId) { if (vals.advance(docId) == docId) {
@ -1489,14 +1451,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else { } else {
val = 0; val = 0;
} }
if(fieldIndex > 0) { ew.put(this.field, Float.intBitsToFloat(val));
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Float.toString(Float.intBitsToFloat(val)));
return true; return true;
} }
} }
@ -1508,7 +1463,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field; this.field = field;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException { public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field); NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val; long val;
if (vals.advance(docId) == docId) { if (vals.advance(docId) == docId) {
@ -1516,14 +1471,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else { } else {
val = 0; val = 0;
} }
if(fieldIndex > 0) { ew.put(this.field, Double.longBitsToDouble(val));
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Double.toString(Double.longBitsToDouble(val)));
return true; return true;
} }
} }
@ -1538,7 +1486,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.fieldType = fieldType; this.fieldType = fieldType;
} }
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException { public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
SortedDocValues vals = DocValues.getSorted(reader, this.field); SortedDocValues vals = DocValues.getSorted(reader, this.field);
if (vals.advance(docId) != docId) { if (vals.advance(docId) != docId) {
return false; return false;
@ -1547,64 +1495,11 @@ public class SortingResponseWriter implements QueryResponseWriter {
BytesRef ref = vals.lookupOrd(ord); BytesRef ref = vals.lookupOrd(ord);
fieldType.indexedToReadable(ref, cref); fieldType.indexedToReadable(ref, cref);
if(fieldIndex > 0) { ew.put(this.field, cref.toString());
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(":");
out.write('"');
writeStr(cref.toString(), out);
out.write('"');
return true; return true;
} }
} }
private void writeStr(String val, Writer writer) throws IOException {
for (int i=0; i<val.length(); i++) {
char ch = val.charAt(i);
if ((ch > '#' && ch != '\\' && ch < '\u2028') || ch == ' ') { // fast path
writer.write(ch);
continue;
}
switch(ch) {
case '"':
case '\\':
writer.write('\\');
writer.write(ch);
break;
case '\r': writer.write('\\'); writer.write('r'); break;
case '\n': writer.write('\\'); writer.write('n'); break;
case '\t': writer.write('\\'); writer.write('t'); break;
case '\b': writer.write('\\'); writer.write('b'); break;
case '\f': writer.write('\\'); writer.write('f'); break;
case '\u2028': // fallthrough
case '\u2029':
unicodeEscape(writer,ch);
break;
// case '/':
default: {
if (ch <= 0x1F) {
unicodeEscape(writer,ch);
} else {
writer.write(ch);
}
}
}
}
}
private static char[] hexdigits = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
protected static void unicodeEscape(Appendable out, int ch) throws IOException {
out.append('\\');
out.append('u');
out.append(hexdigits[(ch>>>12) ]);
out.append(hexdigits[(ch>>>8) & 0xf]);
out.append(hexdigits[(ch>>>4) & 0xf]);
out.append(hexdigits[(ch) & 0xf]);
}
public abstract class PriorityQueue<T> { public abstract class PriorityQueue<T> {
protected int size = 0; protected int size = 0;
protected final int maxSize; protected final int maxSize;
@ -1802,4 +1697,15 @@ public class SortingResponseWriter implements QueryResponseWriter {
return (Object[]) heap; return (Object[]) heap;
} }
} }
public class IgnoreException extends IOException {
public void printStackTrace(PrintWriter pw) {
pw.print("Early Client Disconnect");
}
public String getMessage() {
return "Early Client Disconnect";
}
}
} }
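The right-hand column above replaces hand-assembled JSON strings with the push-writer API (MapWriter, IteratorWriter, and MapWriter.EntryWriter from org.apache.solr.common). A minimal sketch of that pattern in isolation, mirroring the response envelope ExportWriter emits; the class name and literal values are invented for illustration:

import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter;

public class PushWriterSketch {
  // Produces the same envelope shape as ExportWriter:
  // {"responseHeader":{"status":0},"response":{"numFound":2,"docs":[{"id":"1"},{"id":"2"}]}}
  static final MapWriter ENVELOPE = ew -> {
    ew.put("responseHeader", (MapWriter) m -> m.put("status", 0));
    ew.put("response", (MapWriter) m -> {
      m.put("numFound", 2);
      m.put("docs", (IteratorWriter) iw -> {
        iw.add((MapWriter) d -> d.put("id", "1")); // one lambda per streamed doc
        iw.add((MapWriter) d -> d.put("id", "2"));
      });
    });
  };
}

The payoff of the lambda style is that nothing is buffered: each put/add call pushes straight to the underlying writer, which is what lets /export stream result sets larger than memory.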


@ -734,8 +734,14 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existence of " + uri + ". Is it valid?", ex); throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existence of " + uri + ". Is it valid?", ex);
} }
String strategy = req.getParams().get(CollectionAdminParams.INDEX_BACKUP_STRATEGY, CollectionAdminParams.COPY_FILES_STRATEGY);
if (!CollectionAdminParams.INDEX_BACKUP_STRATEGIES.contains(strategy)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown index backup strategy " + strategy);
}
Map<String, Object> params = req.getParams().getAll(null, NAME, COLLECTION_PROP, CoreAdminParams.COMMIT_NAME); Map<String, Object> params = req.getParams().getAll(null, NAME, COLLECTION_PROP, CoreAdminParams.COMMIT_NAME);
params.put(CoreAdminParams.BACKUP_LOCATION, location); params.put(CoreAdminParams.BACKUP_LOCATION, location);
params.put(CollectionAdminParams.INDEX_BACKUP_STRATEGY, strategy);
return params; return params;
}), }),
RESTORE_OP(RESTORE, (req, rsp, h) -> { RESTORE_OP(RESTORE, (req, rsp, h) -> {


@ -37,6 +37,7 @@ import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.CommitUpdateCommand; import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestInjection;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -46,6 +47,8 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
@Override @Override
public void execute(CallInfo it) throws Exception { public void execute(CallInfo it) throws Exception {
assert TestInjection.injectPrepRecoveryOpPauseForever();
final SolrParams params = it.req.getParams(); final SolrParams params = it.req.getParams();
String cname = params.get(CoreAdminParams.CORE); String cname = params.get(CoreAdminParams.CORE);


@ -17,6 +17,7 @@
package org.apache.solr.handler.component; package org.apache.solr.handler.component;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.net.ConnectException; import java.net.ConnectException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -116,7 +117,7 @@ public class HttpShardHandler extends ShardHandler {
private List<String> getURLs(String shard, String preferredHostAddress) { private List<String> getURLs(String shard, String preferredHostAddress) {
List<String> urls = shardToURLs.get(shard); List<String> urls = shardToURLs.get(shard);
if (urls == null) { if (urls == null) {
urls = httpShardHandlerFactory.makeURLList(shard); urls = httpShardHandlerFactory.buildURLList(shard);
if (preferredHostAddress != null && urls.size() > 1) { if (preferredHostAddress != null && urls.size() > 1) {
preferCurrentHostForDistributedReq(preferredHostAddress, urls); preferCurrentHostForDistributedReq(preferredHostAddress, urls);
} }
@ -320,6 +321,8 @@ public class HttpShardHandler extends ShardHandler {
} }
} }
final ReplicaListTransformer replicaListTransformer = httpShardHandlerFactory.getReplicaListTransformer(req);
if (shards != null) { if (shards != null) {
List<String> lst = StrUtils.splitSmart(shards, ",", true); List<String> lst = StrUtils.splitSmart(shards, ",", true);
rb.shards = lst.toArray(new String[lst.size()]); rb.shards = lst.toArray(new String[lst.size()]);
@ -404,7 +407,11 @@ public class HttpShardHandler extends ShardHandler {
for (int i=0; i<rb.shards.length; i++) { for (int i=0; i<rb.shards.length; i++) {
if (rb.shards[i] == null) { final List<String> shardUrls;
if (rb.shards[i] != null) {
shardUrls = StrUtils.splitSmart(rb.shards[i], "|", true);
replicaListTransformer.transform(shardUrls);
} else {
if (clusterState == null) { if (clusterState == null) {
clusterState = zkController.getClusterState(); clusterState = zkController.getClusterState();
slices = clusterState.getSlicesMap(cloudDescriptor.getCollectionName()); slices = clusterState.getSlicesMap(cloudDescriptor.getCollectionName());
@ -421,26 +428,25 @@ public class HttpShardHandler extends ShardHandler {
// throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no such shard: " + sliceName); // throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no such shard: " + sliceName);
} }
Map<String, Replica> sliceShards = slice.getReplicasMap(); final Collection<Replica> allSliceReplicas = slice.getReplicasMap().values();
final List<Replica> eligibleSliceReplicas = new ArrayList<>(allSliceReplicas.size());
// For now, recreate the | delimited list of equivalent servers for (Replica replica : allSliceReplicas) {
StringBuilder sliceShardsStr = new StringBuilder();
boolean first = true;
for (Replica replica : sliceShards.values()) {
if (!clusterState.liveNodesContain(replica.getNodeName()) if (!clusterState.liveNodesContain(replica.getNodeName())
|| replica.getState() != Replica.State.ACTIVE) { || replica.getState() != Replica.State.ACTIVE) {
continue; continue;
} }
if (first) { eligibleSliceReplicas.add(replica);
first = false;
} else {
sliceShardsStr.append('|');
}
String url = ZkCoreNodeProps.getCoreUrl(replica);
sliceShardsStr.append(url);
} }
if (sliceShardsStr.length() == 0) { replicaListTransformer.transform(eligibleSliceReplicas);
shardUrls = new ArrayList<>(eligibleSliceReplicas.size());
for (Replica replica : eligibleSliceReplicas) {
String url = ZkCoreNodeProps.getCoreUrl(replica);
shardUrls.add(url);
}
if (shardUrls.isEmpty()) {
boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false); boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false);
if (!tolerant) { if (!tolerant) {
// stop the check when there are no replicas available for a shard // stop the check when there are no replicas available for a shard
@ -448,9 +454,19 @@ public class HttpShardHandler extends ShardHandler {
"no servers hosting shard: " + rb.slices[i]); "no servers hosting shard: " + rb.slices[i]);
} }
} }
rb.shards[i] = sliceShardsStr.toString();
} }
// And now recreate the | delimited list of equivalent servers
final StringBuilder sliceShardsStr = new StringBuilder();
boolean first = true;
for (String shardUrl : shardUrls) {
if (first) {
first = false;
} else {
sliceShardsStr.append('|');
}
sliceShardsStr.append(shardUrl);
}
rb.shards[i] = sliceShardsStr.toString();
} }
} }
String shards_rows = params.get(ShardParams.SHARDS_ROWS); String shards_rows = params.get(ShardParams.SHARDS_ROWS);


@ -31,13 +31,13 @@ import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.URLUtil; import org.apache.solr.common.util.URLUtil;
import org.apache.solr.core.PluginInfo; import org.apache.solr.core.PluginInfo;
import org.apache.solr.update.UpdateShardHandlerConfig; import org.apache.solr.update.UpdateShardHandlerConfig;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.DefaultSolrThreadFactory;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ArrayBlockingQueue;
@ -84,6 +84,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
private final Random r = new Random(); private final Random r = new Random();
private final ReplicaListTransformer shufflingReplicaListTransformer = new ShufflingReplicaListTransformer(r);
// URL scheme to be used in distributed search. // URL scheme to be used in distributed search.
static final String INIT_URL_SCHEME = "urlScheme"; static final String INIT_URL_SCHEME = "urlScheme";
@ -227,12 +229,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
} }
/** /**
* Creates a randomized list of urls for the given shard. * Creates a list of urls for the given shard.
* *
* @param shard the urls for the shard, separated by '|' * @param shard the urls for the shard, separated by '|'
* @return A list of valid urls (including protocol) that are replicas for the shard * @return A list of valid urls (including protocol) that are replicas for the shard
*/ */
public List<String> makeURLList(String shard) { public List<String> buildURLList(String shard) {
List<String> urls = StrUtils.splitSmart(shard, "|", true); List<String> urls = StrUtils.splitSmart(shard, "|", true);
// convert shard to URL // convert shard to URL
@ -240,17 +242,14 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
urls.set(i, buildUrl(urls.get(i))); urls.set(i, buildUrl(urls.get(i)));
} }
//
// Shuffle the list instead of use round-robin by default.
// This prevents accidental synchronization where multiple shards could get in sync
// and query the same replica at the same time.
//
if (urls.size() > 1)
Collections.shuffle(urls, r);
return urls; return urls;
} }
ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req)
{
return shufflingReplicaListTransformer;
}
/** /**
* Creates a new completion service for use by a single set of distributed requests. * Creates a new completion service for use by a single set of distributed requests.
*/ */


@ -973,8 +973,7 @@ public class QueryComponent extends SearchComponent
// Merge the docs via a priority queue so we don't have to sort *all* of the // Merge the docs via a priority queue so we don't have to sort *all* of the
// documents... we only need to order the top (rows+start) // documents... we only need to order the top (rows+start)
ShardFieldSortedHitQueue queue; final ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
NamedList<Object> shardInfo = null; NamedList<Object> shardInfo = null;
if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {


@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.List;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.ShardParams;
interface ReplicaListTransformer {
/**
 * Transforms the passed-in list of choices. Transformations can include (but are not limited to)
* reordering of elements (e.g. via shuffling) and removal of elements (i.e. filtering).
*
* @param choices - a list of choices to transform, typically the choices are {@link Replica} objects but choices
* can also be {@link String} objects such as URLs passed in via the {@link ShardParams#SHARDS} parameter.
*/
public void transform(List<?> choices);
}
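Any implementation of this interface can reorder or filter the shard choices; the shuffling default follows in the next file. As an illustration only, a hypothetical transformer (the class name and host-matching rule are invented for this sketch) that narrows the list to a preferred host when at least one choice matches:

package org.apache.solr.handler.component;

import java.util.Iterator;
import java.util.List;

// Hypothetical example, not part of this commit. Matching on toString() is a
// simplification: String choices are URLs, and Replica's string form contains
// its node address, so a contains() check works for both choice types.
class PreferredHostReplicaListTransformer implements ReplicaListTransformer {
  private final String preferredHost;

  PreferredHostReplicaListTransformer(String preferredHost) {
    this.preferredHost = preferredHost;
  }

  public void transform(List<?> choices) {
    if (choices.size() <= 1) return;
    boolean anyMatch = false;
    for (Object choice : choices) {
      if (choice.toString().contains(preferredHost)) { anyMatch = true; break; }
    }
    if (!anyMatch) return; // keep the original list rather than emptying it
    for (Iterator<?> it = choices.iterator(); it.hasNext(); ) {
      if (!it.next().toString().contains(preferredHost)) it.remove();
    }
  }
}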


@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.Collections;
import java.util.List;
import java.util.Random;
class ShufflingReplicaListTransformer implements ReplicaListTransformer {
private final Random r;
public ShufflingReplicaListTransformer(Random r)
{
this.r = r;
}
public void transform(List<?> choices)
{
if (choices.size() > 1) {
Collections.shuffle(choices, r);
}
}
}


@ -173,16 +173,17 @@ public class DocValuesFacets {
int min=mincount-1; // the smallest value in the top 'N' values int min=mincount-1; // the smallest value in the top 'N' values
for (int i=(startTermIndex==-1)?1:0; i<nTerms; i++) { for (int i=(startTermIndex==-1)?1:0; i<nTerms; i++) {
int c = counts[i]; int c = counts[i];
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
if (contains != null) { if (contains != null) {
final BytesRef term = si.lookupOrd(startTermIndex+i); final BytesRef term = si.lookupOrd(startTermIndex+i);
if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) { if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
continue; continue;
} }
} }
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// smaller term numbers sort higher, so subtract the term number instead // smaller term numbers sort higher, so subtract the term number instead
long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i); long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);
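An aside on the packed long in the line above: the count occupies the high 32 bits and Integer.MAX_VALUE - i the low 32 bits, so plain numeric comparison sorts primarily by count and, on ties, prefers the smaller (earlier) term index. That ordering is also what makes the strict c>min test safe when counts repeat. A small self-contained check of the property (illustrative only; run with assertions enabled, java -ea):

public class PackedPairSketch {
  static long pair(int count, int termIndex) {
    return (((long) count) << 32) + (Integer.MAX_VALUE - termIndex);
  }

  public static void main(String[] args) {
    // A higher count always wins, regardless of term index...
    assert pair(5, 100) > pair(4, 1);
    // ...and on a count tie, the smaller term index packs to the larger value.
    assert pair(5, 10) > pair(5, 11);
    System.out.println("packed-pair ordering holds");
  }
}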


@ -71,6 +71,8 @@ public class MacroExpander {
newValues.add(vv); newValues.add(vv);
} }
} }
}
if (newValues != null) {
newValues.add(newV); newValues.add(newV);
} }
} }


@ -24,7 +24,11 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
@ -74,6 +78,11 @@ public class JSONResponseWriter implements QueryResponseWriter {
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) { public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
return contentType; return contentType;
} }
public static PushWriter getPushWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) {
return new JSONWriter(writer, req, rsp);
}
} }
class JSONWriter extends TextResponseWriter { class JSONWriter extends TextResponseWriter {
@ -507,6 +516,53 @@ class JSONWriter extends TextResponseWriter {
} }
} }
@Override
public void writeIterator(IteratorWriter val) throws IOException {
writeArrayOpener(-1);
incLevel();
val.writeIter(new IteratorWriter.ItemWriter() {
boolean first = true;
@Override
public IteratorWriter.ItemWriter add(Object o) throws IOException {
if (!first) {
JSONWriter.this.indent();
JSONWriter.this.writeArraySeparator();
}
JSONWriter.this.writeVal(null, o);
first = false;
return this;
}
});
decLevel();
writeArrayCloser();
}
@Override
public void writeMap(MapWriter val)
throws IOException {
writeMapOpener(-1);
incLevel();
val.writeMap(new EntryWriter() {
boolean isFirst = true;
@Override
public EntryWriter put(String k, Object v) throws IOException {
if (isFirst) {
isFirst = false;
} else {
JSONWriter.this.writeMapSeparator();
}
if (doIndent) JSONWriter.this.indent();
JSONWriter.this.writeKey(k, true);
JSONWriter.this.writeVal(k, v);
return this;
}
});
decLevel();
writeMapCloser();
}
@Override @Override
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException { public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
@ -544,12 +600,14 @@ class JSONWriter extends TextResponseWriter {
public void writeArray(String name, List l) throws IOException { public void writeArray(String name, List l) throws IOException {
writeArrayOpener(l.size()); writeArrayOpener(l.size());
writeJsonIter(l.iterator()); writeJsonIter(l.iterator());
writeArrayCloser();
} }
@Override @Override
public void writeArray(String name, Iterator val) throws IOException { public void writeArray(String name, Iterator val) throws IOException {
writeArrayOpener(-1); // no trivial way to determine array size writeArrayOpener(-1); // no trivial way to determine array size
writeJsonIter(val); writeJsonIter(val);
writeArrayCloser();
} }
private void writeJsonIter(Iterator val) throws IOException { private void writeJsonIter(Iterator val) throws IOException {
@ -564,7 +622,6 @@ class JSONWriter extends TextResponseWriter {
first=false; first=false;
} }
decLevel(); decLevel();
writeArrayCloser();
} }
// //
@ -634,11 +691,6 @@ class ArrayOfNamedValuePairJSONWriter extends JSONWriter {
} }
} }
@Override
public void writeArray(String name, List l) throws IOException {
writeArray(name, l.iterator());
}
@Override @Override
public void writeNamedList(String name, NamedList val) throws IOException { public void writeNamedList(String name, NamedList val) throws IOException {


@ -31,9 +31,12 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.EnumFieldValue; import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapSerializable; import org.apache.solr.common.MapSerializable;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.Base64; import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
@ -48,7 +51,7 @@ import org.apache.solr.util.FastWriter;
* *
* *
*/ */
public abstract class TextResponseWriter { public abstract class TextResponseWriter implements PushWriter {
// indent up to 40 spaces // indent up to 40 spaces
static final char[] indentChars = new char[81]; static final char[] indentChars = new char[81];
@ -138,19 +141,19 @@ public abstract class TextResponseWriter {
writeStr(name, f.stringValue(), true); writeStr(name, f.stringValue(), true);
} }
} else if (val instanceof Number) { } else if (val instanceof Number) {
writeNumber(name, (Number)val); writeNumber(name, (Number) val);
} else if (val instanceof Boolean) { } else if (val instanceof Boolean) {
writeBool(name, (Boolean)val); writeBool(name, (Boolean) val);
} else if (val instanceof Date) { } else if (val instanceof Date) {
writeDate(name,(Date)val); writeDate(name, (Date) val);
} else if (val instanceof Document) { } else if (val instanceof Document) {
SolrDocument doc = DocsStreamer.getDoc((Document) val, schema); SolrDocument doc = DocsStreamer.getDoc((Document) val, schema);
writeSolrDocument(name, doc,returnFields, 0 ); writeSolrDocument(name, doc, returnFields, 0);
} else if (val instanceof SolrDocument) { } else if (val instanceof SolrDocument) {
writeSolrDocument(name, (SolrDocument)val,returnFields, 0); writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
} else if (val instanceof ResultContext) { } else if (val instanceof ResultContext) {
// requires access to IndexReader // requires access to IndexReader
writeDocuments(name, (ResultContext)val); writeDocuments(name, (ResultContext) val);
} else if (val instanceof DocList) { } else if (val instanceof DocList) {
// Should not happen normally // Should not happen normally
ResultContext ctx = new BasicResultContext((DocList)val, returnFields, null, null, req); ResultContext ctx = new BasicResultContext((DocList)val, returnFields, null, null, req);
@ -168,6 +171,8 @@ public abstract class TextResponseWriter {
writeNamedList(name, (NamedList)val); writeNamedList(name, (NamedList)val);
} else if (val instanceof Path) { } else if (val instanceof Path) {
writeStr(name, ((Path) val).toAbsolutePath().toString(), true); writeStr(name, ((Path) val).toAbsolutePath().toString(), true);
} else if (val instanceof IteratorWriter) {
writeIterator((IteratorWriter) val);
} else if (val instanceof Iterable) { } else if (val instanceof Iterable) {
writeArray(name,((Iterable)val).iterator()); writeArray(name,((Iterable)val).iterator());
} else if (val instanceof Object[]) { } else if (val instanceof Object[]) {
@ -184,6 +189,8 @@ public abstract class TextResponseWriter {
writeStr(name, val.toString(), true); writeStr(name, val.toString(), true);
} else if (val instanceof WriteableValue) { } else if (val instanceof WriteableValue) {
((WriteableValue)val).write(name, this); ((WriteableValue)val).write(name, this);
} else if (val instanceof MapWriter) {
writeMap((MapWriter) val);
} else if (val instanceof MapSerializable) { } else if (val instanceof MapSerializable) {
//todo find a better way to reuse the map more efficiently //todo find a better way to reuse the map more efficiently
writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true); writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
@ -192,6 +199,15 @@ public abstract class TextResponseWriter {
writeStr(name, val.getClass().getName() + ':' + val.toString(), true); writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
} }
} }
@Override
public void writeMap(MapWriter mw) throws IOException {
//todo
}
@Override
public void writeIterator(IteratorWriter iw) throws IOException {
/*todo*/
}
protected void writeBool(String name , Boolean val) throws IOException { protected void writeBool(String name , Boolean val) throws IOException {
writeBool(name, val.toString()); writeBool(name, val.toString());


@ -94,20 +94,58 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
List<Query> qlist = new ArrayList<>(freq.domain.filters.size()); List<Query> qlist = new ArrayList<>(freq.domain.filters.size());
// TODO: prevent parsing filters each time! // TODO: prevent parsing filters each time!
for (Object rawFilter : freq.domain.filters) { for (Object rawFilter : freq.domain.filters) {
Query symbolicFilter;
if (rawFilter instanceof String) { if (rawFilter instanceof String) {
QParser parser = null; QParser parser = null;
try { try {
parser = QParser.getParser((String)rawFilter, fcontext.req); parser = QParser.getParser((String)rawFilter, fcontext.req);
symbolicFilter = parser.getQuery(); Query symbolicFilter = parser.getQuery();
qlist.add(symbolicFilter);
} catch (SyntaxError syntaxError) { } catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
} }
} else if (rawFilter instanceof Map) {
Map<String,Object> m = (Map<String, Object>) rawFilter;
String type;
Object args;
if (m.size() == 1) {
Map.Entry<String, Object> entry = m.entrySet().iterator().next();
type = entry.getKey();
args = entry.getValue();
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't convert map to query:" + rawFilter);
}
if (!"param".equals(type)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown type. Can't convert map to query:" + rawFilter);
}
String tag;
if (!(args instanceof String)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't retrieve non-string param:" + args);
}
tag = (String)args;
String[] qstrings = fcontext.req.getParams().getParams(tag);
if (qstrings != null) {
for (String qstring : qstrings) {
QParser parser = null;
try {
parser = QParser.getParser(qstring, fcontext.req);
Query symbolicFilter = parser.getQuery();
qlist.add(symbolicFilter);
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
}
}
} else { } else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad query (expected a string):" + rawFilter); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad query (expected a string):" + rawFilter);
} }
qlist.add(symbolicFilter);
} }
this.filter = fcontext.searcher.getDocSet(qlist); this.filter = fcontext.searcher.getDocSet(qlist);
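// A hypothetical request sketch (parameter names assumed) for the {"param":tag}
// form handled above: a JSON facet domain can reference filters supplied as a
// tagged request parameter instead of inlining the query strings, e.g.
//   json.facet={categories:{type:terms, field:cat,
//                           domain:{filters:[{param:"myFilter"}]}}}
//   &myFilter=inStock:true&myFilter=price:[0 TO 100]
// Every value of "myFilter" is parsed with QParser and ANDed into the domain.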
@ -363,24 +401,29 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException { void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
// TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results? boolean emptyDomain = domain == null || domain.size() == 0;
// should we check for domain-altering exclusions, or even ask the sub-facet for
// its domain and then only skip it if it's 0?
if (domain == null || domain.size() == 0 && !freq.processEmpty) {
return;
}
for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) { for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
FacetRequest subRequest = sub.getValue();
// This includes a static check if a sub-facet can possibly produce something from
// an empty domain. Should this be changed to a dynamic check as well? That would
// probably require actually executing the facet anyway, and dropping it at the
// end if it was unproductive.
if (emptyDomain && !freq.processEmpty && !subRequest.canProduceFromEmpty()) {
continue;
}
// make a new context for each sub-facet since they can change the domain // make a new context for each sub-facet since they can change the domain
FacetContext subContext = fcontext.sub(filter, domain); FacetContext subContext = fcontext.sub(filter, domain);
FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext); FacetProcessor subProcessor = subRequest.createFacetProcessor(subContext);
if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true
FacetDebugInfo fdebug = new FacetDebugInfo(); FacetDebugInfo fdebug = new FacetDebugInfo();
subContext.setDebugInfo(fdebug); subContext.setDebugInfo(fdebug);
fcontext.getDebugInfo().addChild(fdebug); fcontext.getDebugInfo().addChild(fdebug);
fdebug.setReqDescription(sub.getValue().getFacetDescription()); fdebug.setReqDescription(subRequest.getFacetDescription());
fdebug.setProcessor(subProcessor.getClass().getSimpleName()); fdebug.setProcessor(subProcessor.getClass().getSimpleName());
if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString()); if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());


@ -88,6 +88,16 @@ public abstract class FacetRequest {
public boolean toChildren; public boolean toChildren;
public String parents; // identifies the parent filter... the full set of parent documents for any block join operation public String parents; // identifies the parent filter... the full set of parent documents for any block join operation
public List<Object> filters; // list of symbolic filters (JSON query format) public List<Object> filters; // list of symbolic filters (JSON query format)
// True if a starting set of documents can be mapped onto a different set of documents not originally in the starting set.
public boolean canTransformDomain() {
return toParent || toChildren || excludeTags != null;
}
// Can this domain become non-empty if the input domain is empty? This does not check any sub-facets (see canProduceFromEmpty for that)
public boolean canBecomeNonEmpty() {
return excludeTags != null;
}
} }
public FacetRequest() { public FacetRequest() {
@ -119,6 +129,15 @@ public abstract class FacetRequest {
return false; return false;
} }
/** Returns true if this facet, or any sub-facets can produce results from an empty domain. */
public boolean canProduceFromEmpty() {
if (domain != null && domain.canBecomeNonEmpty()) return true;
for (FacetRequest freq : subFacets.values()) {
if (freq.canProduceFromEmpty()) return true;
}
return false;
}
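// A hypothetical illustration (facet names assumed): a sub-facet whose domain
// excludes a tagged filter can widen an empty parent domain, e.g.
//   json.facet={empty:{type:query, q:"id:none",
//                      facet:{all:{type:terms, field:cat,
//                                  domain:{excludeTags:"main"}}}}}
// "all" can still produce buckets because dropping the tagged filter re-expands
// the domain; this is what canBecomeNonEmpty()/canProduceFromEmpty() detect.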
public void addStat(String key, AggValueSource stat) { public void addStat(String key, AggValueSource stat) {
facetStats.put(key, stat); facetStats.put(key, stat);
} }


@ -24,6 +24,7 @@ import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.Timer; import java.util.Timer;
import java.util.TimerTask; import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -110,6 +111,8 @@ public class TestInjection {
public static String updateRandomPause = null; public static String updateRandomPause = null;
public static String prepRecoveryOpPauseForever = null;
public static String randomDelayInCoreCreation = null; public static String randomDelayInCoreCreation = null;
public static int randomDelayMaxInCoreCreationInSec = 10; public static int randomDelayMaxInCoreCreationInSec = 10;
@ -118,6 +121,8 @@ public class TestInjection {
private static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>()); private static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>());
private static AtomicInteger countPrepRecoveryOpPauseForever = new AtomicInteger(0);
public static void reset() { public static void reset() {
nonGracefullClose = null; nonGracefullClose = null;
failReplicaRequests = null; failReplicaRequests = null;
@ -127,6 +132,8 @@ public class TestInjection {
updateRandomPause = null; updateRandomPause = null;
randomDelayInCoreCreation = null; randomDelayInCoreCreation = null;
splitFailureBeforeReplicaCreation = null; splitFailureBeforeReplicaCreation = null;
prepRecoveryOpPauseForever = null;
countPrepRecoveryOpPauseForever = new AtomicInteger(0);
for (Timer timer : timers) { for (Timer timer : timers) {
timer.cancel(); timer.cancel();
@ -289,6 +296,31 @@ public class TestInjection {
return true; return true;
} }
public static boolean injectPrepRecoveryOpPauseForever() {
if (prepRecoveryOpPauseForever != null) {
Random rand = random();
if (null == rand) return true;
Pair<Boolean,Integer> pair = parseValue(prepRecoveryOpPauseForever);
boolean enabled = pair.first();
int chanceIn100 = pair.second();
// Prevent continuous pauses: allow at most two consecutive pause-forever injections
if (enabled && rand.nextInt(100) >= (100 - chanceIn100) && countPrepRecoveryOpPauseForever.get() < 2) {
countPrepRecoveryOpPauseForever.incrementAndGet();
log.info("inject pause forever for prep recovery op");
try {
Thread.sleep(Integer.MAX_VALUE);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
} else {
countPrepRecoveryOpPauseForever.set(0);
}
}
return true;
}
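// A usage sketch, assuming TestInjection's usual "<enabled>:<chanceIn100>"
// value format parsed by parseValue(...) above:
TestInjection.prepRecoveryOpPauseForever = "true:30"; // pause ~30% of prep-recovery ops
// countPrepRecoveryOpPauseForever then caps the pause at two consecutive injections.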
public static boolean injectSplitFailureBeforeReplicaCreation() { public static boolean injectSplitFailureBeforeReplicaCreation() {
if (splitFailureBeforeReplicaCreation != null) { if (splitFailureBeforeReplicaCreation != null) {
Random rand = random(); Random rand = random();


@ -92,14 +92,16 @@
"useParams":"_ADMIN_FILE" "useParams":"_ADMIN_FILE"
}, },
"/export": { "/export": {
"class": "solr.SearchHandler", "class": "solr.ExportHandler",
"useParams":"_EXPORT", "useParams":"_EXPORT",
"components": [ "components": [
"query" "query"
], ],
"defaults": {
"wt": "json"
},
"invariants": { "invariants": {
"rq": "{!xport}", "rq": "{!xport}",
"wt": "xsort",
"distrib": false "distrib": false
} }
}, },
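// A hypothetical request sketch: with solr.ExportHandler the response format is
// chosen by the ordinary wt parameter (defaulting to json above) instead of the
// removed "xsort" writer; rq={!xport} and distrib=false remain invariant, e.g.
//   /export?q=*:*&sort=id+asc&fl=id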


@ -38,6 +38,7 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ImplicitDocRouter; import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.CoreAdminParams;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -124,9 +125,24 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
} }
testBackupAndRestore(getCollectionName()); testBackupAndRestore(getCollectionName());
testConfigBackupOnly("conf1", getCollectionName());
testInvalidPath(getCollectionName()); testInvalidPath(getCollectionName());
} }
/**
* This test validates the backup of collection configuration using
* {@linkplain CollectionAdminParams#NO_INDEX_BACKUP_STRATEGY}.
*
* @param configName The config name for the collection to be backed up.
* @param collectionName The name of the collection to be backed up.
* @throws Exception in case of errors.
*/
protected void testConfigBackupOnly(String configName, String collectionName) throws Exception {
// This is deliberately a no-op since we want to run this test for only one of the backup repository
// implementations (mainly to avoid redundant test execution). Currently the HDFS backup repository
// test implements this.
}
// This test verifies the system behavior when the backup location cluster property is configured with an invalid // This test verifies the system behavior when the backup location cluster property is configured with an invalid
// value for the specified repository (and the default backup location is not configured in solr.xml). // value for the specified repository (and the default backup location is not configured in solr.xml).
private void testInvalidPath(String collectionName) throws Exception { private void testInvalidPath(String collectionName) throws Exception {


@ -37,6 +37,8 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.update.DirectUpdateHandler2; import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.update.UpdateLog; import org.apache.solr.update.UpdateLog;
import org.apache.solr.util.TestInjection;
import org.junit.AfterClass;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -47,6 +49,7 @@ public class TestCloudRecovery extends SolrCloudTestCase {
@BeforeClass @BeforeClass
public static void setupCluster() throws Exception { public static void setupCluster() throws Exception {
TestInjection.prepRecoveryOpPauseForever = "true:30";
System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory"); System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
System.setProperty("solr.ulog.numRecordsToKeep", "1000"); System.setProperty("solr.ulog.numRecordsToKeep", "1000");
@ -62,6 +65,11 @@ public class TestCloudRecovery extends SolrCloudTestCase {
false, true, 30); false, true, 30);
} }
@AfterClass
public static void afterClass() {
TestInjection.reset();
}
@Before @Before
public void resetCollection() throws IOException, SolrServerException { public void resetCollection() throws IOException, SolrServerException {
cluster.getSolrClient().deleteByQuery(COLLECTION, "*:*"); cluster.getSolrClient().deleteByQuery(COLLECTION, "*:*");


@ -16,10 +16,18 @@
*/ */
package org.apache.solr.cloud; package org.apache.solr.cloud;
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF;
import static org.apache.solr.core.backup.BackupManager.*;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -28,7 +36,14 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.hdfs.HdfsTestUtil; import org.apache.solr.cloud.hdfs.HdfsTestUtil;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.backup.BackupManager;
import org.apache.solr.core.backup.repository.HdfsBackupRepository;
import org.apache.solr.util.BadHdfsThreadsFilter; import org.apache.solr.util.BadHdfsThreadsFilter;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -144,4 +159,45 @@ public class TestHdfsCloudBackupRestore extends AbstractCloudBackupRestoreTestCa
public String getBackupLocation() { public String getBackupLocation() {
return null; return null;
} }
protected void testConfigBackupOnly(String configName, String collectionName) throws Exception {
String backupName = "configonlybackup";
CloudSolrClient solrClient = cluster.getSolrClient();
CollectionAdminRequest.Backup backup = CollectionAdminRequest.backupCollection(collectionName, backupName)
.setRepositoryName(getBackupRepoName())
.setIndexBackupStrategy(CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY);
backup.process(solrClient);
Map<String,String> params = new HashMap<>();
params.put("location", "/backup");
params.put("solr.hdfs.home", hdfsUri + "/solr");
HdfsBackupRepository repo = new HdfsBackupRepository();
repo.init(new NamedList<>(params));
BackupManager mgr = new BackupManager(repo, solrClient.getZkStateReader());
URI baseLoc = repo.createURI("/backup");
Properties props = mgr.readBackupProperties(baseLoc, backupName);
assertNotNull(props);
assertEquals(collectionName, props.getProperty(COLLECTION_NAME_PROP));
assertEquals(backupName, props.getProperty(BACKUP_NAME_PROP));
assertEquals(configName, props.getProperty(COLL_CONF));
DocCollection collectionState = mgr.readCollectionState(baseLoc, backupName, collectionName);
assertNotNull(collectionState);
assertEquals(collectionName, collectionState.getName());
URI configDirLoc = repo.resolve(baseLoc, backupName, ZK_STATE_DIR, CONFIG_STATE_DIR, configName);
assertTrue(repo.exists(configDirLoc));
Collection<String> expected = Arrays.asList(BACKUP_PROPS_FILE, ZK_STATE_DIR);
URI backupLoc = repo.resolve(baseLoc, backupName);
String[] dirs = repo.listAll(backupLoc);
for (String d : dirs) {
assertTrue(expected.contains(d));
}
}
} }


@ -97,7 +97,7 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
++ihCount; assertEquals(pathToClassMap.get("/admin/system"), "solr.SystemInfoHandler"); ++ihCount; assertEquals(pathToClassMap.get("/admin/system"), "solr.SystemInfoHandler");
++ihCount; assertEquals(pathToClassMap.get("/admin/threads"), "solr.ThreadDumpHandler"); ++ihCount; assertEquals(pathToClassMap.get("/admin/threads"), "solr.ThreadDumpHandler");
++ihCount; assertEquals(pathToClassMap.get("/config"), "solr.SolrConfigHandler"); ++ihCount; assertEquals(pathToClassMap.get("/config"), "solr.SolrConfigHandler");
++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.SearchHandler"); ++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.ExportHandler");
++ihCount; assertEquals(pathToClassMap.get("/terms"), "solr.SearchHandler"); ++ihCount; assertEquals(pathToClassMap.get("/terms"), "solr.SearchHandler");
++ihCount; assertEquals(pathToClassMap.get("/get"), "solr.RealTimeGetHandler"); ++ihCount; assertEquals(pathToClassMap.get("/get"), "solr.RealTimeGetHandler");
++ihCount; assertEquals(pathToClassMap.get(ReplicationHandler.PATH), "solr.ReplicationHandler"); ++ihCount; assertEquals(pathToClassMap.get(ReplicationHandler.PATH), "solr.ReplicationHandler");


@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.Test;
public class ReplicaListTransformerTest extends LuceneTestCase {
// A transformer that keeps only matching choices
private static class ToyMatchingReplicaListTransformer implements ReplicaListTransformer {
private final String regex;
public ToyMatchingReplicaListTransformer(String regex)
{
this.regex = regex;
}
public void transform(List<?> choices)
{
Iterator<?> it = choices.iterator();
while (it.hasNext()) {
Object choice = it.next();
final String url;
if (choice instanceof String) {
url = (String)choice;
}
else if (choice instanceof Replica) {
url = ((Replica)choice).getCoreUrl();
} else {
url = null;
}
if (url == null || !url.matches(regex)) {
it.remove();
}
}
}
}
// A transformer that makes no transformation
private static class ToyNoOpReplicaListTransformer implements ReplicaListTransformer {
public ToyNoOpReplicaListTransformer()
{
}
public void transform(List<?> choices)
{
// no-op
}
}
@Test
public void testTransform() throws Exception {
final String regex = ".*" + random().nextInt(10) + ".*";
final ReplicaListTransformer transformer;
if (random().nextBoolean()) {
transformer = new ToyMatchingReplicaListTransformer(regex);
} else {
transformer = new HttpShardHandlerFactory() {
@Override
ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req)
{
final SolrParams params = req.getParams();
if (params.getBool("toyNoTransform", false)) {
return new ToyNoOpReplicaListTransformer();
}
final String regex = params.get("toyRegEx");
if (regex != null) {
return new ToyMatchingReplicaListTransformer(regex);
}
return super.getReplicaListTransformer(req);
}
}.getReplicaListTransformer(
new LocalSolrQueryRequest(null,
new ModifiableSolrParams().add("toyRegEx", regex)));
}
final List<Replica> inputs = new ArrayList<>();
final List<Replica> expectedTransformed = new ArrayList<>();
final List<String> urls = createRandomUrls();
for (int ii=0; ii<urls.size(); ++ii) {
final String name = "replica"+(ii+1);
final String url = urls.get(ii);
final Map<String,Object> propMap = new HashMap<String,Object>();
propMap.put("base_url", url);
// a skeleton replica, good enough for this test's purposes
final Replica replica = new Replica(name, propMap);
inputs.add(replica);
if (url.matches(regex)) {
expectedTransformed.add(replica);
}
}
final List<Replica> actualTransformed = new ArrayList<>(inputs);
transformer.transform(actualTransformed);
assertEquals(expectedTransformed.size(), actualTransformed.size());
for (int ii=0; ii<expectedTransformed.size(); ++ii) {
assertEquals("mismatch for ii="+ii, expectedTransformed.get(ii), actualTransformed.get(ii));
}
}
private final List<String> createRandomUrls() throws Exception {
final List<String> urls = new ArrayList<>();
maybeAddUrl(urls, "a"+random().nextDouble());
maybeAddUrl(urls, "bb"+random().nextFloat());
maybeAddUrl(urls, "ccc"+random().nextGaussian());
maybeAddUrl(urls, "dddd"+random().nextInt());
maybeAddUrl(urls, "eeeee"+random().nextLong());
Collections.shuffle(urls, random());
return urls;
}
private final void maybeAddUrl(final List<String> urls, final String url) {
if (random().nextBoolean()) {
urls.add(url);
}
}
}


@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.cloud.Replica;
import org.junit.Test;
public class ShufflingReplicaListTransformerTest extends LuceneTestCase {
private final ShufflingReplicaListTransformer transformer = new ShufflingReplicaListTransformer(random());
@Test
public void testTransformReplicas() throws Exception {
final List<Replica> replicas = new ArrayList<>();
for (final String url : createRandomUrls()) {
replicas.add(new Replica(url, new HashMap<String,Object>()));
}
implTestTransform(replicas);
}
@Test
public void testTransformUrls() throws Exception {
final List<String> urls = createRandomUrls();
implTestTransform(urls);
}
private <TYPE> void implTestTransform(List<TYPE> inputs) throws Exception {
final List<TYPE> transformedInputs = new ArrayList<>(inputs);
transformer.transform(transformedInputs);
final Set<TYPE> inputSet = new HashSet<>(inputs);
final Set<TYPE> transformedSet = new HashSet<>(transformedInputs);
assertTrue(inputSet.equals(transformedSet));
}
private final List<String> createRandomUrls() throws Exception {
final List<String> urls = new ArrayList<>();
maybeAddUrl(urls, "a"+random().nextDouble());
maybeAddUrl(urls, "bb"+random().nextFloat());
maybeAddUrl(urls, "ccc"+random().nextGaussian());
maybeAddUrl(urls, "dddd"+random().nextInt());
maybeAddUrl(urls, "eeeee"+random().nextLong());
Collections.shuffle(urls, random());
return urls;
}
private final void maybeAddUrl(final List<String> urls, final String url) {
if (random().nextBoolean()) {
urls.add(url);
}
}
}


@ -113,4 +113,17 @@ public class TestMacroExpander extends LuceneTestCase {
} }
} }
@Test
public void testMap() { // see SOLR-9740, the second fq param was being dropped.
final Map<String,String[]> request = new HashMap<>();
request.put("fq", new String[] {"zero", "${one_ref}", "two", "${three_ref}"});
request.put("one_ref",new String[] {"one"});
request.put("three_ref",new String[] {"three"});
Map expanded = MacroExpander.expand(request);
assertEquals("zero", ((String[])expanded.get("fq"))[0]);
assertEquals("one", ((String[])expanded.get("fq"))[1]);
assertEquals("two", ((String[]) expanded.get("fq"))[2]);
assertEquals("three", ((String[]) expanded.get("fq"))[3]);
}
} }


@ -181,15 +181,19 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
methodsExpectedNotOverriden.add("writeMapOpener"); methodsExpectedNotOverriden.add("writeMapOpener");
methodsExpectedNotOverriden.add("writeMapSeparator"); methodsExpectedNotOverriden.add("writeMapSeparator");
methodsExpectedNotOverriden.add("writeMapCloser"); methodsExpectedNotOverriden.add("writeMapCloser");
methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeArray(java.lang.String,java.util.List) throws java.io.IOException");
methodsExpectedNotOverriden.add("writeArrayOpener"); methodsExpectedNotOverriden.add("writeArrayOpener");
methodsExpectedNotOverriden.add("writeArraySeparator"); methodsExpectedNotOverriden.add("writeArraySeparator");
methodsExpectedNotOverriden.add("writeArrayCloser"); methodsExpectedNotOverriden.add("writeArrayCloser");
methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeMap(org.apache.solr.common.MapWriter) throws java.io.IOException");
methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeIterator(org.apache.solr.common.IteratorWriter) throws java.io.IOException");
final Class<?> subClass = ArrayOfNamedValuePairJSONWriter.class; final Class<?> subClass = ArrayOfNamedValuePairJSONWriter.class;
final Class<?> superClass = subClass.getSuperclass(); final Class<?> superClass = subClass.getSuperclass();
for (final Method superClassMethod : superClass.getDeclaredMethods()) { for (final Method superClassMethod : superClass.getDeclaredMethods()) {
final String methodName = superClassMethod.getName(); final String methodName = superClassMethod.getName();
final String methodFullName = superClassMethod.toString();
if (!methodName.startsWith("write")) continue; if (!methodName.startsWith("write")) continue;
final int modifiers = superClassMethod.getModifiers(); final int modifiers = superClassMethod.getModifiers();
@ -197,7 +201,8 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
if (Modifier.isStatic(modifiers)) continue; if (Modifier.isStatic(modifiers)) continue;
if (Modifier.isPrivate(modifiers)) continue; if (Modifier.isPrivate(modifiers)) continue;
final boolean expectOverriden = !methodsExpectedNotOverriden.contains(methodName); final boolean expectOverriden = !methodsExpectedNotOverriden.contains(methodName)
&& !methodsExpectedNotOverriden.contains(methodFullName);
try { try {
final Method subClassMethod = subClass.getDeclaredMethod( final Method subClassMethod = subClass.getDeclaredMethod(
@ -215,7 +220,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
if (expectOverriden) { if (expectOverriden) {
fail(subClass + " needs to override '" + superClassMethod + "'"); fail(subClass + " needs to override '" + superClassMethod + "'");
} else { } else {
assertTrue(methodName+" not found in remaining "+methodsExpectedNotOverriden, methodsExpectedNotOverriden.remove(methodName)); assertTrue(methodName+" not found in remaining "+methodsExpectedNotOverriden, methodsExpectedNotOverriden.remove(methodName) || methodsExpectedNotOverriden.remove(methodFullName));
} }
} }
} }

Some files were not shown because too many files have changed in this diff.