mirror of https://github.com/apache/lucene.git

commit c3400e8a2e
Merge branch 'apache-https-master' into jira/solr-8593
@@ -55,7 +55,30 @@ Other

* LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)

======================= Lucene 6.4.0 =======================

(No Changes)

New features

* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)

Improvements

* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
  PhraseQuery or MultiPhraseQuery when the word automaton is simple
  (Mike McCandless)

* LUCENE-7431: Allow a certain amount of overlap to be specified between the include
  and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
  (Marc Morissette via David Smiley)

* LUCENE-7544: UnifiedHighlighter: add extension points for handling custom queries.
  (Michael Braun, David Smiley)

* LUCENE-7538: Asking IndexWriter to store a too-massive text field
  now throws IllegalArgumentException instead of a cryptic exception
  that closes your IndexWriter (Steve Chen via Mike McCandless)

* LUCENE-7524: Added more detailed explanation of how IDF is computed in
  ClassicSimilarity and BM25Similarity. (Adrien Grand)

======================= Lucene 6.3.0 =======================

@@ -27,7 +27,7 @@

  <dependencies>
    <dependency org="mecab" name="mecab-ipadic" rev="${/mecab/mecab-ipadic}" conf="ipadic">
      <artifact name="ipadic" type=".tar.gz" url="http://mecab.googlecode.com/files/mecab-ipadic-2.7.0-20070801.tar.gz"/>
      <artifact name="ipadic" type=".tar.gz" url="http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz"/>
    </dependency>
    <dependency org="mecab" name="mecab-naist-jdic" rev="${/mecab/mecab-naist-jdic}" conf="naist">
      <artifact name="mecab-naist-jdic" type=".tar.gz" url="http://sourceforge.jp/frs/redir.php?m=iij&f=/naist-jdic/53500/mecab-naist-jdic-0.6.3b-20111013.tar.gz"/>

Binary file not shown.
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BitUtil;

public final class ConnectionCostsWriter {

@@ -33,12 +33,10 @@ import java.util.Comparator;
import java.util.List;

import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.packed.PackedInts;

import com.ibm.icu.text.Normalizer2;

@@ -133,7 +131,7 @@ public class TokenInfoDictionaryBuilder {
    System.out.println(" encode...");

    PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, PackedInts.DEFAULT, true, 15);
    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, 15);
    IntsRefBuilder scratch = new IntsRefBuilder();
    long ord = -1; // first ord will be 0
    String lastValue = null;

@@ -231,7 +231,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    "6.2.0-cfs",
    "6.2.0-nocfs",
    "6.2.1-cfs",
    "6.2.1-nocfs"
    "6.2.1-nocfs",
    "6.3.0-cfs",
    "6.3.0-nocfs"
  };

  final String[] unsupportedNames = {
Binary file not shown.
Binary file not shown.
|
@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.Builder;
|
|||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/*
|
||||
TODO:
|
||||
|
@ -363,8 +362,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
final Builder<Output> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
|
||||
0, 0, true, false, Integer.MAX_VALUE,
|
||||
FST_OUTPUTS, false,
|
||||
PackedInts.COMPACT, true, 15);
|
||||
FST_OUTPUTS, true, 15);
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" compile index for prefix=" + prefix);
|
||||
//}
|
||||
|
|
|
@ -81,9 +81,6 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
// loads itself in ram?
|
||||
public final class MemoryPostingsFormat extends PostingsFormat {
|
||||
|
||||
private final boolean doPackFST;
|
||||
private final float acceptableOverheadRatio;
|
||||
|
||||
public MemoryPostingsFormat() {
|
||||
this(false, PackedInts.DEFAULT);
|
||||
}
|
||||
|
@ -97,13 +94,11 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
|||
*/
|
||||
public MemoryPostingsFormat(boolean doPackFST, float acceptableOverheadRatio) {
|
||||
super("Memory");
|
||||
this.doPackFST = doPackFST;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PostingsFormat(name=" + getName() + " doPackFST= " + doPackFST + ")";
|
||||
return "PostingsFormat(name=" + getName() + ")";
|
||||
}
|
||||
|
||||
private final static class TermsWriter {
|
||||
|
@ -111,16 +106,12 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
|||
private final FieldInfo field;
|
||||
private final Builder<BytesRef> builder;
|
||||
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||
private final boolean doPackFST;
|
||||
private final float acceptableOverheadRatio;
|
||||
private int termCount;
|
||||
|
||||
public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
|
||||
public TermsWriter(IndexOutput out, FieldInfo field) {
|
||||
this.out = out;
|
||||
this.field = field;
|
||||
this.doPackFST = doPackFST;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doPackFST, acceptableOverheadRatio, true, 15);
|
||||
builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
|
||||
}
|
||||
|
||||
private class PostingsWriter {
|
||||
|
@ -307,8 +298,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
|||
TermsEnum termsEnum = terms.iterator();
|
||||
|
||||
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
||||
TermsWriter termsWriter = new TermsWriter(out, fieldInfo,
|
||||
doPackFST, acceptableOverheadRatio);
|
||||
TermsWriter termsWriter = new TermsWriter(out, fieldInfo);
|
||||
|
||||
FixedBitSet docsSeen = new FixedBitSet(state.segmentInfo.maxDoc());
|
||||
long sumTotalTermFreq = 0;
|
||||
|
|
|
@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
|||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/*
|
||||
TODO:
|
||||
|
@ -456,8 +455,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||
final Builder<BytesRef> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
|
||||
0, 0, true, false, Integer.MAX_VALUE,
|
||||
outputs, false,
|
||||
PackedInts.COMPACT, true, 15);
|
||||
outputs, true, 15);
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" compile index for prefix=" + prefix);
|
||||
//}
|
||||
|
|
|
@@ -64,7 +64,7 @@ public final class GrowableByteArrayDataOutput extends DataOutput {

  @Override
  public void writeString(String string) throws IOException {
    int maxLen = string.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
    int maxLen = UnicodeUtil.maxUTF8Length(string.length());
    if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) {
      // string is small enough that we don't need to save memory by falling back to double-pass approach
      // this is just an optimized writeString() that re-uses scratchBytes.
@ -24,11 +24,9 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
@ -48,7 +46,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
|
||||
private final Counter iwBytesUsed;
|
||||
private final PackedLongValues.Builder lengths;
|
||||
private FixedBitSet docsWithField;
|
||||
private DocsWithFieldSet docsWithField;
|
||||
private final FieldInfo fieldInfo;
|
||||
private long bytesUsed;
|
||||
private int lastDocID = -1;
|
||||
|
@ -60,7 +58,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
this.bytesOut = bytes.getDataOutput();
|
||||
this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||
this.iwBytesUsed = iwBytesUsed;
|
||||
this.docsWithField = new FixedBitSet(64);
|
||||
this.docsWithField = new DocsWithFieldSet();
|
||||
this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
|
||||
iwBytesUsed.addAndGet(bytesUsed);
|
||||
}
|
||||
|
@ -84,8 +82,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
// Should never happen!
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
|
||||
docsWithField.set(docID);
|
||||
docsWithField.add(docID);
|
||||
updateBytesUsed();
|
||||
|
||||
lastDocID = docID;
|
||||
|
@ -112,7 +109,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
if (fieldInfoIn != fieldInfo) {
|
||||
throw new IllegalArgumentException("wrong fieldInfo");
|
||||
}
|
||||
return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField);
|
||||
return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField.iterator());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -124,12 +121,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
|||
final DocIdSetIterator docsWithField;
|
||||
final DataInput bytesIterator;
|
||||
|
||||
BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, FixedBitSet docsWithFields) {
|
||||
BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, DocIdSetIterator docsWithFields) {
|
||||
this.value = new BytesRefBuilder();
|
||||
this.value.grow(maxLength);
|
||||
this.lengthsIterator = lengths.iterator();
|
||||
this.bytesIterator = bytesIterator;
|
||||
this.docsWithField = new BitSetIterator(docsWithFields, lengths.size());
|
||||
this.docsWithField = docsWithFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@@ -430,6 +430,10 @@ final class DefaultIndexingChain extends DocConsumer {
        fp = getOrAddField(fieldName, fieldType, false);
      }
      if (fieldType.stored()) {
        String value = field.stringValue();
        if (value != null && value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
          throw new IllegalArgumentException("stored field \"" + field.name() + "\" is too large (" + value.length() + " characters) to store");
        }
        try {
          storedFieldsWriter.writeField(fp.fieldInfo, field);
        } catch (Throwable th) {
@@ -0,0 +1,62 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;

/** Accumulator for documents that have a value for a field. This is optimized
 *  for the case that all documents have a value. */
final class DocsWithFieldSet extends DocIdSet {

  private static long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DocsWithFieldSet.class);

  private FixedBitSet set;
  private int cost = 0;
  private int lastDocId = -1;

  void add(int docID) {
    if (docID <= lastDocId) {
      throw new IllegalArgumentException("Out of order doc ids: last=" + lastDocId + ", next=" + docID);
    }
    if (set != null) {
      set = FixedBitSet.ensureCapacity(set, docID);
      set.set(docID);
    } else if (docID != cost) {
      // migrate to a sparse encoding using a bit set
      set = new FixedBitSet(docID + 1);
      set.set(0, cost);
      set.set(docID);
    }
    lastDocId = docID;
    cost++;
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED + (set == null ? 0 : set.ramBytesUsed());
  }

  @Override
  public DocIdSetIterator iterator() {
    return set != null ? new BitSetIterator(set, cost) : DocIdSetIterator.all(cost);
  }

}
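DocsWithFieldSet stays allocation-free as long as every document has a value (docIDs arrive as 0, 1, 2, ...); only the first gap forces it to materialize a FixedBitSet. A minimal sketch of that behaviour (the demo class and its standalone main() are mine, not part of the patch; it has to live in org.apache.lucene.index because the class is package-private):

    package org.apache.lucene.index;

    import org.apache.lucene.search.DocIdSetIterator;

    public class DocsWithFieldSetDemo {
      public static void main(String[] args) throws Exception {
        DocsWithFieldSet docsWithField = new DocsWithFieldSet();
        // dense case: docs 0..2 all have a value, so no FixedBitSet is allocated
        docsWithField.add(0);
        docsWithField.add(1);
        docsWithField.add(2);
        // doc 3 has no value; adding doc 4 triggers the sparse (bit set) encoding
        docsWithField.add(4);
        DocIdSetIterator it = docsWithField.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
          System.out.println("doc with value: " + doc);   // prints 0, 1, 2, 4
        }
      }
    }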
@@ -62,6 +62,7 @@ import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.RateLimitedIndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;

@@ -70,6 +71,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.Version;

/**

@@ -258,6 +260,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
   * IndexWriterConfig#setInfoStream(InfoStream)}).
   */
  public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;

  /**
   * Maximum length string for a stored field.
   */
  public final static int MAX_STORED_STRING_LENGTH = ArrayUtil.MAX_ARRAY_LENGTH / UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;

  // when unrecoverable disaster strikes, we populate this with the reason that we had to close IndexWriter
  volatile Throwable tragedy;

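With MAX_STORED_STRING_LENGTH now public, the oversized-stored-field failure can be provoked and caught cleanly. A hedged sketch under the assumption of an in-memory directory (the demo class, field name and filler text are invented; only the constant and the IllegalArgumentException come from the patch):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.StoredField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.RAMDirectory;

    public class TooLargeStoredFieldDemo {
      public static void main(String[] args) throws Exception {
        try (IndexWriter w = new IndexWriter(new RAMDirectory(),
                                             new IndexWriterConfig(new StandardAnalyzer()))) {
          // needs a very large heap: the limit is ArrayUtil.MAX_ARRAY_LENGTH / MAX_UTF8_BYTES_PER_CHAR chars
          char[] huge = new char[IndexWriter.MAX_STORED_STRING_LENGTH + 1];
          java.util.Arrays.fill(huge, 'a');
          Document doc = new Document();
          doc.add(new StoredField("body", new String(huge)));
          try {
            w.addDocument(doc);
          } catch (IllegalArgumentException expected) {
            // the writer stays open and usable; before this change the failure was a
            // cryptic exception that closed the IndexWriter
            System.out.println(expected.getMessage());
          }
        }
      }
    }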
@ -22,9 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.codecs.NormsConsumer;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
||||
|
@ -32,7 +30,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
|
|||
* segment flushes. */
|
||||
class NormValuesWriter {
|
||||
|
||||
private FixedBitSet docsWithField;
|
||||
private DocsWithFieldSet docsWithField;
|
||||
private PackedLongValues.Builder pending;
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed;
|
||||
|
@ -40,7 +38,7 @@ class NormValuesWriter {
|
|||
private int lastDocID = -1;
|
||||
|
||||
public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||
docsWithField = new FixedBitSet(64);
|
||||
docsWithField = new DocsWithFieldSet();
|
||||
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
|
||||
this.fieldInfo = fieldInfo;
|
||||
|
@ -54,8 +52,7 @@ class NormValuesWriter {
|
|||
}
|
||||
|
||||
pending.add(value);
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
|
||||
docsWithField.set(docID);
|
||||
docsWithField.add(docID);
|
||||
|
||||
updateBytesUsed();
|
||||
|
||||
|
@ -82,7 +79,7 @@ class NormValuesWriter {
|
|||
if (fieldInfo != NormValuesWriter.this.fieldInfo) {
|
||||
throw new IllegalArgumentException("wrong fieldInfo");
|
||||
}
|
||||
return new BufferedNorms(values, docsWithField);
|
||||
return new BufferedNorms(values, docsWithField.iterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -108,9 +105,9 @@ class NormValuesWriter {
|
|||
final DocIdSetIterator docsWithField;
|
||||
private long value;
|
||||
|
||||
BufferedNorms(PackedLongValues values, FixedBitSet docsWithFields) {
|
||||
BufferedNorms(PackedLongValues values, DocIdSetIterator docsWithFields) {
|
||||
this.iter = values.iterator();
|
||||
this.docsWithField = new BitSetIterator(docsWithFields, values.size());
|
||||
this.docsWithField = docsWithFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,9 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
||||
|
@ -34,13 +32,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
private PackedLongValues.Builder pending;
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed;
|
||||
private FixedBitSet docsWithField;
|
||||
private DocsWithFieldSet docsWithField;
|
||||
private final FieldInfo fieldInfo;
|
||||
private int lastDocID = -1;
|
||||
|
||||
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||
docsWithField = new FixedBitSet(64);
|
||||
docsWithField = new DocsWithFieldSet();
|
||||
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.iwBytesUsed = iwBytesUsed;
|
||||
|
@ -53,8 +51,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
}
|
||||
|
||||
pending.add(value);
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
|
||||
docsWithField.set(docID);
|
||||
docsWithField.add(docID);
|
||||
|
||||
updateBytesUsed();
|
||||
|
||||
|
@ -83,7 +80,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
|
||||
throw new IllegalArgumentException("wrong fieldInfo");
|
||||
}
|
||||
return new BufferedNumericDocValues(values, docsWithField);
|
||||
return new BufferedNumericDocValues(values, docsWithField.iterator());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -94,9 +91,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
|||
final DocIdSetIterator docsWithField;
|
||||
private long value;
|
||||
|
||||
BufferedNumericDocValues(PackedLongValues values, FixedBitSet docsWithFields) {
|
||||
BufferedNumericDocValues(PackedLongValues values, DocIdSetIterator docsWithFields) {
|
||||
this.iter = values.iterator();
|
||||
this.docsWithField = new BitSetIterator(docsWithFields, values.size());
|
||||
this.docsWithField = docsWithFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,13 +22,11 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
||||
|
@ -37,7 +35,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
|
|||
class SortedDocValuesWriter extends DocValuesWriter {
|
||||
final BytesRefHash hash;
|
||||
private PackedLongValues.Builder pending;
|
||||
private FixedBitSet docsWithField;
|
||||
private DocsWithFieldSet docsWithField;
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed; // this currently only tracks differences in 'pending'
|
||||
private final FieldInfo fieldInfo;
|
||||
|
@ -52,7 +50,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
|
|||
BytesRefHash.DEFAULT_CAPACITY,
|
||||
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
|
||||
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||
docsWithField = new FixedBitSet(64);
|
||||
docsWithField = new DocsWithFieldSet();
|
||||
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
|
||||
iwBytesUsed.addAndGet(bytesUsed);
|
||||
}
|
||||
|
@ -69,8 +67,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
|
|||
}
|
||||
|
||||
addOneValue(value);
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
|
||||
docsWithField.set(docID);
|
||||
docsWithField.add(docID);
|
||||
|
||||
lastDocID = docID;
|
||||
}
|
||||
|
@ -121,7 +118,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
|
|||
if (fieldInfoIn != fieldInfo) {
|
||||
throw new IllegalArgumentException("wrong fieldInfo");
|
||||
}
|
||||
return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField);
|
||||
return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField.iterator());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -136,13 +133,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
|
|||
final PackedLongValues.Iterator iter;
|
||||
final DocIdSetIterator docsWithField;
|
||||
|
||||
public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, FixedBitSet docsWithField) {
|
||||
public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, DocIdSetIterator docsWithField) {
|
||||
this.hash = hash;
|
||||
this.valueCount = valueCount;
|
||||
this.sortedValues = sortedValues;
|
||||
this.iter = docToOrd.iterator();
|
||||
this.ordMap = ordMap;
|
||||
this.docsWithField = new BitSetIterator(docsWithField, docToOrd.size());
|
||||
this.docsWithField = docsWithField;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -23,9 +23,7 @@ import java.util.Arrays;
|
|||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
@ -34,7 +32,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
|
|||
class SortedNumericDocValuesWriter extends DocValuesWriter {
|
||||
private PackedLongValues.Builder pending; // stream of all values
|
||||
private PackedLongValues.Builder pendingCounts; // count of values per doc
|
||||
private FixedBitSet docsWithField;
|
||||
private DocsWithFieldSet docsWithField;
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
|
||||
private final FieldInfo fieldInfo;
|
||||
|
@ -47,7 +45,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
|
|||
this.iwBytesUsed = iwBytesUsed;
|
||||
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||
docsWithField = new FixedBitSet(64);
|
||||
docsWithField = new DocsWithFieldSet();
|
||||
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
|
||||
iwBytesUsed.addAndGet(bytesUsed);
|
||||
}
|
||||
|
@ -76,8 +74,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
|
|||
pendingCounts.add(currentUpto);
|
||||
currentUpto = 0;
|
||||
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
|
||||
docsWithField.set(currentDoc);
|
||||
docsWithField.add(currentDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,7 +109,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
|
|||
if (fieldInfoIn != fieldInfo) {
|
||||
throw new IllegalArgumentException("wrong fieldInfo");
|
||||
}
|
||||
return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField);
|
||||
return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField.iterator());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -124,10 +121,10 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
|
|||
private int valueCount;
|
||||
private int valueUpto;
|
||||
|
||||
public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, FixedBitSet docsWithField) {
|
||||
public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, DocIdSetIterator docsWithField) {
|
||||
valuesIter = values.iterator();
|
||||
valueCountsIter = valueCounts.iterator();
|
||||
this.docsWithField = new BitSetIterator(docsWithField, values.size());
|
||||
this.docsWithField = docsWithField;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -24,13 +24,11 @@ import java.util.Arrays;
|
|||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
||||
|
@ -40,7 +38,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
final BytesRefHash hash;
|
||||
private PackedLongValues.Builder pending; // stream of all termIDs
|
||||
private PackedLongValues.Builder pendingCounts; // termIDs per doc
|
||||
private FixedBitSet docsWithField;
|
||||
private DocsWithFieldSet docsWithField;
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
|
||||
private final FieldInfo fieldInfo;
|
||||
|
@ -59,7 +57,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
|
||||
pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
|
||||
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||
docsWithField = new FixedBitSet(64);
|
||||
docsWithField = new DocsWithFieldSet();
|
||||
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
|
||||
iwBytesUsed.addAndGet(bytesUsed);
|
||||
}
|
||||
|
@ -103,8 +101,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
pendingCounts.add(count);
|
||||
maxCount = Math.max(maxCount, count);
|
||||
currentUpto = 0;
|
||||
docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
|
||||
docsWithField.set(currentDoc);
|
||||
docsWithField.add(currentDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -158,7 +155,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
if (fieldInfoIn != fieldInfo) {
|
||||
throw new IllegalArgumentException("wrong fieldInfo");
|
||||
}
|
||||
return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField);
|
||||
return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -176,14 +173,14 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
|
|||
private int ordCount;
|
||||
private int ordUpto;
|
||||
|
||||
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, FixedBitSet docsWithField) {
|
||||
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, DocIdSetIterator docsWithField) {
|
||||
this.currentDoc = new int[maxCount];
|
||||
this.sortedValues = sortedValues;
|
||||
this.ordMap = ordMap;
|
||||
this.hash = hash;
|
||||
this.ordsIter = ords.iterator();
|
||||
this.ordCountsIter = ordCounts.iterator();
|
||||
this.docsWithField = new BitSetIterator(docsWithField, ordCounts.size());
|
||||
this.docsWithField = docsWithField;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@@ -265,7 +265,7 @@ public class PhraseQuery extends Query {
   * Returns the relative positions of terms in this phrase.
   */
  public int[] getPositions() {
      return positions;
    return positions;
  }

  @Override
@@ -175,7 +175,9 @@ public class BM25Similarity extends Similarity {
    final long df = termStats.docFreq();
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
    final float idf = idf(df, docCount);
    return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
    return Explanation.match(idf, "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
        Explanation.match(df, "docFreq"),
        Explanation.match(docCount, "docCount"));
  }

  /**

@@ -192,16 +194,14 @@ public class BM25Similarity extends Similarity {
   * for each term.
   */
  public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
    float idf = 0.0f;
    double idf = 0d; // sum into a double before casting into a float
    List<Explanation> details = new ArrayList<>();
    for (final TermStatistics stat : termStats ) {
      final long df = stat.docFreq();
      final float termIdf = idf(df, docCount);
      details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
      idf += termIdf;
      Explanation idfExplain = idfExplain(collectionStats, stat);
      details.add(idfExplain);
      idf += idfExplain.getValue();
    }
    return Explanation.match(idf, "idf(), sum of:", details);
    return Explanation.match((float) idf, "idf(), sum of:", details);
  }

  @Override

@@ -303,7 +303,7 @@ public class BM25Similarity extends Similarity {
      subs.add(Explanation.match(0, "parameter b (norms omitted for field)"));
      return Explanation.match(
          (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
          "tfNorm, computed from:", subs);
          "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1) from:", subs);
    } else {
      byte norm;
      if (norms.advanceExact(doc)) {

@@ -317,7 +317,7 @@ public class BM25Similarity extends Similarity {
      subs.add(Explanation.match(doclen, "fieldLength"));
      return Explanation.match(
          (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)),
          "tfNorm, computed from:", subs);
          "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:", subs);
    }
  }

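The reworked messages spell out the exact formulas, so an Explanation can be checked by hand. For instance, for the BM25 idf line with illustrative values docFreq = 5 and docCount = 100 (log is the natural logarithm, as in the code):

    \mathrm{idf} = \log\left(1 + \frac{\mathrm{docCount} - \mathrm{docFreq} + 0.5}{\mathrm{docFreq} + 0.5}\right)
                 = \log\left(1 + \frac{100 - 5 + 0.5}{5 + 0.5}\right) \approx 2.91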
@@ -0,0 +1,95 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.similarities;

import java.io.IOException;

import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;

/**
 * Simple similarity that gives terms a score that is equal to their query
 * boost. This similarity is typically used with disabled norms since neither
 * document statistics nor index statistics are used for scoring. That said,
 * if norms are enabled, they will be computed the same way as
 * {@link SimilarityBase} and {@link BM25Similarity} with
 * {@link SimilarityBase#setDiscountOverlaps(boolean) discounted overlaps}
 * so that the {@link Similarity} can be changed after the index has been
 * created.
 */
public class BooleanSimilarity extends Similarity {

  private static final Similarity BM25_SIM = new BM25Similarity();

  /** Sole constructor */
  public BooleanSimilarity() {}

  @Override
  public long computeNorm(FieldInvertState state) {
    return BM25_SIM.computeNorm(state);
  }

  @Override
  public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    return new BooleanWeight(boost);
  }

  private static class BooleanWeight extends SimWeight {
    final float boost;

    BooleanWeight(float boost) {
      this.boost = boost;
    }
  }

  @Override
  public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
    final float boost = ((BooleanWeight) weight).boost;

    return new SimScorer() {

      @Override
      public float score(int doc, float freq) throws IOException {
        return boost;
      }

      @Override
      public Explanation explain(int doc, Explanation freq) throws IOException {
        Explanation queryBoostExpl = Explanation.match(boost, "query boost");
        return Explanation.match(
            queryBoostExpl.getValue(),
            "score(" + getClass().getSimpleName() + ", doc=" + doc + "), computed from:",
            queryBoostExpl);
      }

      @Override
      public float computeSlopFactor(int distance) {
        return 1f;
      }

      @Override
      public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
        return 1f;
      }
    };
  }

}
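A minimal sketch of plugging the new similarity in on both the indexing and the searching side (the analyzer choice and the commented-out reader wiring are assumptions, not part of the patch):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.similarities.BooleanSimilarity;

    public class BooleanSimilarityDemo {
      public static void main(String[] args) {
        BooleanSimilarity sim = new BooleanSimilarity();

        // use the same similarity at index time (it drives norm encoding) ...
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setSimilarity(sim);

        // ... and at search time, e.g. after opening a DirectoryReader:
        // IndexSearcher searcher = new IndexSearcher(reader);
        // searcher.setSimilarity(sim);
        // every matching term then contributes exactly its query boost to the score
      }
    }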
@@ -18,6 +18,9 @@ package org.apache.lucene.search.similarities;


import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SmallFloat;

@@ -121,6 +124,16 @@ public class ClassicSimilarity extends TFIDFSimilarity {
    return 1;
  }

  @Override
  public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
    final long df = termStats.docFreq();
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
    final float idf = idf(df, docCount);
    return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
        Explanation.match(df, "docFreq"),
        Explanation.match(docCount, "docCount"));
  }

  /** Implemented as <code>log((docCount+1)/(docFreq+1)) + 1</code>. */
  @Override
  public float idf(long docFreq, long docCount) {

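For comparison, the ClassicSimilarity message documents the classic TF-IDF variant; with the same illustrative docFreq = 5 and docCount = 100:

    \mathrm{idf} = \log\left(\frac{\mathrm{docCount} + 1}{\mathrm{docFreq} + 1}\right) + 1
                 = \log\left(\frac{101}{6}\right) + 1 \approx 3.82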
@@ -484,16 +484,14 @@ public abstract class TFIDFSimilarity extends Similarity {
   * for each term.
   */
  public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
    float idf = 0.0f;
    double idf = 0d; // sum into a double before casting into a float
    List<Explanation> subs = new ArrayList<>();
    for (final TermStatistics stat : termStats ) {
      final long df = stat.docFreq();
      final float termIdf = idf(df, docCount);
      subs.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
      idf += termIdf;
      Explanation idfExplain = idfExplain(collectionStats, stat);
      subs.add(idfExplain);
      idf += idfExplain.getValue();
    }
    return Explanation.match(idf, "idf(), sum of:", subs);
    return Explanation.match((float) idf, "idf(), sum of:", subs);
  }

  /** Computes a score factor based on a term's document frequency (the number

@@ -49,19 +49,23 @@ public final class SpanNotQuery extends SpanQuery {

  /** Construct a SpanNotQuery matching spans from <code>include</code> which
   * have no overlap with spans from <code>exclude</code> within
   * <code>dist</code> tokens of <code>include</code>. */
   * <code>dist</code> tokens of <code>include</code>. Inversely, a negative
   * <code>dist</code> value may be used to specify a certain amount of allowable
   * overlap. */
  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
    this(include, exclude, dist, dist);
  }

  /** Construct a SpanNotQuery matching spans from <code>include</code> which
   * have no overlap with spans from <code>exclude</code> within
   * <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
   * <code>pre</code> tokens before or <code>post</code> tokens of
   * <code>include</code>. Inversely, negative values for <code>pre</code> and/or
   * <code>post</code> allow a certain amount of overlap to occur. */
  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
    this.include = Objects.requireNonNull(include);
    this.exclude = Objects.requireNonNull(exclude);
    this.pre = (pre >=0) ? pre : 0;
    this.post = (post >= 0) ? post : 0;
    this.pre = pre;
    this.post = post;

    if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))
      throw new IllegalArgumentException("Clauses must have same field.");

@@ -226,4 +230,4 @@ public final class SpanNotQuery extends SpanQuery {
    return h;
  }

}
}

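A hedged sketch of the relaxed pre/post arguments in action (field name, terms and the surrounding query shape are invented; only the four-argument constructor and the meaning of negative values come from the patch):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.spans.SpanNearQuery;
    import org.apache.lucene.search.spans.SpanNotQuery;
    import org.apache.lucene.search.spans.SpanQuery;
    import org.apache.lucene.search.spans.SpanTermQuery;

    public class SpanNotOverlapDemo {
      public static void main(String[] args) {
        SpanQuery include = new SpanNearQuery(new SpanQuery[] {
            new SpanTermQuery(new Term("body", "quick")),
            new SpanTermQuery(new Term("body", "fox"))
        }, 1, true);
        SpanQuery exclude = new SpanTermQuery(new Term("body", "brown"));

        // pre = post = -1: per the new javadoc, the exclude span may overlap the
        // include span by one position at either edge without rejecting the match;
        // with 0 (the old lower bound) any overlap would reject it
        SpanQuery q = new SpanNotQuery(include, exclude, -1, -1);
        System.out.println(q);
      }
    }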
@@ -84,7 +84,7 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
   * unicode text, with no unpaired surrogates.
   */
  public BytesRef(CharSequence text) {
    this(new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * text.length()]);
    this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
    length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
  }

@@ -143,7 +143,7 @@ public class BytesRefBuilder {
   * represent the provided text.
   */
  public void copyChars(CharSequence text, int off, int len) {
    grow(len * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
    grow(UnicodeUtil.maxUTF8Length(len));
    ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  }

@@ -152,7 +152,7 @@ public class BytesRefBuilder {
   * represent the provided text.
   */
  public void copyChars(char[] text, int off, int len) {
    grow(len * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
    grow(UnicodeUtil.maxUTF8Length(len));
    ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  }

@@ -612,6 +612,11 @@ public final class UnicodeUtil {
    }
    return out_offset;
  }

  /** Returns the maximum number of utf8 bytes required to encode a utf16 (e.g., java char[], String) */
  public static int maxUTF8Length(int utf16Length) {
    return Math.multiplyExact(utf16Length, MAX_UTF8_BYTES_PER_CHAR);
  }

  /**
   * Utility method for {@link #UTF8toUTF16(byte[], int, int, char[])}

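maxUTF8Length exists so that worst-case UTF-8 sizing overflows loudly: Math.multiplyExact throws ArithmeticException where the old len * MAX_UTF8_BYTES_PER_CHAR pattern would silently wrap around. A small sketch (the demo class and the chosen lengths are illustrative only):

    import org.apache.lucene.util.UnicodeUtil;

    public class MaxUtf8LengthDemo {
      public static void main(String[] args) {
        // worst-case bytes for 10 UTF-16 code units: 10 * MAX_UTF8_BYTES_PER_CHAR
        System.out.println(UnicodeUtil.maxUTF8Length(10));

        int huge = Integer.MAX_VALUE / 2;
        // old style: the multiplication silently overflows into a wrapped-around value
        System.out.println(huge * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
        try {
          // new style: fails loudly instead of handing back a bogus buffer size
          UnicodeUtil.maxUTF8Length(huge);
        } catch (ArithmeticException e) {
          System.out.println("overflow detected: " + e.getMessage());
        }
      }
    }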
@ -23,7 +23,6 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// TODO: could we somehow stream an FST to disk while we
|
||||
// build it?
|
||||
|
@ -69,10 +68,6 @@ public class Builder<T> {
|
|||
private final int shareMaxTailLength;
|
||||
|
||||
private final IntsRefBuilder lastInput = new IntsRefBuilder();
|
||||
|
||||
// for packing
|
||||
private final boolean doPackFST;
|
||||
private final float acceptableOverheadRatio;
|
||||
|
||||
// NOTE: cutting this over to ArrayList instead loses ~6%
|
||||
// in build performance on 9.8M Wikipedia terms; so we
|
||||
|
@ -99,11 +94,10 @@ public class Builder<T> {
|
|||
/**
|
||||
* Instantiates an FST/FSA builder without any pruning. A shortcut
|
||||
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
|
||||
* boolean, int, Outputs, boolean, float,
|
||||
* boolean, int)} with pruning options turned off.
|
||||
* boolean, int, Outputs, boolean, int)} with pruning options turned off.
|
||||
*/
|
||||
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
|
||||
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, false, PackedInts.COMPACT, true, 15);
|
||||
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -143,11 +137,6 @@ public class Builder<T> {
|
|||
* FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
|
||||
* singleton output object.
|
||||
*
|
||||
* @param doPackFST Pass true to create a packed FST.
|
||||
*
|
||||
* @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
|
||||
* is only relevant when doPackFST is true. @see PackedInts#getMutable(int, int, float)
|
||||
*
|
||||
* @param allowArrayArcs Pass false to disable the array arc optimization
|
||||
* while building the FST; this will make the resulting
|
||||
* FST smaller but slower to traverse.
|
||||
|
@ -159,16 +148,13 @@ public class Builder<T> {
|
|||
*/
|
||||
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
|
||||
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
|
||||
boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
|
||||
int bytesPageBits) {
|
||||
boolean allowArrayArcs, int bytesPageBits) {
|
||||
this.minSuffixCount1 = minSuffixCount1;
|
||||
this.minSuffixCount2 = minSuffixCount2;
|
||||
this.doShareNonSingletonNodes = doShareNonSingletonNodes;
|
||||
this.shareMaxTailLength = shareMaxTailLength;
|
||||
this.doPackFST = doPackFST;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
this.allowArrayArcs = allowArrayArcs;
|
||||
fst = new FST<>(inputType, outputs, doPackFST, acceptableOverheadRatio, bytesPageBits);
|
||||
fst = new FST<>(inputType, outputs, bytesPageBits);
|
||||
bytes = fst.bytes;
|
||||
assert bytes != null;
|
||||
if (doShareSuffix) {
|
||||
|
@ -496,11 +482,7 @@ public class Builder<T> {
|
|||
//if (DEBUG) System.out.println(" builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
|
||||
fst.finish(compileNode(root, lastInput.length()).node);
|
||||
|
||||
if (doPackFST) {
|
||||
return fst.pack(this, 3, Math.max(10, (int) (getNodeCount()/4)), acceptableOverheadRatio);
|
||||
} else {
|
||||
return fst;
|
||||
}
|
||||
return fst;
|
||||
}
|
||||
|
||||
private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException {
|
||||
|
|
|
@ -24,13 +24,9 @@ import java.io.InputStream;
|
|||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
@ -38,13 +34,9 @@ import org.apache.lucene.store.InputStreamDataInput;
|
|||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.store.RAMOutputStream;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// TODO: break this into WritableFST and ReadOnlyFST.. then
|
||||
// we can have subclasses of ReadOnlyFST to handle the
|
||||
|
@ -90,14 +82,6 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;
|
||||
|
||||
// Arcs are stored as fixed-size (per entry) array, so
|
||||
// that we can find an arc using binary search. We do
|
||||
// this when number of arcs is > NUM_ARCS_ARRAY:
|
||||
|
||||
// If set, the target node is delta coded vs current
|
||||
// position:
|
||||
private static final int BIT_TARGET_DELTA = 1 << 6;
|
||||
|
||||
// We use this as a marker (because this one flag is
|
||||
// illegal by itself ...):
|
||||
private static final byte ARCS_AS_FIXED_ARRAY = BIT_ARC_HAS_FINAL_OUTPUT;
|
||||
|
@ -137,7 +121,9 @@ public final class FST<T> implements Accountable {
|
|||
/** Don't store arcWithOutputCount anymore */
|
||||
private static final int VERSION_NO_NODE_ARC_COUNTS = 5;
|
||||
|
||||
private static final int VERSION_CURRENT = VERSION_NO_NODE_ARC_COUNTS;
|
||||
private static final int VERSION_PACKED_REMOVED = 6;
|
||||
|
||||
private static final int VERSION_CURRENT = VERSION_PACKED_REMOVED;
|
||||
|
||||
// Never serialized; just used to represent the virtual
|
||||
// final node w/ no arcs:
|
||||
|
@ -168,9 +154,6 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
public final Outputs<T> outputs;
|
||||
|
||||
private final boolean packed;
|
||||
private PackedInts.Reader nodeRefToAddress;
|
||||
|
||||
private Arc<T> cachedRootArcs[];
|
||||
|
||||
/** Represents a single arc. */
|
||||
|
@ -273,18 +256,11 @@ public final class FST<T> implements Accountable {
|
|||
return (flags & bit) != 0;
|
||||
}
|
||||
|
||||
private GrowableWriter nodeAddress;
|
||||
|
||||
// TODO: we could be smarter here, and prune periodically
|
||||
// as we go; high in-count nodes will "usually" become
|
||||
// clear early on:
|
||||
private GrowableWriter inCounts;
|
||||
|
||||
private final int version;
|
||||
|
||||
// make a new empty FST, for building; Builder invokes
|
||||
// this ctor
|
||||
FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio, int bytesPageBits) {
|
||||
FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
|
||||
this.inputType = inputType;
|
||||
this.outputs = outputs;
|
||||
version = VERSION_CURRENT;
|
||||
|
@ -293,17 +269,8 @@ public final class FST<T> implements Accountable {
|
|||
// pad: ensure no node gets address 0 which is reserved to mean
|
||||
// the stop state w/ no arcs
|
||||
bytes.writeByte((byte) 0);
|
||||
if (willPackFST) {
|
||||
nodeAddress = new GrowableWriter(15, 8, acceptableOverheadRatio);
|
||||
inCounts = new GrowableWriter(1, 8, acceptableOverheadRatio);
|
||||
} else {
|
||||
nodeAddress = null;
|
||||
inCounts = null;
|
||||
}
|
||||
|
||||
emptyOutput = null;
|
||||
packed = false;
|
||||
nodeRefToAddress = null;
|
||||
}
|
||||
|
||||
public static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28;
|
||||
|
@ -324,8 +291,12 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
// NOTE: only reads most recent format; we don't have
|
||||
// back-compat promise for FSTs (they are experimental):
|
||||
version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_NO_NODE_ARC_COUNTS);
|
||||
packed = in.readByte() == 1;
|
||||
version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_CURRENT);
|
||||
if (version < VERSION_PACKED_REMOVED) {
|
||||
if (in.readByte() == 1) {
|
||||
throw new CorruptIndexException("Cannot read packed FSTs anymore", in);
|
||||
}
|
||||
}
|
||||
if (in.readByte() == 1) {
|
||||
// accepts empty string
|
||||
// 1 KB blocks:
|
||||
|
@ -334,17 +305,12 @@ public final class FST<T> implements Accountable {
|
|||
emptyBytes.copyBytes(in, numBytes);
|
||||
|
||||
// De-serialize empty-string output:
|
||||
BytesReader reader;
|
||||
if (packed) {
|
||||
reader = emptyBytes.getForwardReader();
|
||||
} else {
|
||||
reader = emptyBytes.getReverseReader();
|
||||
// NoOutputs uses 0 bytes when writing its output,
|
||||
// so we have to check here else BytesStore gets
|
||||
// angry:
|
||||
if (numBytes > 0) {
|
||||
reader.setPosition(numBytes-1);
|
||||
}
|
||||
BytesReader reader = emptyBytes.getReverseReader();
|
||||
// NoOutputs uses 0 bytes when writing its output,
|
||||
// so we have to check here else BytesStore gets
|
||||
// angry:
|
||||
if (numBytes > 0) {
|
||||
reader.setPosition(numBytes-1);
|
||||
}
|
||||
emptyOutput = outputs.readFinalOutput(reader);
|
||||
} else {
|
||||
|
@ -364,11 +330,6 @@ public final class FST<T> implements Accountable {
|
|||
default:
|
||||
throw new IllegalStateException("invalid input type " + t);
|
||||
}
|
||||
if (packed) {
|
||||
nodeRefToAddress = PackedInts.getReader(in);
|
||||
} else {
|
||||
nodeRefToAddress = null;
|
||||
}
|
||||
startNode = in.readVLong();
|
||||
if (version < VERSION_NO_NODE_ARC_COUNTS) {
|
||||
in.readVLong();
|
||||
|
@ -424,31 +385,13 @@ public final class FST<T> implements Accountable {
|
|||
} else {
|
||||
size += bytes.ramBytesUsed();
|
||||
}
|
||||
if (packed) {
|
||||
size += nodeRefToAddress.ramBytesUsed();
|
||||
} else if (nodeAddress != null) {
|
||||
size += nodeAddress.ramBytesUsed();
|
||||
size += inCounts.ramBytesUsed();
|
||||
}
|
||||
size += cachedArcsBytesUsed;
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Accountable> getChildResources() {
|
||||
List<Accountable> resources = new ArrayList<>();
|
||||
if (packed) {
|
||||
resources.add(Accountables.namedAccountable("node ref to address", nodeRefToAddress));
|
||||
} else if (nodeAddress != null) {
|
||||
resources.add(Accountables.namedAccountable("node addresses", nodeAddress));
|
||||
resources.add(Accountables.namedAccountable("in counts", inCounts));
|
||||
}
|
||||
return resources;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs + ",packed=" + packed;
|
||||
return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs;
|
||||
}
|
||||
|
||||
void finish(long newStartNode) throws IOException {
|
||||
|
@ -463,16 +406,6 @@ public final class FST<T> implements Accountable {
|
|||
bytes.finish();
|
||||
cacheRootArcs();
|
||||
}
|
||||
|
||||
private long getNodeAddress(long node) {
|
||||
if (nodeAddress != null) {
|
||||
// Deref
|
||||
return nodeAddress.get((int) node);
|
||||
} else {
|
||||
// Straight
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
// Optionally caches first 128 labels
|
||||
@SuppressWarnings({"rawtypes","unchecked"})
|
||||
|
@ -527,18 +460,7 @@ public final class FST<T> implements Accountable {
|
|||
if (startNode == -1) {
|
||||
throw new IllegalStateException("call finish first");
|
||||
}
|
||||
if (nodeAddress != null) {
|
||||
throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
|
||||
}
|
||||
if (packed && !(nodeRefToAddress instanceof PackedInts.Mutable)) {
|
||||
throw new IllegalStateException("cannot save a FST which has been loaded from disk ");
|
||||
}
|
||||
CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
|
||||
if (packed) {
|
||||
out.writeByte((byte) 1);
|
||||
} else {
|
||||
out.writeByte((byte) 0);
|
||||
}
|
||||
// TODO: really we should encode this as an arc, arriving
|
||||
// to the root node, instead of special casing here:
|
||||
if (emptyOutput != null) {
|
||||
|
@ -552,16 +474,14 @@ public final class FST<T> implements Accountable {
|
|||
byte[] emptyOutputBytes = new byte[(int) ros.getFilePointer()];
|
||||
ros.writeTo(emptyOutputBytes, 0);
|
||||
|
||||
if (!packed) {
|
||||
// reverse
|
||||
final int stopAt = emptyOutputBytes.length/2;
|
||||
int upto = 0;
|
||||
while(upto < stopAt) {
|
||||
final byte b = emptyOutputBytes[upto];
|
||||
emptyOutputBytes[upto] = emptyOutputBytes[emptyOutputBytes.length-upto-1];
|
||||
emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
|
||||
upto++;
|
||||
}
|
||||
// reverse
|
||||
final int stopAt = emptyOutputBytes.length/2;
|
||||
int upto = 0;
|
||||
while(upto < stopAt) {
|
||||
final byte b = emptyOutputBytes[upto];
|
||||
emptyOutputBytes[upto] = emptyOutputBytes[emptyOutputBytes.length-upto-1];
|
||||
emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
|
||||
upto++;
|
||||
}
|
||||
out.writeVInt(emptyOutputBytes.length);
|
||||
out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
|
||||
|
@ -577,9 +497,6 @@ public final class FST<T> implements Accountable {
|
|||
t = 2;
|
||||
}
|
||||
out.writeByte(t);
|
||||
if (packed) {
|
||||
((PackedInts.Mutable) nodeRefToAddress).save(out);
|
||||
}
|
||||
out.writeVLong(startNode);
|
||||
if (bytes != null) {
|
||||
long numBytes = bytes.getPosition();
|
||||
|
@ -705,8 +622,6 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
if (!targetHasArcs) {
|
||||
flags += BIT_STOP_NODE;
|
||||
} else if (inCounts != null) {
|
||||
inCounts.set((int) target.node, inCounts.get((int) target.node) + 1);
|
||||
}
|
||||
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
|
@ -810,30 +725,8 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
builder.bytes.reverse(startAddress, thisNodeAddress);
|
||||
|
||||
// PackedInts uses int as the index, so we cannot handle
|
||||
// > 2.1B nodes when packing:
|
||||
if (nodeAddress != null && builder.nodeCount == Integer.MAX_VALUE) {
|
||||
throw new IllegalStateException("cannot create a packed FST with more than 2.1 billion nodes");
|
||||
}
|
||||
|
||||
builder.nodeCount++;
|
||||
final long node;
|
||||
if (nodeAddress != null) {
|
||||
|
||||
// Nodes are addressed by 1+ord:
|
||||
if ((int) builder.nodeCount == nodeAddress.size()) {
|
||||
nodeAddress = nodeAddress.resize(ArrayUtil.oversize(nodeAddress.size() + 1, nodeAddress.getBitsPerValue()));
|
||||
inCounts = inCounts.resize(ArrayUtil.oversize(inCounts.size() + 1, inCounts.getBitsPerValue()));
|
||||
}
|
||||
nodeAddress.set((int) builder.nodeCount, thisNodeAddress);
|
||||
// System.out.println(" write nodeAddress[" + nodeCount + "] = " + endAddress);
|
||||
node = builder.nodeCount;
|
||||
} else {
|
||||
node = thisNodeAddress;
|
||||
}
|
||||
|
||||
//System.out.println(" ret node=" + node + " address=" + thisNodeAddress + " nodeAddress=" + nodeAddress);
|
||||
return node;
|
||||
return thisNodeAddress;
|
||||
}
|
||||
|
||||
/** Fills virtual 'start' arc, ie, an empty incoming arc to
|
||||
|
@ -876,13 +769,13 @@ public final class FST<T> implements Accountable {
|
|||
arc.flags = BIT_LAST_ARC;
|
||||
return arc;
|
||||
} else {
|
||||
in.setPosition(getNodeAddress(follow.target));
|
||||
in.setPosition(follow.target);
|
||||
arc.node = follow.target;
|
||||
final byte b = in.readByte();
|
||||
if (b == ARCS_AS_FIXED_ARRAY) {
|
||||
// array: jump straight to end
|
||||
arc.numArcs = in.readVInt();
|
||||
if (packed || version >= VERSION_VINT_TARGET) {
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
} else {
|
||||
arc.bytesPerArc = in.readInt();
|
||||
|
@ -906,8 +799,6 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
if (arc.flag(BIT_STOP_NODE)) {
|
||||
} else if (arc.flag(BIT_TARGET_NEXT)) {
|
||||
} else if (packed) {
|
||||
in.readVLong();
|
||||
} else {
|
||||
readUnpackedNodeTarget(in);
|
||||
}
|
||||
|
@ -964,7 +855,7 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
|
||||
public Arc<T> readFirstRealTargetArc(long node, Arc<T> arc, final BytesReader in) throws IOException {
|
||||
final long address = getNodeAddress(node);
|
||||
final long address = node;
|
||||
in.setPosition(address);
|
||||
//System.out.println(" readFirstRealTargetArc address="
|
||||
//+ address);
|
||||
|
@ -975,7 +866,7 @@ public final class FST<T> implements Accountable {
|
|||
//System.out.println(" fixedArray");
|
||||
// this is first arc in a fixed-array
|
||||
arc.numArcs = in.readVInt();
|
||||
if (packed || version >= VERSION_VINT_TARGET) {
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
} else {
|
||||
arc.bytesPerArc = in.readInt();
|
||||
|
@ -1002,7 +893,7 @@ public final class FST<T> implements Accountable {
|
|||
if (!targetHasArcs(follow)) {
|
||||
return false;
|
||||
} else {
|
||||
in.setPosition(getNodeAddress(follow.target));
|
||||
in.setPosition(follow.target);
|
||||
return in.readByte() == ARCS_AS_FIXED_ARRAY;
|
||||
}
|
||||
}
|
||||
|
@ -1029,7 +920,7 @@ public final class FST<T> implements Accountable {
|
|||
//System.out.println(" nextArc fake " +
|
||||
//arc.nextArc);
|
||||
|
||||
long pos = getNodeAddress(arc.nextArc);
|
||||
long pos = arc.nextArc;
|
||||
in.setPosition(pos);
|
||||
|
||||
final byte b = in.readByte();
|
||||
|
@ -1038,7 +929,7 @@ public final class FST<T> implements Accountable {
|
|||
in.readVInt();
|
||||
|
||||
// Skip bytesPerArc:
|
||||
if (packed || version >= VERSION_VINT_TARGET) {
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
in.readVInt();
|
||||
} else {
|
||||
in.readInt();
|
||||
|
@ -1107,41 +998,18 @@ public final class FST<T> implements Accountable {
|
|||
arc.nextArc = in.getPosition();
|
||||
// TODO: would be nice to make this lazy -- maybe
|
||||
// caller doesn't need the target and is scanning arcs...
|
||||
if (nodeAddress == null) {
|
||||
if (!arc.flag(BIT_LAST_ARC)) {
|
||||
if (arc.bytesPerArc == 0) {
|
||||
// must scan
|
||||
seekToNextNode(in);
|
||||
} else {
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc * arc.numArcs);
|
||||
}
|
||||
}
|
||||
arc.target = in.getPosition();
|
||||
} else {
|
||||
arc.target = arc.node - 1;
|
||||
assert arc.target > 0;
|
||||
}
|
||||
} else {
|
||||
if (packed) {
|
||||
final long pos = in.getPosition();
|
||||
final long code = in.readVLong();
|
||||
if (arc.flag(BIT_TARGET_DELTA)) {
|
||||
// Address is delta-coded from current address:
|
||||
arc.target = pos + code;
|
||||
//System.out.println(" delta pos=" + pos + " delta=" + code + " target=" + arc.target);
|
||||
} else if (code < nodeRefToAddress.size()) {
|
||||
// Deref
|
||||
arc.target = nodeRefToAddress.get((int) code);
|
||||
//System.out.println(" deref code=" + code + " target=" + arc.target);
|
||||
if (!arc.flag(BIT_LAST_ARC)) {
|
||||
if (arc.bytesPerArc == 0) {
|
||||
// must scan
|
||||
seekToNextNode(in);
|
||||
} else {
|
||||
// Absolute
|
||||
arc.target = code;
|
||||
//System.out.println(" abs code=" + code);
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc * arc.numArcs);
|
||||
}
|
||||
} else {
|
||||
arc.target = readUnpackedNodeTarget(in);
|
||||
}
|
||||
arc.target = in.getPosition();
|
||||
} else {
|
||||
arc.target = readUnpackedNodeTarget(in);
|
||||
arc.nextArc = in.getPosition();
|
||||
}
|
||||
return arc;
|
||||
|
@ -1228,7 +1096,7 @@ public final class FST<T> implements Accountable {
|
|||
return null;
|
||||
}
|
||||
|
||||
in.setPosition(getNodeAddress(follow.target));
|
||||
in.setPosition(follow.target);
|
||||
|
||||
arc.node = follow.target;
|
||||
|
||||
|
@ -1237,7 +1105,7 @@ public final class FST<T> implements Accountable {
|
|||
if (in.readByte() == ARCS_AS_FIXED_ARRAY) {
|
||||
// Arcs are full array; do binary search:
|
||||
arc.numArcs = in.readVInt();
|
||||
if (packed || version >= VERSION_VINT_TARGET) {
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
} else {
|
||||
arc.bytesPerArc = in.readInt();
|
||||
|
@ -1303,11 +1171,7 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
|
||||
if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) {
|
||||
if (packed) {
|
||||
in.readVLong();
|
||||
} else {
|
||||
readUnpackedNodeTarget(in);
|
||||
}
|
||||
readUnpackedNodeTarget(in);
|
||||
}
|
||||
|
||||
if (flag(flags, BIT_LAST_ARC)) {
|
||||
|
@ -1340,18 +1204,10 @@ public final class FST<T> implements Accountable {
|
|||
/** Returns a {@link BytesReader} for this FST, positioned at
|
||||
* position 0. */
|
||||
public BytesReader getBytesReader() {
|
||||
if (packed) {
|
||||
if (bytesArray != null) {
|
||||
return new ForwardBytesReader(bytesArray);
|
||||
} else {
|
||||
return bytes.getForwardReader();
|
||||
}
|
||||
if (bytesArray != null) {
|
||||
return new ReverseBytesReader(bytesArray);
|
||||
} else {
|
||||
if (bytesArray != null) {
|
||||
return new ReverseBytesReader(bytesArray);
|
||||
} else {
|
||||
return bytes.getReverseReader();
|
||||
}
|
||||
return bytes.getReverseReader();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1476,395 +1332,4 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
*/
|
||||
|
||||
// Creates a packed FST
|
||||
private FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
|
||||
version = VERSION_CURRENT;
|
||||
packed = true;
|
||||
this.inputType = inputType;
|
||||
bytesArray = null;
|
||||
bytes = new BytesStore(bytesPageBits);
|
||||
this.outputs = outputs;
|
||||
}
|
||||
|
||||
/** Expert: creates an FST by packing this one. This
|
||||
* process requires substantial additional RAM (currently
|
||||
* up to ~8 bytes per node depending on
|
||||
* <code>acceptableOverheadRatio</code>), but then should
|
||||
* produce a smaller FST.
|
||||
*
|
||||
* <p>The implementation of this method uses ideas from
|
||||
* <a target="_blank" href="http://www.cs.put.poznan.pl/dweiss/site/publications/download/fsacomp.pdf">Smaller Representation of Finite State Automata</a>,
|
||||
* which describes techniques to reduce the size of a FST.
|
||||
* However, this is not a strict implementation of the
|
||||
* algorithms described in this paper.
|
||||
*/
|
||||
FST<T> pack(Builder<T> builder, int minInCountDeref, int maxDerefNodes, float acceptableOverheadRatio) throws IOException {
|
||||
|
||||
// NOTE: maxDerefNodes is intentionally int: we cannot
|
||||
// support > 2.1B deref nodes
|
||||
|
||||
// TODO: other things to try
|
||||
// - renumber the nodes to get more next / better locality?
|
||||
// - allow multiple input labels on an arc, so
|
||||
// singular chain of inputs can take one arc (on
|
||||
// wikipedia terms this could save another ~6%)
|
||||
// - in the ord case, the output '1' is presumably
|
||||
// very common (after NO_OUTPUT)... maybe use a bit
|
||||
// for it..?
|
||||
// - use spare bits in flags.... for top few labels /
|
||||
// outputs / targets
|
||||
|
||||
if (nodeAddress == null) {
|
||||
throw new IllegalArgumentException("this FST was not built with willPackFST=true");
|
||||
}
|
||||
|
||||
T NO_OUTPUT = outputs.getNoOutput();
|
||||
|
||||
Arc<T> arc = new Arc<>();
|
||||
|
||||
final BytesReader r = getBytesReader();
|
||||
|
||||
final int topN = Math.min(maxDerefNodes, inCounts.size());
|
||||
|
||||
// Find top nodes with highest number of incoming arcs:
|
||||
NodeQueue q = new NodeQueue(topN);
|
||||
|
||||
// TODO: we could use more RAM efficient selection algo here...
|
||||
NodeAndInCount bottom = null;
|
||||
for(int node=0; node<inCounts.size(); node++) {
|
||||
if (inCounts.get(node) >= minInCountDeref) {
|
||||
if (bottom == null) {
|
||||
q.add(new NodeAndInCount(node, (int) inCounts.get(node)));
|
||||
if (q.size() == topN) {
|
||||
bottom = q.top();
|
||||
}
|
||||
} else if (inCounts.get(node) > bottom.count) {
|
||||
q.insertWithOverflow(new NodeAndInCount(node, (int) inCounts.get(node)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Free up RAM:
|
||||
inCounts = null;
|
||||
|
||||
final Map<Integer,Integer> topNodeMap = new HashMap<>();
|
||||
for(int downTo=q.size()-1;downTo>=0;downTo--) {
|
||||
NodeAndInCount n = q.pop();
|
||||
topNodeMap.put(n.node, downTo);
|
||||
//System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
|
||||
}
|
||||
|
||||
// +1 because node ords start at 1 (0 is reserved as stop node):
|
||||
final GrowableWriter newNodeAddress = new GrowableWriter(
|
||||
PackedInts.bitsRequired(builder.bytes.getPosition()), (int) (1 + builder.nodeCount), acceptableOverheadRatio);
|
||||
|
||||
// Fill initial coarse guess:
|
||||
for(int node=1;node<=builder.nodeCount;node++) {
|
||||
newNodeAddress.set(node, 1 + builder.bytes.getPosition() - nodeAddress.get(node));
|
||||
}
|
||||
|
||||
int absCount;
|
||||
int deltaCount;
|
||||
int topCount;
|
||||
int nextCount;
|
||||
|
||||
FST<T> fst;
|
||||
|
||||
// Iterate until we converge:
|
||||
while(true) {
|
||||
|
||||
//System.out.println("\nITER");
|
||||
boolean changed = false;
|
||||
|
||||
// for assert:
|
||||
boolean negDelta = false;
|
||||
|
||||
fst = new FST<>(inputType, outputs, builder.bytes.getBlockBits());
|
||||
|
||||
final BytesStore writer = fst.bytes;
|
||||
|
||||
// Skip 0 byte since 0 is reserved target:
|
||||
writer.writeByte((byte) 0);
|
||||
|
||||
absCount = deltaCount = topCount = nextCount = 0;
|
||||
|
||||
int changedCount = 0;
|
||||
|
||||
long addressError = 0;
|
||||
|
||||
//int totWasted = 0;
|
||||
|
||||
// Since we re-reverse the bytes, we now write the
|
||||
// nodes backwards, so that BIT_TARGET_NEXT is
|
||||
// unchanged:
|
||||
for(int node=(int) builder.nodeCount;node>=1;node--) {
|
||||
final long address = writer.getPosition();
|
||||
|
||||
//System.out.println(" node: " + node + " address=" + address);
|
||||
if (address != newNodeAddress.get(node)) {
|
||||
addressError = address - newNodeAddress.get(node);
|
||||
//System.out.println(" change: " + (address - newNodeAddress[node]));
|
||||
changed = true;
|
||||
newNodeAddress.set(node, address);
|
||||
changedCount++;
|
||||
}
|
||||
|
||||
int nodeArcCount = 0;
|
||||
int bytesPerArc = 0;
|
||||
|
||||
boolean retry = false;
|
||||
|
||||
// for assert:
|
||||
boolean anyNegDelta = false;
|
||||
|
||||
// Retry loop: possibly iterate more than once, if
|
||||
// this is an array'd node and bytesPerArc changes:
|
||||
writeNode:
|
||||
while(true) { // retry writing this node
|
||||
|
||||
//System.out.println(" cycle: retry");
|
||||
readFirstRealTargetArc(node, arc, r);
|
||||
|
||||
final boolean useArcArray = arc.bytesPerArc != 0;
|
||||
if (useArcArray) {
|
||||
// Write false first arc:
|
||||
if (bytesPerArc == 0) {
|
||||
bytesPerArc = arc.bytesPerArc;
|
||||
}
|
||||
writer.writeByte(ARCS_AS_FIXED_ARRAY);
|
||||
writer.writeVInt(arc.numArcs);
|
||||
writer.writeVInt(bytesPerArc);
|
||||
//System.out.println("node " + node + ": " + arc.numArcs + " arcs");
|
||||
}
|
||||
|
||||
int maxBytesPerArc = 0;
|
||||
//int wasted = 0;
|
||||
while(true) { // iterate over all arcs for this node
|
||||
//System.out.println(" cycle next arc");
|
||||
|
||||
final long arcStartPos = writer.getPosition();
|
||||
nodeArcCount++;
|
||||
|
||||
byte flags = 0;
|
||||
|
||||
if (arc.isLast()) {
|
||||
flags += BIT_LAST_ARC;
|
||||
}
|
||||
/*
|
||||
if (!useArcArray && nodeUpto < nodes.length-1 && arc.target == nodes[nodeUpto+1]) {
|
||||
flags += BIT_TARGET_NEXT;
|
||||
}
|
||||
*/
|
||||
if (!useArcArray && node != 1 && arc.target == node-1) {
|
||||
flags += BIT_TARGET_NEXT;
|
||||
if (!retry) {
|
||||
nextCount++;
|
||||
}
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
flags += BIT_FINAL_ARC;
|
||||
if (arc.nextFinalOutput != NO_OUTPUT) {
|
||||
flags += BIT_ARC_HAS_FINAL_OUTPUT;
|
||||
}
|
||||
} else {
|
||||
assert arc.nextFinalOutput == NO_OUTPUT;
|
||||
}
|
||||
if (!targetHasArcs(arc)) {
|
||||
flags += BIT_STOP_NODE;
|
||||
}
|
||||
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
flags += BIT_ARC_HAS_OUTPUT;
|
||||
}
|
||||
|
||||
final long absPtr;
|
||||
final boolean doWriteTarget = targetHasArcs(arc) && (flags & BIT_TARGET_NEXT) == 0;
|
||||
if (doWriteTarget) {
|
||||
|
||||
final Integer ptr = topNodeMap.get(arc.target);
|
||||
if (ptr != null) {
|
||||
absPtr = ptr;
|
||||
} else {
|
||||
absPtr = topNodeMap.size() + newNodeAddress.get((int) arc.target) + addressError;
|
||||
}
|
||||
|
||||
long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition() - 2;
|
||||
if (delta < 0) {
|
||||
//System.out.println("neg: " + delta);
|
||||
anyNegDelta = true;
|
||||
delta = 0;
|
||||
}
|
||||
|
||||
if (delta < absPtr) {
|
||||
flags |= BIT_TARGET_DELTA;
|
||||
}
|
||||
} else {
|
||||
absPtr = 0;
|
||||
}
|
||||
|
||||
assert flags != ARCS_AS_FIXED_ARRAY;
|
||||
writer.writeByte(flags);
|
||||
|
||||
fst.writeLabel(writer, arc.label);
|
||||
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
outputs.write(arc.output, writer);
|
||||
}
|
||||
if (arc.nextFinalOutput != NO_OUTPUT) {
|
||||
outputs.writeFinalOutput(arc.nextFinalOutput, writer);
|
||||
}
|
||||
|
||||
if (doWriteTarget) {
|
||||
|
||||
long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition();
|
||||
if (delta < 0) {
|
||||
anyNegDelta = true;
|
||||
//System.out.println("neg: " + delta);
|
||||
delta = 0;
|
||||
}
|
||||
|
||||
if (flag(flags, BIT_TARGET_DELTA)) {
|
||||
//System.out.println(" delta");
|
||||
writer.writeVLong(delta);
|
||||
if (!retry) {
|
||||
deltaCount++;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
if (ptr != null) {
|
||||
System.out.println(" deref");
|
||||
} else {
|
||||
System.out.println(" abs");
|
||||
}
|
||||
*/
|
||||
writer.writeVLong(absPtr);
|
||||
if (!retry) {
|
||||
if (absPtr >= topNodeMap.size()) {
|
||||
absCount++;
|
||||
} else {
|
||||
topCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (useArcArray) {
|
||||
final int arcBytes = (int) (writer.getPosition() - arcStartPos);
|
||||
//System.out.println(" " + arcBytes + " bytes");
|
||||
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
|
||||
// NOTE: this may in fact go "backwards", if
|
||||
// somehow (rarely, possibly never) we use
|
||||
// more bytesPerArc in this rewrite than the
|
||||
// incoming FST did... but in this case we
|
||||
// will retry (below) so it's OK to overwrite
|
||||
// bytes:
|
||||
//wasted += bytesPerArc - arcBytes;
|
||||
writer.skipBytes((int) (arcStartPos + bytesPerArc - writer.getPosition()));
|
||||
}
|
||||
|
||||
if (arc.isLast()) {
|
||||
break;
|
||||
}
|
||||
|
||||
readNextRealArc(arc, r);
|
||||
}
|
||||
|
||||
if (useArcArray) {
|
||||
if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
|
||||
// converged
|
||||
//System.out.println(" bba=" + bytesPerArc + " wasted=" + wasted);
|
||||
//totWasted += wasted;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
//System.out.println(" retry this node maxBytesPerArc=" + maxBytesPerArc + " vs " + bytesPerArc);
|
||||
|
||||
// Retry:
|
||||
bytesPerArc = maxBytesPerArc;
|
||||
writer.truncate(address);
|
||||
nodeArcCount = 0;
|
||||
retry = true;
|
||||
anyNegDelta = false;
|
||||
}
|
||||
|
||||
negDelta |= anyNegDelta;
|
||||
}
|
||||
|
||||
if (!changed) {
|
||||
// We don't renumber the nodes (just reverse their
|
||||
// order) so nodes should only point forward to
|
||||
// other nodes because we only produce acyclic FSTs
|
||||
// w/ nodes only pointing "forwards":
|
||||
assert !negDelta;
|
||||
//System.out.println("TOT wasted=" + totWasted);
|
||||
// Converged!
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
long maxAddress = 0;
|
||||
for (long key : topNodeMap.keySet()) {
|
||||
maxAddress = Math.max(maxAddress, newNodeAddress.get((int) key));
|
||||
}
|
||||
|
||||
PackedInts.Mutable nodeRefToAddressIn = PackedInts.getMutable(topNodeMap.size(),
|
||||
PackedInts.bitsRequired(maxAddress), acceptableOverheadRatio);
|
||||
for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
|
||||
nodeRefToAddressIn.set(ent.getValue(), newNodeAddress.get(ent.getKey()));
|
||||
}
|
||||
fst.nodeRefToAddress = nodeRefToAddressIn;
|
||||
|
||||
fst.startNode = newNodeAddress.get((int) startNode);
|
||||
//System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
|
||||
|
||||
if (emptyOutput != null) {
|
||||
fst.setEmptyOutput(emptyOutput);
|
||||
}
|
||||
|
||||
fst.bytes.finish();
|
||||
fst.cacheRootArcs();
|
||||
|
||||
//final int size = fst.sizeInBytes();
|
||||
//System.out.println("nextCount=" + nextCount + " topCount=" + topCount + " deltaCount=" + deltaCount + " absCount=" + absCount);
|
||||
|
||||
return fst;
|
||||
}
|
||||
|
||||
private static class NodeAndInCount implements Comparable<NodeAndInCount> {
|
||||
final int node;
|
||||
final int count;
|
||||
|
||||
public NodeAndInCount(int node, int count) {
|
||||
this.node = node;
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(NodeAndInCount other) {
|
||||
if (count > other.count) {
|
||||
return 1;
|
||||
} else if (count < other.count) {
|
||||
return -1;
|
||||
} else {
|
||||
// Tie-break: smaller node compares as greater than
|
||||
return other.node - node;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class NodeQueue extends PriorityQueue<NodeAndInCount> {
|
||||
public NodeQueue(int topN) {
|
||||
super(topN, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean lessThan(NodeAndInCount a, NodeAndInCount b) {
|
||||
final int cmp = a.compareTo(b);
|
||||
assert cmp != 0;
|
||||
return cmp < 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,6 @@
|
|||
* <li>Fast and low memory overhead construction of the minimal FST
|
||||
* (but inputs must be provided in sorted order)</li>
|
||||
* <li>Low object overhead and quick deserialization (byte[] representation)</li>
|
||||
* <li>Optional two-pass compression: {@link org.apache.lucene.util.fst.FST#pack FST.pack()}</li>
|
||||
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the
|
||||
* outputs are in sorted order (e.g., ordinals or file pointers)</li>
|
||||
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation</li>
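The package notes above sketch the FST construction contract (sorted inputs, pluggable Outputs); the pack()-based two-pass compression bullet is what this commit removes. As a minimal build-and-lookup sketch against the remaining API, assuming Lucene 6.x class names, with invented keys and outputs:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class FstBuildSketch {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    // Short-form constructor. The long form used in the tests of this commit now ends with
    // (outputs, <allowArrayArcs?>, <bytesPageBits?>); the old form additionally took the
    // pack flag and a PackedInts overhead ratio (parameter names here are presumed).
    Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    IntsRefBuilder scratch = new IntsRefBuilder();
    // Inputs must be added in sorted order.
    builder.add(Util.toIntsRef(new BytesRef("cat"), scratch), 5L);
    builder.add(Util.toIntsRef(new BytesRef("dog"), scratch), 7L);
    builder.add(Util.toIntsRef(new BytesRef("dogs"), scratch), 12L);
    FST<Long> fst = builder.finish();
    System.out.println(Util.get(fst, new BytesRef("dog"))); // expected: 7
  }
}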
|
||||
|
@ -37,7 +37,7 @@ public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
|
|||
// create a small string such that the single pass approach is used
|
||||
int length = TestUtil.nextInt(random(), 1, minSizeForDoublePass - 1);
|
||||
String unicode = TestUtil.randomFixedByteLengthUnicodeString(random(), length);
|
||||
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
|
||||
byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
|
||||
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
|
||||
|
||||
GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
|
||||
|
@ -61,7 +61,7 @@ public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
|
|||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
String unicode = TestUtil.randomRealisticUnicodeString(random(), minSizeForDoublePass, 10 * minSizeForDoublePass);
|
||||
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
|
||||
byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
|
||||
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
|
||||
|
||||
GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
|
||||
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestDocsWithFieldSet extends LuceneTestCase {
|
||||
|
||||
public void testDense() throws IOException {
|
||||
DocsWithFieldSet set = new DocsWithFieldSet();
|
||||
DocIdSetIterator it = set.iterator();
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
|
||||
|
||||
set.add(0);
|
||||
it = set.iterator();
|
||||
assertEquals(0, it.nextDoc());
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
|
||||
|
||||
long ramBytesUsed = set.ramBytesUsed();
|
||||
for (int i = 1; i < 1000; ++i) {
|
||||
set.add(i);
|
||||
}
|
||||
assertEquals(ramBytesUsed, set.ramBytesUsed());
|
||||
it = set.iterator();
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
assertEquals(i, it.nextDoc());
|
||||
}
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
|
||||
}
|
||||
|
||||
public void testSparse() throws IOException {
|
||||
DocsWithFieldSet set = new DocsWithFieldSet();
|
||||
int doc = random().nextInt(10000);
|
||||
set.add(doc);
|
||||
DocIdSetIterator it = set.iterator();
|
||||
assertEquals(doc, it.nextDoc());
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
|
||||
int doc2 = doc + TestUtil.nextInt(random(), 1, 100);
|
||||
set.add(doc2);
|
||||
it = set.iterator();
|
||||
assertEquals(doc, it.nextDoc());
|
||||
assertEquals(doc2, it.nextDoc());
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
|
||||
}
|
||||
|
||||
public void testDenseThenSparse() throws IOException {
|
||||
int denseCount = random().nextInt(10000);
|
||||
int nextDoc = denseCount + random().nextInt(10000);
|
||||
DocsWithFieldSet set = new DocsWithFieldSet();
|
||||
for (int i = 0; i < denseCount; ++i) {
|
||||
set.add(i);
|
||||
}
|
||||
set.add(nextDoc);
|
||||
DocIdSetIterator it = set.iterator();
|
||||
for (int i = 0; i < denseCount; ++i) {
|
||||
assertEquals(i, it.nextDoc());
|
||||
}
|
||||
assertEquals(nextDoc, it.nextDoc());
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
|
||||
}
|
||||
|
||||
}
|
|
@ -97,6 +97,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
|
|||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestIndexWriter extends LuceneTestCase {
|
||||
|
@ -2768,5 +2769,34 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
@Ignore("requires running tests with biggish heap")
|
||||
public void testMassiveField() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
final IndexWriter w = new IndexWriter(dir, iwc);
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
while (b.length() <= IndexWriter.MAX_STORED_STRING_LENGTH) {
|
||||
b.append("x ");
|
||||
}
|
||||
|
||||
final Document doc = new Document();
|
||||
//doc.add(new TextField("big", b.toString(), Field.Store.YES));
|
||||
doc.add(new StoredField("big", b.toString()));
|
||||
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
|
||||
assertEquals("stored field \"big\" is too large (" + b.length() + " characters) to store", e.getMessage());
|
||||
|
||||
// make sure writer is still usable:
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new StringField("id", "foo", Field.Store.YES));
|
||||
w.addDocument(doc2);
|
||||
|
||||
DirectoryReader r = DirectoryReader.open(w);
|
||||
assertEquals(1, r.numDocs());
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
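testMassiveField above exercises the new guard: a stored string field longer than IndexWriter.MAX_STORED_STRING_LENGTH is rejected with IllegalArgumentException and the writer remains usable. If an application would rather truncate than fail, a defensive sketch might look like this (the field name and truncation policy are illustrative, not part of this commit):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;

public class StoredFieldGuard {
  // Truncate instead of letting IndexWriter reject the whole document.
  static void addStoredStringField(Document doc, String name, String value) {
    if (value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
      value = value.substring(0, IndexWriter.MAX_STORED_STRING_LENGTH);
    }
    doc.add(new StoredField(name, value));
  }
}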
|
||||
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.similarities;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestBooleanSimilarity extends LuceneTestCase {
|
||||
|
||||
public void testTermScoreIsEqualToBoost() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
|
||||
newIndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
doc.add(new StringField("foo", "baz", Store.NO));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
w.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.setSimilarity(new BooleanSimilarity());
|
||||
TopDocs topDocs = searcher.search(new TermQuery(new Term("foo", "bar")), 2);
|
||||
assertEquals(2, topDocs.totalHits);
|
||||
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
|
||||
assertEquals(1f, topDocs.scoreDocs[1].score, 0f);
|
||||
|
||||
topDocs = searcher.search(new TermQuery(new Term("foo", "baz")), 1);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
topDocs = searcher.search(new BoostQuery(new TermQuery(new Term("foo", "baz")), 3f), 1);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(3f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testPhraseScoreIsEqualToBoost() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
|
||||
newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("foo", "bar baz quux", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
w.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.setSimilarity(new BooleanSimilarity());
|
||||
|
||||
PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");
|
||||
|
||||
TopDocs topDocs = searcher.search(query, 2);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
topDocs = searcher.search(new BoostQuery(query, 7), 2);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(7f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSameNormsAsBM25() {
|
||||
BooleanSimilarity sim1 = new BooleanSimilarity();
|
||||
BM25Similarity sim2 = new BM25Similarity();
|
||||
sim2.setDiscountOverlaps(true);
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
final int length = TestUtil.nextInt(random(), 1, 100);
|
||||
final int position = random().nextInt(length);
|
||||
final int numOverlaps = random().nextInt(50);
|
||||
final float boost = random().nextFloat() * 10;
|
||||
FieldInvertState state = new FieldInvertState("foo", position, length, numOverlaps, 100, boost);
|
||||
assertEquals(
|
||||
sim2.computeNorm(state),
|
||||
sim1.computeNorm(state),
|
||||
0f);
|
||||
}
|
||||
}
|
||||
}
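TestBooleanSimilarity above shows that with the new BooleanSimilarity a matching document scores exactly the query boost, independent of term statistics and length norms. A non-test sketch of wiring it in at both index and search time (directory and analyzer choices are arbitrary):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class BooleanSimilaritySketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setSimilarity(new BooleanSimilarity()); // index-time norms should match the search-time similarity
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new StringField("foo", "bar", Store.NO));
      w.addDocument(doc);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      searcher.setSimilarity(new BooleanSimilarity());
      TopDocs topDocs = searcher.search(new TermQuery(new Term("foo", "bar")), 10);
      System.out.println(topDocs.scoreDocs[0].score); // 1.0, i.e. the default boost
    }
  }
}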
|
|
@ -274,20 +274,42 @@ public class TestBasics extends LuceneTestCase {
|
|||
assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
|
||||
}
|
||||
|
||||
public void testSpanNotWindowNeg() throws Exception {
|
||||
public void testSpanNotWindowNegPost() throws Exception {
|
||||
//test handling of invalid window < 0
|
||||
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
|
||||
SpanQuery or = spanOrQuery("field", "forty");
|
||||
SpanQuery query = spanNotQuery(near, or);
|
||||
|
||||
SpanQuery query = spanNotQuery(near, or, 0, -1);
|
||||
checkHits(query, new int[]
|
||||
{801, 821, 831, 851, 861, 871, 881, 891,
|
||||
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
|
||||
|
||||
query = spanNotQuery(near, or, 0, -2);
|
||||
checkHits(query, new int[]
|
||||
{801, 821, 831, 841, 851, 861, 871, 881, 891,
|
||||
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
|
||||
|
||||
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
|
||||
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
|
||||
}
|
||||
|
||||
|
||||
public void testSpanNotWindowNegPre() throws Exception {
|
||||
//test handling of invalid window < 0
|
||||
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
|
||||
SpanQuery or = spanOrQuery("field", "forty");
|
||||
SpanQuery query = spanNotQuery(near, or, -2, 0);
|
||||
checkHits(query, new int[]
|
||||
{801, 821, 831, 851, 861, 871, 881, 891,
|
||||
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
|
||||
|
||||
query = spanNotQuery(near, or, -3, 0);
|
||||
checkHits(query, new int[]
|
||||
{801, 821, 831, 841, 851, 861, 871, 881, 891,
|
||||
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
|
||||
|
||||
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
|
||||
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
|
||||
}
|
||||
|
||||
public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
|
||||
//test hitting two excludes before an include
|
||||
SpanQuery near = spanNearOrderedQuery("field", 2, "forty", "two");
|
||||
|
|
|
@ -99,7 +99,6 @@ public class TestSpans extends LuceneTestCase {
|
|||
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx",
|
||||
"r1 s11",
|
||||
"r1 s21"
|
||||
|
||||
};
|
||||
|
||||
private void checkHits(Query query, int[] results) throws IOException {
|
||||
|
@ -406,42 +405,54 @@ public class TestSpans extends LuceneTestCase {
|
|||
|
||||
|
||||
}
|
||||
|
||||
public void testSpanNots() throws Throwable{
|
||||
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
|
||||
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
|
||||
|
||||
//focus on behind
|
||||
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
|
||||
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
|
||||
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
|
||||
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
|
||||
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
|
||||
|
||||
//focus on both
|
||||
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
|
||||
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
|
||||
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
|
||||
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
|
||||
|
||||
//focus on ahead
|
||||
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
|
||||
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
|
||||
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
|
||||
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
|
||||
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
|
||||
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
|
||||
|
||||
//exclude doesn't exist
|
||||
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
|
||||
|
||||
//include doesn't exist
|
||||
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
|
||||
public void testSpanNots() throws Throwable {
|
||||
|
||||
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", 0, "s2", 0, 0), 0);
|
||||
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", 0, "s2", 10, 10), 0);
|
||||
|
||||
//focus on behind
|
||||
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", 0, "s1", 6, 0));
|
||||
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", 0, "s1", 5, 0));
|
||||
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", 0, "s1", 3, 0));
|
||||
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", 0, "s1", 2, 0));
|
||||
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", 0, "s1", 0, 0));
|
||||
|
||||
//focus on both
|
||||
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", 0, "s1", 3, 1));
|
||||
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", 0, "s1", 2, 1));
|
||||
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", 0, "s1", 1, 1));
|
||||
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", 0, "s1", 10, 10));
|
||||
|
||||
//focus on ahead
|
||||
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", 0, "s2", 10, 10));
|
||||
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", 0, "s2", 0, 1));
|
||||
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", 0, "s2", 0, 2));
|
||||
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", 0, "s2", 0, 3));
|
||||
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", 0, "s2", 0, 4));
|
||||
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", 0, "s2", 0, 8));
|
||||
|
||||
//exclude doesn't exist
|
||||
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", 0, "s3", 8, 8));
|
||||
|
||||
//include doesn't exist
|
||||
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", 0, "s1", 8, 8));
|
||||
|
||||
// Negative values
|
||||
assertEquals("SpanNotS2S1NotXXNeg_0_0", 1, spanCount("s2 s1", 10, "xx", 0, 0));
|
||||
assertEquals("SpanNotS2S1NotXXNeg_1_1", 1, spanCount("s2 s1", 10, "xx", -1, -1));
|
||||
assertEquals("SpanNotS2S1NotXXNeg_0_2", 2, spanCount("s2 s1", 10, "xx", 0, -2));
|
||||
assertEquals("SpanNotS2S1NotXXNeg_1_2", 2, spanCount("s2 s1", 10, "xx", -1, -2));
|
||||
assertEquals("SpanNotS2S1NotXXNeg_2_1", 2, spanCount("s2 s1", 10, "xx", -2, -1));
|
||||
assertEquals("SpanNotS2S1NotXXNeg_3_1", 2, spanCount("s2 s1", 10, "xx", -3, -1));
|
||||
assertEquals("SpanNotS2S1NotXXNeg_1_3", 2, spanCount("s2 s1", 10, "xx", -1, -3));
|
||||
assertEquals("SpanNotS2S1NotXXNeg_2_2", 3, spanCount("s2 s1", 10, "xx", -2, -2));
|
||||
}
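The negative-value assertions above exercise the pre/post window now accepted by SpanNotQuery; judging by these tests, a negative pre or post tolerates that much overlap between the exclude span and the corresponding edge of the include span. A rough sketch of constructing such a query directly, reusing the field and terms from the tests:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class SpanNotOverlapSketch {
  public static SpanQuery build(String field) {
    SpanQuery include = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term(field, "s2")),
        new SpanTermQuery(new Term(field, "s1"))}, 10, true);
    SpanQuery exclude = new SpanTermQuery(new Term(field, "xx"));
    // pre = -1, post = -2: per the tests above, negative values permit that much
    // overlap of "xx" into the start and end of the include span without rejecting it.
    return new SpanNotQuery(include, exclude, -1, -2);
  }
}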
|
||||
|
||||
private int spanCount(String include, String exclude, int pre, int post) throws IOException{
|
||||
SpanQuery iq = spanTermQuery(field, include);
|
||||
|
||||
|
||||
private int spanCount(String include, int slop, String exclude, int pre, int post) throws IOException{
|
||||
String[] includeTerms = include.split(" +");
|
||||
SpanQuery iq = includeTerms.length == 1 ? spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms);
|
||||
SpanQuery eq = spanTermQuery(field, exclude);
|
||||
SpanQuery snq = spanNotQuery(iq, eq, pre, post);
|
||||
Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
|
||||
|
|
|
@ -111,7 +111,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
|
|||
int num = atLeast(50000);
|
||||
for (int i = 0; i < num; i++) {
|
||||
final String s = TestUtil.randomUnicodeString(random());
|
||||
final byte[] utf8 = new byte[s.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
|
||||
final byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(s.length())];
|
||||
final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8);
|
||||
assertEquals(s.codePointCount(0, s.length()),
|
||||
UnicodeUtil.codePointCount(new BytesRef(utf8, 0, utf8Len)));
|
||||
|
@ -137,7 +137,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
|
|||
int num = atLeast(50000);
|
||||
for (int i = 0; i < num; i++) {
|
||||
final String s = TestUtil.randomUnicodeString(random());
|
||||
final byte[] utf8 = new byte[s.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
|
||||
final byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(s.length())];
|
||||
final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8);
|
||||
utf32 = ArrayUtil.grow(utf32, utf8Len);
|
||||
final int utf32Len = UnicodeUtil.UTF8toUTF32(new BytesRef(utf8, 0, utf8Len), utf32);
|
||||
|
@ -208,7 +208,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
|
|||
int num = atLeast(5000);
|
||||
for (int i = 0; i < num; i++) {
|
||||
String unicode = TestUtil.randomUnicodeString(random());
|
||||
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
|
||||
byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
|
||||
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
|
||||
assertEquals(len, UnicodeUtil.calcUTF16toUTF8Length(unicode, 0, unicode.length()));
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
|
||||
private boolean matches(ByteRunAutomaton a, int code) {
|
||||
char[] chars = Character.toChars(code);
|
||||
byte[] b = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * chars.length];
|
||||
byte[] b = new byte[UnicodeUtil.maxUTF8Length(chars.length)];
|
||||
final int len = UnicodeUtil.UTF16toUTF8(chars, 0, chars.length, b);
|
||||
return a.run(b, 0, len);
|
||||
}
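Both hunks above swap manual worst-case sizing (length * MAX_UTF8_BYTES_PER_CHAR) for the UnicodeUtil.maxUTF8Length helper, which presumably centralizes the bound and fails fast instead of silently overflowing for very long strings. A small usage sketch with an arbitrary input string:

import org.apache.lucene.util.UnicodeUtil;

public class Utf8ScratchSketch {
  public static void main(String[] args) {
    String s = "héllo wörld"; // arbitrary example input
    // Worst-case UTF-8 size for a UTF-16 sequence of this length.
    byte[] scratch = new byte[UnicodeUtil.maxUTF8Length(s.length())];
    int len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch);
    System.out.println("encoded " + len + " bytes");
  }
}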
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TimeUnits;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.junit.Ignore;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||
|
@ -47,16 +46,14 @@ public class Test2BFST extends LuceneTestCase {
|
|||
|
||||
Directory dir = new MMapDirectory(createTempDir("2BFST"));
|
||||
|
||||
for(int doPackIter=0;doPackIter<2;doPackIter++) {
|
||||
boolean doPack = doPackIter == 1;
|
||||
|
||||
for(int iter=0;iter<1;iter++) {
|
||||
// Build FST w/ NoOutputs and stop when nodeCount > 2.2B
|
||||
if (!doPack) {
|
||||
{
|
||||
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
|
||||
Outputs<Object> outputs = NoOutputs.getSingleton();
|
||||
Object NO_OUTPUT = outputs.getNoOutput();
|
||||
final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
|
||||
doPack, PackedInts.COMPACT, true, 15);
|
||||
true, 15);
|
||||
|
||||
int count = 0;
|
||||
Random r = new Random(seed);
|
||||
|
@ -135,10 +132,10 @@ public class Test2BFST extends LuceneTestCase {
|
|||
// Build FST w/ ByteSequenceOutputs and stop when FST
|
||||
// size = 3GB
|
||||
{
|
||||
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
|
||||
System.out.println("\nTEST: 3 GB size; outputs=bytes");
|
||||
Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
|
||||
final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
|
||||
doPack, PackedInts.COMPACT, true, 15);
|
||||
true, 15);
|
||||
|
||||
byte[] outputBytes = new byte[20];
|
||||
BytesRef output = new BytesRef(outputBytes);
|
||||
|
@ -212,10 +209,10 @@ public class Test2BFST extends LuceneTestCase {
|
|||
// Build FST w/ PositiveIntOutputs and stop when FST
|
||||
// size = 3GB
|
||||
{
|
||||
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
|
||||
System.out.println("\nTEST: 3 GB size; outputs=long");
|
||||
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
|
||||
final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
|
||||
doPack, PackedInts.COMPACT, true, 15);
|
||||
true, 15);
|
||||
|
||||
long output = 1;
|
||||
|
||||
|
|
|
@ -76,7 +76,6 @@ import org.apache.lucene.util.fst.FST.Arc;
|
|||
import org.apache.lucene.util.fst.FST.BytesReader;
|
||||
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
||||
import org.apache.lucene.util.fst.Util.Result;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
import static org.apache.lucene.util.fst.FSTTester.getRandomString;
|
||||
import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
|
||||
|
@ -328,9 +327,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
writer.close();
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
|
||||
final boolean doRewrite = random().nextBoolean();
|
||||
|
||||
Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doRewrite, PackedInts.DEFAULT, true, 15);
|
||||
Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
|
||||
|
||||
boolean storeOrd = random().nextBoolean();
|
||||
if (VERBOSE) {
|
||||
|
@ -464,16 +461,14 @@ public class TestFSTs extends LuceneTestCase {
|
|||
private int inputMode;
|
||||
private final Outputs<T> outputs;
|
||||
private final Builder<T> builder;
|
||||
private final boolean doPack;
|
||||
|
||||
public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean doPack, boolean noArcArrays) {
|
||||
public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean noArcArrays) {
|
||||
this.dirOut = dirOut;
|
||||
this.wordsFileIn = wordsFileIn;
|
||||
this.inputMode = inputMode;
|
||||
this.outputs = outputs;
|
||||
this.doPack = doPack;
|
||||
|
||||
builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, doPack, PackedInts.DEFAULT, !noArcArrays, 15);
|
||||
builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, !noArcArrays, 15);
|
||||
}
|
||||
|
||||
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
|
||||
|
@ -622,7 +617,6 @@ public class TestFSTs extends LuceneTestCase {
|
|||
boolean storeOrds = false;
|
||||
boolean storeDocFreqs = false;
|
||||
boolean verify = true;
|
||||
boolean doPack = false;
|
||||
boolean noArcArrays = false;
|
||||
Path wordsFileIn = null;
|
||||
Path dirOut = null;
|
||||
|
@ -647,8 +641,6 @@ public class TestFSTs extends LuceneTestCase {
|
|||
storeOrds = true;
|
||||
} else if (args[idx].equals("-noverify")) {
|
||||
verify = false;
|
||||
} else if (args[idx].equals("-pack")) {
|
||||
doPack = true;
|
||||
} else if (args[idx].startsWith("-")) {
|
||||
System.err.println("Unrecognized option: " + args[idx]);
|
||||
System.exit(-1);
|
||||
|
@ -677,7 +669,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
|
||||
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
|
||||
final PairOutputs<Long,Long> outputs = new PairOutputs<>(o1, o2);
|
||||
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
|
||||
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
|
||||
Random rand;
|
||||
@Override
|
||||
public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
|
||||
|
@ -691,7 +683,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
} else if (storeOrds) {
|
||||
// Store only ords
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
|
||||
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
|
||||
@Override
|
||||
public Long getOutput(IntsRef input, int ord) {
|
||||
return (long) ord;
|
||||
|
@ -700,7 +692,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
} else if (storeDocFreqs) {
|
||||
// Store only docFreq
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
|
||||
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
|
||||
Random rand;
|
||||
@Override
|
||||
public Long getOutput(IntsRef input, int ord) {
|
||||
|
@ -714,7 +706,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
// Store nothing
|
||||
final NoOutputs outputs = NoOutputs.getSingleton();
|
||||
final Object NO_OUTPUT = outputs.getNoOutput();
|
||||
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
|
||||
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
|
||||
@Override
|
||||
public Object getOutput(IntsRef input, int ord) {
|
||||
return NO_OUTPUT;
|
||||
|
@ -1118,7 +1110,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
public void testFinalOutputOnEndState() throws Exception {
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
|
||||
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
|
||||
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
|
||||
builder.add(Util.toUTF32("stat", new IntsRefBuilder()), 17L);
|
||||
builder.add(Util.toUTF32("station", new IntsRefBuilder()), 10L);
|
||||
final FST<Long> fst = builder.finish();
|
||||
|
@ -1132,8 +1124,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
public void testInternalFinalState() throws Exception {
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
final boolean willRewrite = random().nextBoolean();
|
||||
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, willRewrite, PackedInts.DEFAULT, true, 15);
|
||||
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
|
||||
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
|
||||
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
|
||||
final FST<Long> fst = builder.finish();
|
||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.lucene.search.uhighlight;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||
|
@ -30,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.memory.MemoryIndex;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
|
@ -50,7 +53,9 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
|||
private final LeafReader leafReader;
|
||||
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
|
||||
|
||||
public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer) {
|
||||
public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
|
||||
CharacterRunAutomaton[] automata, Analyzer analyzer,
|
||||
Function<Query, Collection<Query>> multiTermQueryRewrite) {
|
||||
super(field, extractedTerms, phraseHelper, automata);
|
||||
this.analyzer = analyzer;
|
||||
// Automata (Wildcards / MultiTermQuery):
|
||||
|
@ -68,7 +73,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
|||
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
|
||||
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
|
||||
// preFilter for MemoryIndex
|
||||
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases);
|
||||
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases,
|
||||
multiTermQueryRewrite);
|
||||
} else {
|
||||
memoryIndex = null;
|
||||
leafReader = null;
|
||||
|
@ -155,7 +161,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
|||
*/
|
||||
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
|
||||
CharacterRunAutomaton[] automata,
|
||||
PhraseHelper strictPhrases) {
|
||||
PhraseHelper strictPhrases,
|
||||
Function<Query, Collection<Query>> multiTermQueryRewrite) {
|
||||
List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
|
||||
if (terms.length > 0) {
|
||||
allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
|
||||
|
@ -163,7 +170,7 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
|||
Collections.addAll(allAutomata, automata);
|
||||
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
|
||||
Collections.addAll(allAutomata,
|
||||
MultiTermHighlighting.extractAutomata(spanQuery, field, true));//true==lookInSpan
|
||||
MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
|
||||
}
|
||||
|
||||
if (allAutomata.size() == 1) {
|
||||
|
|
|
@ -20,8 +20,10 @@ import java.io.Closeable;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
@ -69,34 +71,44 @@ class MultiTermHighlighting {
|
|||
* Extracts all MultiTermQueries for {@code field}, and returns equivalent
|
||||
* automata that will match terms.
|
||||
*/
|
||||
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan) {
|
||||
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
|
||||
Function<Query, Collection<Query>> preRewriteFunc) {
|
||||
List<CharacterRunAutomaton> list = new ArrayList<>();
|
||||
if (query instanceof BooleanQuery) {
|
||||
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
|
||||
if (customSubQueries != null) {
|
||||
for (Query sub : customSubQueries) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (query instanceof BooleanQuery) {
|
||||
for (BooleanClause clause : (BooleanQuery) query) {
|
||||
if (!clause.isProhibited()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
}
|
||||
} else if (query instanceof ConstantScoreQuery) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
|
||||
preRewriteFunc)));
|
||||
} else if (query instanceof DisjunctionMaxQuery) {
|
||||
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (lookInSpan && query instanceof SpanOrQuery) {
|
||||
for (Query sub : ((SpanOrQuery) query).getClauses()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (lookInSpan && query instanceof SpanNearQuery) {
|
||||
for (Query sub : ((SpanNearQuery) query).getClauses()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (lookInSpan && query instanceof SpanNotQuery) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
|
||||
preRewriteFunc)));
|
||||
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
|
||||
preRewriteFunc)));
|
||||
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field, lookInSpan)));
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
|
||||
lookInSpan, preRewriteFunc)));
|
||||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (aq.getField().equals(field)) {
|
||||
|
|
|
@ -40,7 +40,7 @@ import java.util.function.Function;
|
|||
public class PhraseHelper {
|
||||
|
||||
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
|
||||
spanQuery -> null, true);
|
||||
spanQuery -> null, query -> null, true);
|
||||
|
||||
//TODO it seems this ought to be a general thing on Spans?
|
||||
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
|
||||
|
@ -69,11 +69,14 @@ public class PhraseHelper {
|
|||
* {@code rewriteQueryPred} is an extension hook to override the default choice of
|
||||
* {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
|
||||
* so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
|
||||
* Similarly, {@code preExtractRewriteFunction} is an extension hook for extraction, allowing different queries
|
||||
* to be substituted before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
|
||||
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
|
||||
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
|
||||
*/
|
||||
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
|
||||
boolean ignoreQueriesNeedingRewrite) {
|
||||
Function<Query, Collection<Query>> preExtractRewriteFunction,
|
||||
boolean ignoreQueriesNeedingRewrite) {
|
||||
this.fieldName = field; // if null then don't require field match
|
||||
// filter terms to those we want
|
||||
positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
|
||||
|
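||||
// Illustrative sketch (hypothetical caller, assumed names): wiring the new preExtractRewriteFunction
// hook when constructing a PhraseHelper directly.  "MyWrapperQuery" and its getWrapped() accessor are
// placeholders for whatever custom query type needs unwrapping; they are not part of this patch.
//
//   PhraseHelper helper = new PhraseHelper(topLevelQuery, "body",
//       spanQuery -> null,                                  // rewriteQueryPred: keep the default choice
//       q -> q instanceof MyWrapperQuery
//           ? Collections.singletonList(((MyWrapperQuery) q).getWrapped())
//           : null,                                         // preExtractRewriteFunction: unwrap custom queries
//       true);                                              // ignoreQueriesNeedingRewrite
|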
@ -98,6 +101,18 @@ public class PhraseHelper {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
|
||||
Collection<Query> newQueriesToExtract = preExtractRewriteFunction.apply(query);
|
||||
if (newQueriesToExtract != null) {
|
||||
for (Query newQuery : newQueriesToExtract) {
|
||||
extract(newQuery, boost, terms);
|
||||
}
|
||||
} else {
|
||||
super.extract(query, boost, terms);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
|
||||
if (clazz.isAssignableFrom(MultiTermQuery.class)) {
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.text.BreakIterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
@ -732,7 +733,8 @@ public class UnifiedHighlighter {
|
|||
OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
|
||||
switch (offsetSource) {
|
||||
case ANALYSIS:
|
||||
return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
|
||||
return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
|
||||
this::preMultiTermQueryRewrite);
|
||||
case NONE_NEEDED:
|
||||
return NoOpOffsetStrategy.INSTANCE;
|
||||
case TERM_VECTORS:
|
||||
|
@ -776,13 +778,14 @@ public class UnifiedHighlighter {
|
|||
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
|
||||
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
|
||||
return highlightPhrasesStrictly ?
|
||||
new PhraseHelper(query, field, this::requiresRewrite, !handleMultiTermQuery) :
|
||||
new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
|
||||
PhraseHelper.NONE;
|
||||
}
|
||||
|
||||
protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) {
|
||||
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
|
||||
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES))
|
||||
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
|
||||
this::preMultiTermQueryRewrite)
|
||||
: ZERO_LEN_AUTOMATA_ARRAY;
|
||||
}
|
||||
|
||||
|
@ -830,6 +833,32 @@ public class UnifiedHighlighter {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* When highlighting phrases accurately, we may need to handle custom queries that aren't supported in the
|
||||
* {@link org.apache.lucene.search.highlight.WeightedSpanTermExtractor} as called by the {@code PhraseHelper}.
|
||||
* Should custom query types be needed, this method should be overridden to return a collection of queries if appropriate,
|
||||
* or null if nothing to do. If the query is not custom, simply returning null will allow the default rules to apply.
|
||||
*
|
||||
* @param query Query to be highlighted
|
||||
* @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
|
||||
*/
|
||||
protected Collection<Query> preSpanQueryRewrite(Query query) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* When dealing with multi-term queries / span queries, we may need to handle custom queries that aren't supported
|
||||
* by the default automata extraction in {@code MultiTermHighlighting}. This can be overridden to return a collection
|
||||
* of queries if appropriate, or null if nothing to do. If the query is not custom, simply returning null will allow the
|
||||
* default rules to apply.
|
||||
*
|
||||
* @param query Query to be highlighted
|
||||
* @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
|
||||
*/
|
||||
protected Collection<Query> preMultiTermQueryRewrite(Query query) {
|
||||
return null;
|
||||
}
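|
||||
// Illustrative sketch (hypothetical subclass): the two hooks above are typically overridden together so that
// a custom wrapper query is unwrapped both for strict phrase extraction and for multi-term automata extraction.
// "MyWrapperQuery" and getWrapped() are assumed names used only for illustration, not part of this API.
//
//   UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer) {
//     @Override
//     protected Collection<Query> preSpanQueryRewrite(Query query) {
//       return (query instanceof MyWrapperQuery)
//           ? Collections.singletonList(((MyWrapperQuery) query).getWrapped())
//           : null; // null: fall back to the default rules
//     }
//     @Override
//     protected Collection<Query> preMultiTermQueryRewrite(Query query) {
//       return preSpanQueryRewrite(query); // same unwrapping applies to automata extraction
//     }
//   };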
|
||||
|
||||
private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
|
||||
return new DocIdSetIterator() {
|
||||
int idx = -1;
|
||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.lucene.search.uhighlight;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -56,6 +58,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
|
|||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
|
@ -933,4 +936,89 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
ir.close();
|
||||
}
|
||||
|
||||
public void testCustomSpanQueryHighlighting() throws Exception {
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
|
||||
doc.add(newTextField("id", "id", Field.Store.YES));
|
||||
|
||||
iw.addDocument(doc);
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected List<Query> preMultiTermQueryRewrite(Query query) {
|
||||
if (query instanceof MyWrapperSpanQuery) {
|
||||
return Collections.singletonList(((MyWrapperSpanQuery) query).originalQuery);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
|
||||
|
||||
WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
|
||||
SpanMultiTermQueryWrapper wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
|
||||
|
||||
SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);
|
||||
|
||||
BooleanQuery query = new BooleanQuery.Builder()
|
||||
.add(wrappedQuery, BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
int[] docIds = new int[]{docId};
|
||||
|
||||
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
|
||||
ir.close();
|
||||
}
|
||||
|
||||
private static class MyWrapperSpanQuery extends SpanQuery {
|
||||
|
||||
private final SpanQuery originalQuery;
|
||||
|
||||
private MyWrapperSpanQuery(SpanQuery originalQuery) {
|
||||
this.originalQuery = Objects.requireNonNull(originalQuery);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getField() {
|
||||
return originalQuery.getField();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "(Wrapper[" + originalQuery.toString(field)+"])";
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
return originalQuery.createWeight(searcher, needsScores, boost);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
Query newOriginalQuery = originalQuery.rewrite(reader);
|
||||
if (newOriginalQuery != originalQuery) {
|
||||
return new MyWrapperSpanQuery((SpanQuery)newOriginalQuery);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
return originalQuery.equals(((MyWrapperSpanQuery)o).originalQuery);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return originalQuery.hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
package org.apache.lucene.search.uhighlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
|
@ -29,14 +31,17 @@ import org.apache.lucene.index.RandomIndexWriter;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
|
@ -401,4 +406,76 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
|
||||
assertEquals(content, o);
|
||||
}
|
||||
|
||||
public void testPreSpanQueryRewrite() throws IOException {
|
||||
indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
|
||||
initReaderSearcherHighlighter();
|
||||
|
||||
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected Collection<Query> preSpanQueryRewrite(Query query) {
|
||||
if (query instanceof MyQuery) {
|
||||
return Collections.singletonList(((MyQuery)query).wrapped);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
};
|
||||
highlighter.setHighlightPhrasesStrictly(true);
|
||||
|
||||
BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
|
||||
Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
|
||||
Query oredTerms = new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
Query proximityBoostingQuery = new MyQuery(oredTerms);
|
||||
Query totalQuery = bqBuilder
|
||||
.add(phraseQuery, BooleanClause.Occur.SHOULD)
|
||||
.add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
|
||||
assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets);
|
||||
}
|
||||
|
||||
private static class MyQuery extends Query {
|
||||
|
||||
private final Query wrapped;
|
||||
|
||||
MyQuery(Query wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
return wrapped.createWeight(searcher, needsScores, boost);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
Query newWrapped = wrapped.rewrite(reader);
|
||||
if (newWrapped != wrapped) {
|
||||
return new MyQuery(newWrapped);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "[[["+wrapped.toString(field)+"]]]";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery) obj).wrapped);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return wrapped.hashCode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -228,7 +228,7 @@ org.bouncycastle.version = 1.45
|
|||
/org.carrot2.attributes/attributes-binder = 1.3.1
|
||||
/org.carrot2.shaded/carrot2-guava = 18.0
|
||||
|
||||
/org.carrot2/carrot2-mini = 3.12.0
|
||||
/org.carrot2/carrot2-mini = 3.15.0
|
||||
|
||||
org.carrot2.morfologik.version = 2.1.1
|
||||
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
|
||||
|
|
|
@ -50,7 +50,6 @@ import org.apache.lucene.util.fst.PairOutputs.Pair;
|
|||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/*
|
||||
TODO:
|
||||
|
@ -354,8 +353,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
final Builder<Pair<BytesRef,Long>> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
|
||||
0, 0, true, false, Integer.MAX_VALUE,
|
||||
FST_OUTPUTS, false,
|
||||
PackedInts.COMPACT, true, 15);
|
||||
FST_OUTPUTS, true, 15);
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" compile index for prefix=" + prefix);
|
||||
//}
|
||||
|
|
|
@ -23,9 +23,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
|
@ -34,6 +35,7 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
|
@ -183,6 +185,10 @@ public class TermAutomatonQuery extends Query {
|
|||
|
||||
det = Operations.removeDeadStates(Operations.determinize(automaton,
|
||||
maxDeterminizedStates));
|
||||
|
||||
if (det.isAccept(0)) {
|
||||
throw new IllegalStateException("cannot accept the empty string");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -396,4 +402,82 @@ public class TermAutomatonQuery extends Query {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
if (Operations.isEmpty(det)) {
|
||||
return new MatchNoDocsQuery();
|
||||
}
|
||||
|
||||
IntsRef single = Operations.getSingleton(det);
|
||||
if (single != null && single.length == 1) {
|
||||
return new TermQuery(new Term(field, idToTerm.get(single.ints[single.offset])));
|
||||
}
|
||||
|
||||
// TODO: can PhraseQuery really handle multiple terms at the same position? If so, why do we even have MultiPhraseQuery?
|
||||
|
||||
// Try for either PhraseQuery or MultiPhraseQuery, which only works when the automaton is a sausage:
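|
||||
// ("sausage" = a strictly linear chain of states: at each position every transition leads to the same next state)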
|
||||
MultiPhraseQuery.Builder mpq = new MultiPhraseQuery.Builder();
|
||||
PhraseQuery.Builder pq = new PhraseQuery.Builder();
|
||||
|
||||
Transition t = new Transition();
|
||||
int state = 0;
|
||||
int pos = 0;
|
||||
query:
|
||||
while (true) {
|
||||
int count = det.initTransition(state, t);
|
||||
if (count == 0) {
|
||||
if (det.isAccept(state) == false) {
|
||||
mpq = null;
|
||||
pq = null;
|
||||
}
|
||||
break;
|
||||
} else if (det.isAccept(state)) {
|
||||
mpq = null;
|
||||
pq = null;
|
||||
break;
|
||||
}
|
||||
int dest = -1;
|
||||
List<Term> terms = new ArrayList<>();
|
||||
boolean matchesAny = false;
|
||||
for(int i=0;i<count;i++) {
|
||||
det.getNextTransition(t);
|
||||
if (i == 0) {
|
||||
dest = t.dest;
|
||||
} else if (dest != t.dest) {
|
||||
mpq = null;
|
||||
pq = null;
|
||||
break query;
|
||||
}
|
||||
|
||||
matchesAny |= anyTermID >= t.min && anyTermID <= t.max;
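|
||||
// an ANY transition at this position matches any term; it is expressed below as a position gap (no terms added here)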
|
||||
|
||||
if (matchesAny == false) {
|
||||
for(int termID=t.min;termID<=t.max;termID++) {
|
||||
terms.add(new Term(field, idToTerm.get(termID)));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (matchesAny == false) {
|
||||
mpq.add(terms.toArray(new Term[terms.size()]), pos);
|
||||
if (pq != null) {
|
||||
if (terms.size() == 1) {
|
||||
pq.add(terms.get(0), pos);
|
||||
} else {
|
||||
pq = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
state = dest;
|
||||
pos++;
|
||||
}
|
||||
|
||||
if (pq != null) {
|
||||
return pq.build();
|
||||
} else if (mpq != null) {
|
||||
return mpq.build();
|
||||
}
|
||||
|
||||
// TODO: we could maybe also rewrite to union of PhraseQuery (pull all finite strings) if it's "worth it"?
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -296,7 +296,6 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
|
|||
while (scorer instanceof AssertingScorer) {
|
||||
scorer = ((AssertingScorer) scorer).getIn();
|
||||
}
|
||||
assert scorer instanceof TermAutomatonScorer;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -683,7 +682,7 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
|
|||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newTextField("field", "comes here", Field.Store.NO));
|
||||
doc.add(newTextField("field", "comes foo", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
IndexReader r = w.getReader();
|
||||
IndexSearcher s = newSearcher(r);
|
||||
|
@ -691,9 +690,11 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
|
|||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int init = q.createState();
|
||||
int s1 = q.createState();
|
||||
int s2 = q.createState();
|
||||
q.addTransition(init, s1, "here");
|
||||
q.addTransition(s1, init, "comes");
|
||||
q.setAccept(init, true);
|
||||
q.addTransition(s1, s2, "comes");
|
||||
q.addTransition(s2, s1, "here");
|
||||
q.setAccept(s1, true);
|
||||
q.finish();
|
||||
|
||||
assertEquals(1, s.search(q, 1).totalHits);
|
||||
|
@ -779,8 +780,186 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
|
|||
// System.out.println("DOT: " + q.toDot());
|
||||
assertEquals(0, s.search(q, 1).totalHits);
|
||||
|
||||
w.close();
|
||||
r.close();
|
||||
dir.close();
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
public void testEmptyString() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
q.setAccept(initState, true);
|
||||
try {
|
||||
q.finish();
|
||||
fail("did not hit exc");
|
||||
} catch (IllegalStateException ise) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
public void testRewriteNoMatch() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", "x y z", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
assertTrue(q.rewrite(r) instanceof MatchNoDocsQuery);
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
public void testRewriteTerm() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
int s1 = q.createState();
|
||||
q.addTransition(initState, s1, "foo");
|
||||
q.setAccept(s1, true);
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", "x y z", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
Query rewrite = q.rewrite(r);
|
||||
assertTrue(rewrite instanceof TermQuery);
|
||||
assertEquals(new Term("field", "foo"), ((TermQuery) rewrite).getTerm());
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
public void testRewriteSimplePhrase() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
int s1 = q.createState();
|
||||
int s2 = q.createState();
|
||||
q.addTransition(initState, s1, "foo");
|
||||
q.addTransition(s1, s2, "bar");
|
||||
q.setAccept(s2, true);
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", "x y z", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
Query rewrite = q.rewrite(r);
|
||||
assertTrue(rewrite instanceof PhraseQuery);
|
||||
Term[] terms = ((PhraseQuery) rewrite).getTerms();
|
||||
assertEquals(new Term("field", "foo"), terms[0]);
|
||||
assertEquals(new Term("field", "bar"), terms[1]);
|
||||
|
||||
int[] positions = ((PhraseQuery) rewrite).getPositions();
|
||||
assertEquals(0, positions[0]);
|
||||
assertEquals(1, positions[1]);
|
||||
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
public void testRewritePhraseWithAny() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
int s1 = q.createState();
|
||||
int s2 = q.createState();
|
||||
int s3 = q.createState();
|
||||
q.addTransition(initState, s1, "foo");
|
||||
q.addAnyTransition(s1, s2);
|
||||
q.addTransition(s2, s3, "bar");
|
||||
q.setAccept(s3, true);
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", "x y z", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
Query rewrite = q.rewrite(r);
|
||||
assertTrue(rewrite instanceof PhraseQuery);
|
||||
Term[] terms = ((PhraseQuery) rewrite).getTerms();
|
||||
assertEquals(new Term("field", "foo"), terms[0]);
|
||||
assertEquals(new Term("field", "bar"), terms[1]);
|
||||
|
||||
int[] positions = ((PhraseQuery) rewrite).getPositions();
|
||||
assertEquals(0, positions[0]);
|
||||
assertEquals(2, positions[1]);
|
||||
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
public void testRewriteSimpleMultiPhrase() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
int s1 = q.createState();
|
||||
q.addTransition(initState, s1, "foo");
|
||||
q.addTransition(initState, s1, "bar");
|
||||
q.setAccept(s1, true);
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", "x y z", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
Query rewrite = q.rewrite(r);
|
||||
assertTrue(rewrite instanceof MultiPhraseQuery);
|
||||
Term[][] terms = ((MultiPhraseQuery) rewrite).getTermArrays();
|
||||
assertEquals(1, terms.length);
|
||||
assertEquals(2, terms[0].length);
|
||||
assertEquals(new Term("field", "foo"), terms[0][0]);
|
||||
assertEquals(new Term("field", "bar"), terms[0][1]);
|
||||
|
||||
int[] positions = ((MultiPhraseQuery) rewrite).getPositions();
|
||||
assertEquals(1, positions.length);
|
||||
assertEquals(0, positions[0]);
|
||||
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
public void testRewriteMultiPhraseWithAny() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
int s1 = q.createState();
|
||||
int s2 = q.createState();
|
||||
int s3 = q.createState();
|
||||
q.addTransition(initState, s1, "foo");
|
||||
q.addTransition(initState, s1, "bar");
|
||||
q.addAnyTransition(s1, s2);
|
||||
q.addTransition(s2, s3, "baz");
|
||||
q.setAccept(s3, true);
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", "x y z", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
Query rewrite = q.rewrite(r);
|
||||
assertTrue(rewrite instanceof MultiPhraseQuery);
|
||||
Term[][] terms = ((MultiPhraseQuery) rewrite).getTermArrays();
|
||||
assertEquals(2, terms.length);
|
||||
assertEquals(2, terms[0].length);
|
||||
assertEquals(new Term("field", "foo"), terms[0][0]);
|
||||
assertEquals(new Term("field", "bar"), terms[0][1]);
|
||||
assertEquals(1, terms[1].length);
|
||||
assertEquals(new Term("field", "baz"), terms[1][0]);
|
||||
|
||||
int[] positions = ((MultiPhraseQuery) rewrite).getPositions();
|
||||
assertEquals(2, positions.length);
|
||||
assertEquals(0, positions[0]);
|
||||
assertEquals(2, positions[1]);
|
||||
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.*;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Finite state automata based implementation of "autocomplete" functionality.
|
||||
|
@ -237,8 +236,7 @@ public class FSTCompletionBuilder {
|
|||
final Object empty = outputs.getNoOutput();
|
||||
final Builder<Object> builder = new Builder<>(
|
||||
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
|
||||
shareMaxTailLength, outputs, false,
|
||||
PackedInts.DEFAULT, true, 15);
|
||||
shareMaxTailLength, outputs, true, 15);
|
||||
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
BytesRef entry;
|
||||
|
|
|
@ -368,8 +368,9 @@ public class CheckHits {
|
|||
boolean productOf = descr.endsWith("product of:");
|
||||
boolean sumOf = descr.endsWith("sum of:");
|
||||
boolean maxOf = descr.endsWith("max of:");
|
||||
boolean computedOf = descr.matches(".*, computed as .* from:");
|
||||
boolean maxTimesOthers = false;
|
||||
if (!(productOf || sumOf || maxOf)) {
|
||||
if (!(productOf || sumOf || maxOf || computedOf)) {
|
||||
// maybe 'max plus x times others'
|
||||
int k1 = descr.indexOf("max plus ");
|
||||
if (k1>=0) {
|
||||
|
@ -387,9 +388,9 @@ public class CheckHits {
|
|||
// TODO: this is a TERRIBLE assertion!!!!
|
||||
Assert.assertTrue(
|
||||
q+": multi valued explanation description=\""+descr
|
||||
+"\" must be 'max of plus x times others' or end with 'product of'"
|
||||
+"\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
|
||||
+" or 'sum of:' or 'max of:' - "+expl,
|
||||
productOf || sumOf || maxOf || maxTimesOthers);
|
||||
productOf || sumOf || maxOf || computedOf || maxTimesOthers);
|
||||
float sum = 0;
|
||||
float product = 1;
|
||||
float max = 0;
|
||||
|
@ -410,7 +411,8 @@ public class CheckHits {
|
|||
} else if (maxTimesOthers) {
|
||||
combined = max + x * (sum - max);
|
||||
} else {
|
||||
Assert.assertTrue("should never get here!",false);
|
||||
Assert.assertTrue("should never get here!", computedOf);
|
||||
combined = value;
|
||||
}
|
||||
Assert.assertEquals(q+": actual subDetails combined=="+combined+
|
||||
" != value="+value+" Explanation: "+expl,
|
||||
|
|
|
@ -91,6 +91,7 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
|
|||
allSims = new ArrayList<>();
|
||||
allSims.add(new ClassicSimilarity());
|
||||
allSims.add(new BM25Similarity());
|
||||
allSims.add(new BooleanSimilarity());
|
||||
for (BasicModel basicModel : BASIC_MODELS) {
|
||||
for (AfterEffect afterEffect : AFTER_EFFECTS) {
|
||||
for (Normalization normalization : NORMALIZATIONS) {
|
||||
|
|
|
@ -36,7 +36,6 @@ import org.apache.lucene.codecs.lucene70.Lucene70Codec;
|
|||
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
|
||||
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
|
||||
import org.apache.lucene.index.RandomCodec;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.RandomSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
|
@ -213,7 +212,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
|
|||
TimeZone randomTimeZone = randomTimeZone(random());
|
||||
timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
|
||||
TimeZone.setDefault(timeZone);
|
||||
similarity = random().nextBoolean() ? new ClassicSimilarity() : new RandomSimilarity(random());
|
||||
similarity = new RandomSimilarity(random());
|
||||
|
||||
// Check codec restrictions once at class level.
|
||||
try {
|
||||
|
|
|
@ -40,7 +40,6 @@ import org.apache.lucene.util.IntsRefBuilder;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
|
@ -273,25 +272,14 @@ public class FSTTester<T> {
|
|||
System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
|
||||
}
|
||||
|
||||
final boolean willRewrite = random.nextBoolean();
|
||||
|
||||
final Builder<T> builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
|
||||
prune1, prune2,
|
||||
prune1==0 && prune2==0,
|
||||
allowRandomSuffixSharing ? random.nextBoolean() : true,
|
||||
allowRandomSuffixSharing ? TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
|
||||
outputs,
|
||||
willRewrite,
|
||||
PackedInts.DEFAULT,
|
||||
true,
|
||||
15);
|
||||
if (LuceneTestCase.VERBOSE) {
|
||||
if (willRewrite) {
|
||||
System.out.println("TEST: packed FST");
|
||||
} else {
|
||||
System.out.println("TEST: non-packed FST");
|
||||
}
|
||||
}
|
||||
|
||||
for(InputOutput<T> pair : pairs) {
|
||||
if (pair.output instanceof List) {
|
||||
|
@ -306,7 +294,7 @@ public class FSTTester<T> {
|
|||
}
|
||||
FST<T> fst = builder.finish();
|
||||
|
||||
if (random.nextBoolean() && fst != null && !willRewrite) {
|
||||
if (random.nextBoolean() && fst != null) {
|
||||
IOContext context = LuceneTestCase.newIOContext(random);
|
||||
IndexOutput out = dir.createOutput("fst.bin", context);
|
||||
fst.save(out);
|
||||
|
|
|
@ -70,7 +70,7 @@ Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this r
|
|||
Versions of Major Components
|
||||
---------------------
|
||||
Apache Tika 1.13
|
||||
Carrot2 3.12.0
|
||||
Carrot2 3.15.0
|
||||
Velocity 1.7 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.6
|
||||
|
@ -81,6 +81,9 @@ Detailed Change List
|
|||
|
||||
New Features
|
||||
----------------------
|
||||
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
|
||||
marker. (Dawid Weiss)
|
||||
|
||||
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
|
||||
any facet command. The filters are applied after any domain change operations.
|
||||
Example: { type:terms, field:category, filter:"user:yonik" }
|
||||
|
@ -96,11 +99,21 @@ New Features
|
|||
* SOLR-8542: Adds Solr Learning to Rank (LTR) plugin for reranking results with machine learning models.
|
||||
(Michael Nilsson, Diego Ceccarelli, Joshua Pantony, Jon Dorando, Naveen Santhapuri, Alessandro Benedetti, David Grohmann, Christine Poerschke)
|
||||
|
||||
* SOLR-9055: Make collection backup/restore extensible. (Hrishikesh Gadre, Varun Thacker, Mark Miller)
|
||||
|
||||
* SOLR-9682: JSON Facet API: added "param" query type to facet domain filter specification to obtain
|
||||
filters via query parameters. (yonik)
|
||||
|
||||
* SOLR-9038: Add a command-line tool to manage the snapshots functionality (Hrishikesh Gadre via yonik)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
||||
filters specified by using those filters as acceptDocs. (yonik)
|
||||
|
||||
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
|
||||
(Jonny Marks via Christine Poerschke)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
|
||||
|
@ -109,15 +122,43 @@ Bug Fixes
|
|||
* SOLR-9433: SolrCore clean-up logic uses incorrect path to delete dataDir on failure to create a core.
|
||||
(Evan Sayer, shalin)
|
||||
|
||||
* SOLR-9360: Solr script not properly checking SOLR_PID
|
||||
(Alessandro Benedetti via Erick Erickson)
|
||||
|
||||
* SOLR-9716: RecoveryStrategy sends prep recovery command without setting read time out which can cause
|
||||
replica recovery to hang indefinitely on network partitions. (Cao Manh Dat, shalin)
|
||||
|
||||
* SOLR-9624: In Admin UI, do not attempt to highlight CSV output (Alexandre Rafalovitch)
|
||||
|
||||
* SOLR-9005: In files example, add a guard condition to javascript URP script (Alexandre Rafalovitch)
|
||||
|
||||
* SOLR-9519: JSON Facet API: don't stop at an empty facet bucket if any sub-facets still have a chance
|
||||
of matching something due to filter exclusions (which can widen the domain again).
|
||||
(Michael Sun, yonik)
|
||||
|
||||
* SOLR-9740: A bug in macro expansion of multi-valued parameters caused non-expanded values
|
||||
after the first expanded value in the same multi-valued parameter to be dropped.
|
||||
(Erik Hatcher, yonik)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
* SOLR-7539: Upgrade the clustering plugin to Carrot2 3.15.0. (Dawid Weiss)
|
||||
|
||||
* SOLR-9621: Remove several Guava & Apache Commons calls in favor of java 8 alternatives.
|
||||
(Michael Braun via David Smiley)
|
||||
|
||||
* SOLR-9720: Refactor Responsewriters to remove dependencies on TupleStream,
|
||||
Tuple, Explanation (noble)
|
||||
|
||||
* SOLR-9717: Refactor '/export' to not hardcode the JSON output and to use an API (noble)
|
||||
|
||||
* SOLR-9739: JavabinCodec implements PushWriter interface (noble)
|
||||
|
||||
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
|
||||
(Christine Poerschke, Noble Paul)
|
||||
|
||||
================== 6.3.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||
|
|
|
@ -495,7 +495,7 @@ function solr_pid_by_port() {
|
|||
# extract the value of the -Djetty.port parameter from a running Solr process
|
||||
function jetty_port() {
|
||||
SOLR_PID="$1"
|
||||
SOLR_PROC=`ps auxww | grep -w $SOLR_PID | grep start\.jar | grep jetty.port`
|
||||
SOLR_PROC=`ps auxww | grep -w $SOLR_PID | grep start\.jar | grep jetty\.port`
|
||||
IFS=' ' read -a proc_args <<< "$SOLR_PROC"
|
||||
for arg in "${proc_args[@]}"
|
||||
do
|
||||
|
@ -543,10 +543,10 @@ function get_info() {
|
|||
done < <(find "$SOLR_PID_DIR" -name "solr-*.pid" -type f)
|
||||
else
|
||||
# no pid files but check using ps just to be sure
|
||||
numSolrs=`ps auxww | grep start\.jar | grep solr.solr.home | grep -v grep | wc -l | sed -e 's/^[ \t]*//'`
|
||||
numSolrs=`ps auxww | grep start\.jar | grep solr\.solr\.home | grep -v grep | wc -l | sed -e 's/^[ \t]*//'`
|
||||
if [ "$numSolrs" != "0" ]; then
|
||||
echo -e "\nFound $numSolrs Solr nodes: "
|
||||
PROCESSES=$(ps auxww | grep start\.jar | grep solr.solr.home | grep -v grep | awk '{print $2}' | sort -r)
|
||||
PROCESSES=$(ps auxww | grep start\.jar | grep solr\.solr\.home | grep -v grep | awk '{print $2}' | sort -r)
|
||||
for ID in $PROCESSES
|
||||
do
|
||||
port=`jetty_port "$ID"`
|
||||
|
@ -1345,7 +1345,7 @@ if [[ "$SCRIPT_CMD" == "start" ]]; then
|
|||
|
||||
if [ -z "$SOLR_PID" ]; then
|
||||
# not found using the pid file ... but use ps to ensure not found
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
|
||||
fi
|
||||
|
||||
if [ "$SOLR_PID" != "" ]; then
|
||||
|
@ -1358,7 +1358,7 @@ else
|
|||
SOLR_PID=`solr_pid_by_port "$SOLR_PORT"`
|
||||
if [ -z "$SOLR_PID" ]; then
|
||||
# not found using the pid file ... but use ps to ensure not found
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
|
||||
fi
|
||||
if [ "$SOLR_PID" != "" ]; then
|
||||
stop_solr "$SOLR_SERVER_DIR" "$SOLR_PORT" "$STOP_KEY" "$SOLR_PID"
|
||||
|
@ -1659,7 +1659,7 @@ function launch_solr() {
|
|||
exit # subshell!
|
||||
fi
|
||||
else
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
|
||||
echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n"
|
||||
exit # subshell!
|
||||
fi
|
||||
|
@ -1668,7 +1668,7 @@ function launch_solr() {
|
|||
else
|
||||
echo -e "NOTE: Please install lsof as this script needs it to determine if Solr is listening on port $SOLR_PORT."
|
||||
sleep 10
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
|
||||
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
|
||||
echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n"
|
||||
return;
|
||||
fi
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.handler.clustering;
|
|||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
@ -44,9 +45,6 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
|
||||
/**
|
||||
* Provides a plugin for performing cluster analysis. This can either be applied to
|
||||
* search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
|
||||
|
@ -68,12 +66,12 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
/**
|
||||
* Declaration-order list of search clustering engines.
|
||||
*/
|
||||
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = Maps.newLinkedHashMap();
|
||||
|
||||
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = new LinkedHashMap<>();
|
||||
|
||||
/**
|
||||
* Declaration-order list of document clustering engines.
|
||||
*/
|
||||
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = Maps.newLinkedHashMap();
|
||||
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = new LinkedHashMap<>();
|
||||
|
||||
/**
|
||||
* An unmodifiable view of {@link #searchClusteringEngines}.
|
||||
|
@ -173,7 +171,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
if (engine != null) {
|
||||
checkAvailable(name, engine);
|
||||
DocListAndSet results = rb.getResults();
|
||||
Map<SolrDocument,Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
|
||||
Map<SolrDocument,Integer> docIds = new HashMap<>(results.docList.size());
|
||||
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
|
||||
results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
|
||||
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
|
||||
|
|
|
@ -58,6 +58,8 @@ import org.carrot2.core.Document;
|
|||
import org.carrot2.core.IClusteringAlgorithm;
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.carrot2.shaded.guava.common.base.MoreObjects;
|
||||
import org.carrot2.shaded.guava.common.base.Strings;
|
||||
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
|
||||
|
@ -69,12 +71,6 @@ import org.carrot2.util.resource.ResourceLookup;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
/**
|
||||
* Search results clustering engine based on Carrot2 clustering algorithms.
|
||||
*
|
||||
|
@ -155,7 +151,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
// Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
|
||||
// of this component. This by-name convention lookup is used to simplify configuring algorithms.
|
||||
String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
|
||||
log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
|
||||
log.info("Initializing Clustering Engine '" +
|
||||
MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
|
||||
|
||||
if (!Strings.isNullOrEmpty(componentName)) {
|
||||
IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
|
||||
|
@ -268,7 +265,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
|
||||
SolrParams solrParams = sreq.getParams();
|
||||
|
||||
HashSet<String> fields = Sets.newHashSet(getFieldsForClustering(sreq));
|
||||
HashSet<String> fields = new HashSet<>(getFieldsForClustering(sreq));
|
||||
fields.add(idFieldName);
|
||||
fields.add(solrParams.get(CarrotParams.URL_FIELD_NAME, "url"));
|
||||
fields.addAll(getCustomFieldsMap(solrParams).keySet());
|
||||
|
@ -295,7 +292,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
+ " must not be blank.");
|
||||
}
|
||||
|
||||
final Set<String> fields = Sets.newHashSet();
|
||||
final Set<String> fields = new HashSet<>();
|
||||
fields.addAll(Arrays.asList(titleFieldSpec.split("[, ]")));
|
||||
fields.addAll(Arrays.asList(snippetFieldSpec.split("[, ]")));
|
||||
return fields;
|
||||
|
@ -319,7 +316,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
Map<String, String> customFields = getCustomFieldsMap(solrParams);
|
||||
|
||||
// Parse language code map string into a map
|
||||
Map<String, String> languageCodeMap = Maps.newHashMap();
|
||||
Map<String, String> languageCodeMap = new HashMap<>();
|
||||
if (StringUtils.isNotBlank(languageField)) {
|
||||
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
|
||||
final String[] split = pair.split(":");
|
||||
|
@ -340,7 +337,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
if (produceSummary) {
|
||||
highlighter = HighlightComponent.getHighlighter(core);
|
||||
if (highlighter != null){
|
||||
Map<String, Object> args = Maps.newHashMap();
|
||||
Map<String, Object> args = new HashMap<>();
|
||||
snippetFieldAry = snippetFieldSpec.split("[, ]");
|
||||
args.put(HighlightParams.FIELDS, snippetFieldAry);
|
||||
args.put(HighlightParams.HIGHLIGHT, "true");
|
||||
|
@ -466,10 +463,10 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
* custom field names.
|
||||
*/
|
||||
private Map<String, String> getCustomFieldsMap(SolrParams solrParams) {
|
||||
Map<String, String> customFields = Maps.newHashMap();
|
||||
Map<String, String> customFields = new HashMap<>();
|
||||
String [] customFieldsSpec = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME);
|
||||
if (customFieldsSpec != null) {
|
||||
customFields = Maps.newHashMap();
|
||||
customFields = new HashMap<>();
|
||||
for (String customFieldSpec : customFieldsSpec) {
|
||||
String [] split = customFieldSpec.split(":");
|
||||
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
|
||||
|
@ -501,7 +498,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
|
||||
SolrParams solrParams) {
|
||||
List<NamedList<Object>> result = Lists.newArrayList();
|
||||
List<NamedList<Object>> result = new ArrayList<>();
|
||||
clustersToNamedList(carrotClusters, result, solrParams.getBool(
|
||||
CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt(
|
||||
CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
|
||||
|
@ -534,7 +531,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
// Add documents
|
||||
List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments();
|
||||
List<Object> docList = Lists.newArrayList();
|
||||
List<Object> docList = new ArrayList<>();
|
||||
cluster.add("docs", docList);
|
||||
for (Document doc : docs) {
|
||||
docList.add(doc.getField(SOLR_DOCUMENT_ID));
|
||||
|
@ -542,7 +539,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
// Add subclusters
|
||||
if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) {
|
||||
List<NamedList<Object>> subclusters = Lists.newArrayList();
|
||||
List<NamedList<Object>> subclusters = new ArrayList<>();
|
||||
cluster.add("clusters", subclusters);
|
||||
clustersToNamedList(outCluster.getSubclusters(), subclusters,
|
||||
outputSubClusters, maxLabels);
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
|
||||
/**
|
||||
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
|
||||
* @lucene.experimental
|
||||
|
@ -50,7 +50,7 @@ public final class CarrotParams {
|
|||
*/
|
||||
public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
|
||||
|
||||
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
static final Set<String> CARROT_PARAM_NAMES = new HashSet<>(Arrays.asList(
|
||||
ALGORITHM,
|
||||
|
||||
TITLE_FIELD_NAME,
|
||||
|
@ -66,8 +66,8 @@ public final class CarrotParams {
|
|||
NUM_DESCRIPTIONS,
|
||||
OUTPUT_SUB_CLUSTERS,
|
||||
RESOURCES_DIR,
|
||||
LANGUAGE_CODE_MAP);
|
||||
|
||||
LANGUAGE_CODE_MAP));
|
||||
|
||||
/** No instances. */
|
||||
private CarrotParams() {}
|
||||
}
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -26,6 +28,7 @@ import org.apache.lucene.analysis.core.StopFilterFactory;
|
|||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.core.attribute.Init;
|
||||
import org.carrot2.core.attribute.Processing;
|
||||
|
@ -37,9 +40,6 @@ import org.carrot2.util.attribute.Attribute;
|
|||
import org.carrot2.util.attribute.Bindable;
|
||||
import org.carrot2.util.attribute.Input;
|
||||
|
||||
import com.google.common.collect.HashMultimap;
|
||||
import com.google.common.collect.Multimap;
|
||||
|
||||
/**
|
||||
* An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop
|
||||
* words from a field's StopFilter to the default stop words used in Carrot2,
|
||||
|
@ -67,7 +67,7 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
|
|||
/**
|
||||
* A lazily-built cache of stop words per field.
|
||||
*/
|
||||
private Multimap<String, CharArraySet> solrStopWords = HashMultimap.create();
|
||||
private HashMap<String, List<CharArraySet>> solrStopWords = new HashMap<>();
|
||||
|
||||
/**
|
||||
* Carrot2's default lexical resources to use in addition to Solr's stop
|
||||
|
@ -79,31 +79,34 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
|
|||
* Obtains stop words for a field from the associated
|
||||
* {@link StopFilterFactory}, if any.
|
||||
*/
|
||||
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) {
|
||||
private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
|
||||
// No need to synchronize here, Carrot2 ensures that instances
|
||||
// of this class are not used by multiple threads at a time.
|
||||
if (!solrStopWords.containsKey(fieldName)) {
|
||||
final Analyzer fieldAnalyzer = core.getLatestSchema().getFieldType(fieldName)
|
||||
.getIndexAnalyzer();
|
||||
if (fieldAnalyzer instanceof TokenizerChain) {
|
||||
final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer)
|
||||
.getTokenFilterFactories();
|
||||
for (TokenFilterFactory factory : filterFactories) {
|
||||
if (factory instanceof StopFilterFactory) {
|
||||
// StopFilterFactory holds the stop words in a CharArraySet
|
||||
solrStopWords.put(fieldName,
|
||||
((StopFilterFactory) factory).getStopWords());
|
||||
}
|
||||
synchronized (solrStopWords) {
|
||||
if (!solrStopWords.containsKey(fieldName)) {
|
||||
solrStopWords.put(fieldName, new ArrayList<>());
|
||||
|
||||
if (factory instanceof CommonGramsFilterFactory) {
|
||||
solrStopWords.put(fieldName,
|
||||
((CommonGramsFilterFactory) factory)
|
||||
.getCommonWords());
|
||||
IndexSchema schema = core.getLatestSchema();
|
||||
final Analyzer fieldAnalyzer = schema.getFieldType(fieldName).getIndexAnalyzer();
|
||||
if (fieldAnalyzer instanceof TokenizerChain) {
|
||||
final TokenFilterFactory[] filterFactories =
|
||||
((TokenizerChain) fieldAnalyzer).getTokenFilterFactories();
|
||||
for (TokenFilterFactory factory : filterFactories) {
|
||||
if (factory instanceof StopFilterFactory) {
|
||||
// StopFilterFactory holds the stop words in a CharArraySet
|
||||
CharArraySet stopWords = ((StopFilterFactory) factory).getStopWords();
|
||||
solrStopWords.get(fieldName).add(stopWords);
|
||||
}
|
||||
|
||||
if (factory instanceof CommonGramsFilterFactory) {
|
||||
CharArraySet commonWords = ((CommonGramsFilterFactory) factory).getCommonWords();
|
||||
solrStopWords.get(fieldName).add(commonWords);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return solrStopWords.get(fieldName);
|
||||
}
|
||||
return solrStopWords.get(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -45,9 +48,6 @@ import org.carrot2.core.LanguageCode;
|
|||
import org.carrot2.util.attribute.AttributeUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
@ -211,7 +211,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
// stoplabels.mt, so we're expecting only one cluster with label "online".
|
||||
final List<NamedList<Object>> clusters = checkEngine(
|
||||
getClusteringEngine(engineName), 1, params);
|
||||
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online"));
|
||||
assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -226,7 +226,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
// only one cluster with label "online".
|
||||
final List<NamedList<Object>> clusters = checkEngine(
|
||||
getClusteringEngine("lexical-resource-check"), 1, params);
|
||||
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online"));
|
||||
assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -243,9 +243,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
|
||||
final List<NamedList<Object>> clusters = checkEngine(
|
||||
getClusteringEngine("lexical-resource-check"), 2, params);
|
||||
assertEquals(ImmutableList.of("online"), getLabels(clusters.get(0)));
|
||||
assertEquals(ImmutableList.of("solrownstopword"),
|
||||
getLabels(clusters.get(1)));
|
||||
assertEquals(Collections.singletonList("online"), getLabels(clusters.get(0)));
|
||||
assertEquals(Collections.singletonList("solrownstopword"), getLabels(clusters.get(1)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -395,8 +394,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default");
|
||||
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
|
||||
assertEquals(
|
||||
Lists.newArrayList("stc", "default", "mock"),
|
||||
Lists.newArrayList(engines.keySet()));
|
||||
Arrays.asList("stc", "default", "mock"),
|
||||
new ArrayList<>(engines.keySet()));
|
||||
assertEquals(
|
||||
LingoClusteringAlgorithm.class,
|
||||
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
|
||||
|
@ -407,8 +406,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order");
|
||||
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
|
||||
assertEquals(
|
||||
Lists.newArrayList("unavailable", "lingo", "stc", "mock", "default"),
|
||||
Lists.newArrayList(engines.keySet()));
|
||||
Arrays.asList("unavailable", "lingo", "stc", "mock", "default"),
|
||||
new ArrayList<>(engines.keySet()));
|
||||
assertEquals(
|
||||
LingoClusteringAlgorithm.class,
|
||||
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
|
||||
|
@ -419,8 +418,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups");
|
||||
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
|
||||
assertEquals(
|
||||
Lists.newArrayList("", "default"),
|
||||
Lists.newArrayList(engines.keySet()));
|
||||
Arrays.asList("", "default"),
|
||||
new ArrayList<>(engines.keySet()));
|
||||
assertEquals(
|
||||
MockClusteringAlgorithm.class,
|
||||
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -29,8 +30,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs input documents as clusters.
|
||||
* Useful only in tests.
|
||||
|
@ -56,7 +55,7 @@ public class EchoClusteringAlgorithm extends ProcessingComponentBase implements
|
|||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
clusters = Lists.newArrayListWithCapacity(documents.size());
|
||||
clusters = new ArrayList<>();
|
||||
|
||||
for (Document document : documents) {
|
||||
final Cluster cluster = new Cluster();
|
||||
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -36,8 +37,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs stem of each token of each
|
||||
* document as a separate cluster. Useful only in tests.
|
||||
|
@ -64,7 +63,7 @@ public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase
|
|||
final AllTokens allTokens = preprocessingContext.allTokens;
|
||||
final AllWords allWords = preprocessingContext.allWords;
|
||||
final AllStems allStems = preprocessingContext.allStems;
|
||||
clusters = Lists.newArrayListWithCapacity(allTokens.image.length);
|
||||
clusters = new ArrayList<>();
|
||||
for (int i = 0; i < allTokens.image.length; i++) {
|
||||
if (allTokens.wordIndex[i] >= 0) {
|
||||
clusters.add(new Cluster(new String(
|
||||
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -33,7 +34,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs each token of each document
|
||||
|
@ -58,8 +58,7 @@ public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase
|
|||
public void process() throws ProcessingException {
|
||||
final PreprocessingContext preprocessingContext = preprocessing.preprocess(
|
||||
documents, "", LanguageCode.ENGLISH);
|
||||
clusters = Lists
|
||||
.newArrayListWithCapacity(preprocessingContext.allTokens.image.length);
|
||||
clusters = new ArrayList<>();
|
||||
for (char[] token : preprocessingContext.allTokens.image) {
|
||||
if (token != null) {
|
||||
clusters.add(new Cluster(new String(token)));
|
||||
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -33,8 +34,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock implementation of Carrot2 clustering algorithm for testing whether the
|
||||
* customized lexical resource lookup works correctly. This algorithm ignores
|
||||
|
@ -60,7 +59,7 @@ public class LexicalResourcesCheckClusteringAlgorithm extends
|
|||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
clusters = Lists.newArrayList();
|
||||
clusters = new ArrayList<>();
|
||||
if (wordsToCheck == null) {
|
||||
return;
|
||||
}
|
||||
@ -15,13 +15,13 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.carrot2.core.*;
|
||||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.carrot2.core.attribute.Processing;
|
||||
import org.carrot2.util.attribute.*;
|
||||
import org.carrot2.util.attribute.constraint.IntRange;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Bindable(prefix = "MockClusteringAlgorithm")
|
||||
|
@ -62,7 +62,7 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
|
|||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
clusters = Lists.newArrayList();
|
||||
clusters = new ArrayList<>();
|
||||
if (documents == null) {
|
||||
return;
|
||||
}
|
||||
@ -26,6 +26,7 @@ import java.util.Map;
|
|||
import java.util.Optional;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
|
@ -35,6 +36,7 @@ import org.apache.solr.common.cloud.Replica.State;
|
|||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.params.CollectionAdminParams;
|
||||
import org.apache.solr.common.params.CoreAdminParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -68,31 +70,13 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
|
||||
String collectionName = message.getStr(COLLECTION_PROP);
|
||||
String backupName = message.getStr(NAME);
|
||||
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
|
||||
String asyncId = message.getStr(ASYNC);
|
||||
String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
|
||||
|
||||
String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
|
||||
Optional<CollectionSnapshotMetaData> snapshotMeta = Optional.empty();
|
||||
if (commitName != null) {
|
||||
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
|
||||
snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
|
||||
if (!snapshotMeta.isPresent()) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName
|
||||
+ " does not exist for collection " + collectionName);
|
||||
}
|
||||
if (snapshotMeta.get().getStatus() != SnapshotStatus.Successful) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName
|
||||
+ " has not completed successfully. The status is " + snapshotMeta.get().getStatus());
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> requestMap = new HashMap<>();
|
||||
Instant startTime = Instant.now();
|
||||
|
||||
CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer();
|
||||
BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
|
||||
BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader, collectionName);
|
||||
BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader);
|
||||
|
||||
// Backup location
|
||||
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
|
||||
|
@ -106,50 +90,16 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
// Create a directory to store backup details.
|
||||
repository.createDirectory(backupPath);
|
||||
|
||||
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName,
|
||||
backupPath);
|
||||
|
||||
Collection<String> shardsToConsider = Collections.emptySet();
|
||||
if (snapshotMeta.isPresent()) {
|
||||
shardsToConsider = snapshotMeta.get().getShards();
|
||||
}
|
||||
|
||||
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
|
||||
Replica replica = null;
|
||||
|
||||
if (snapshotMeta.isPresent()) {
|
||||
if (!shardsToConsider.contains(slice.getName())) {
|
||||
log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.",
|
||||
slice.getName(), collectionName, snapshotMeta.get().getName());
|
||||
continue;
|
||||
}
|
||||
replica = selectReplicaWithSnapshot(snapshotMeta.get(), slice);
|
||||
} else {
|
||||
// Note - Actually this can return a null value when there is no leader for this shard.
|
||||
replica = slice.getLeader();
|
||||
if (replica == null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + slice.getName() + " of collection " + collectionName);
|
||||
}
|
||||
String strategy = message.getStr(CollectionAdminParams.INDEX_BACKUP_STRATEGY, CollectionAdminParams.COPY_FILES_STRATEGY);
|
||||
switch (strategy) {
|
||||
case CollectionAdminParams.COPY_FILES_STRATEGY: {
|
||||
copyIndexFiles(backupPath, message, results);
|
||||
break;
|
||||
}
|
||||
|
||||
String coreName = replica.getStr(CORE_NAME_PROP);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
|
||||
params.set(NAME, slice.getName());
|
||||
params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
|
||||
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString()); // note: index dir will be here then the "snapshot." + slice name
|
||||
params.set(CORE_NAME_PROP, coreName);
|
||||
if (snapshotMeta.isPresent()) {
|
||||
params.set(CoreAdminParams.COMMIT_NAME, snapshotMeta.get().getName());
|
||||
case CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY: {
|
||||
break;
|
||||
}
|
||||
|
||||
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
|
||||
log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
|
||||
}
|
||||
log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);
|
||||
|
||||
ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
|
||||
|
||||
log.info("Starting to backup ZK data for backupName={}", backupName);
|
||||
|
||||
|
@ -168,6 +118,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName);
|
||||
properties.put(COLL_CONF, configName);
|
||||
properties.put(BackupManager.START_TIME_PROP, startTime.toString());
|
||||
properties.put(BackupManager.INDEX_VERSION_PROP, Version.LATEST.toString());
|
||||
//TODO: Add MD5 of the configset. If during restore the same name configset exists then we can compare checksums to see if they are the same.
|
||||
//if they are not the same then we can throw an error or have an 'overwriteConfig' flag
|
||||
//TODO save numDocs for the shardLeader. We can use it to sanity check the restore.
|
||||
|
@ -202,4 +153,73 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
|
||||
return r.get();
|
||||
}
|
||||
|
||||
private void copyIndexFiles(URI backupPath, ZkNodeProps request, NamedList results) throws Exception {
|
||||
String collectionName = request.getStr(COLLECTION_PROP);
|
||||
String backupName = request.getStr(NAME);
|
||||
String asyncId = request.getStr(ASYNC);
|
||||
String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY);
|
||||
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
|
||||
Map<String, String> requestMap = new HashMap<>();
|
||||
|
||||
String commitName = request.getStr(CoreAdminParams.COMMIT_NAME);
|
||||
Optional<CollectionSnapshotMetaData> snapshotMeta = Optional.empty();
|
||||
if (commitName != null) {
|
||||
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
|
||||
snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
|
||||
if (!snapshotMeta.isPresent()) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName
|
||||
+ " does not exist for collection " + collectionName);
|
||||
}
|
||||
if (snapshotMeta.get().getStatus() != SnapshotStatus.Successful) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName
|
||||
+ " has not completed successfully. The status is " + snapshotMeta.get().getStatus());
|
||||
}
|
||||
}
|
||||
|
||||
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName,
|
||||
backupPath);
|
||||
|
||||
Collection<String> shardsToConsider = Collections.emptySet();
|
||||
if (snapshotMeta.isPresent()) {
|
||||
shardsToConsider = snapshotMeta.get().getShards();
|
||||
}
|
||||
|
||||
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
|
||||
Replica replica = null;
|
||||
|
||||
if (snapshotMeta.isPresent()) {
|
||||
if (!shardsToConsider.contains(slice.getName())) {
|
||||
log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.",
|
||||
slice.getName(), collectionName, snapshotMeta.get().getName());
|
||||
continue;
|
||||
}
|
||||
replica = selectReplicaWithSnapshot(snapshotMeta.get(), slice);
|
||||
} else {
|
||||
// Note - Actually this can return a null value when there is no leader for this shard.
|
||||
replica = slice.getLeader();
|
||||
if (replica == null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + slice.getName() + " of collection " + collectionName);
|
||||
}
|
||||
}
|
||||
|
||||
String coreName = replica.getStr(CORE_NAME_PROP);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
|
||||
params.set(NAME, slice.getName());
|
||||
params.set(CoreAdminParams.BACKUP_REPOSITORY, repoName);
|
||||
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString()); // note: index dir will be here then the "snapshot." + slice name
|
||||
params.set(CORE_NAME_PROP, coreName);
|
||||
if (snapshotMeta.isPresent()) {
|
||||
params.set(CoreAdminParams.COMMIT_NAME, snapshotMeta.get().getName());
|
||||
}
|
||||
|
||||
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
|
||||
log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
|
||||
}
|
||||
log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);
|
||||
|
||||
ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
|
||||
}
|
||||
}
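For context, the per-shard BACKUPCORE requests assembled above are normally triggered by a collection-level BACKUP call. A hedged sketch of issuing that call from a client, reusing only SolrJ types that appear elsewhere in this commit; the ZK address, collection name and backup location are placeholders.

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.common.params.CollectionAdminParams;

public class CollectionBackupExample {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient.Builder().withZkHost("localhost:2181").build()) {
      CollectionAdminRequest.Backup backup = new CollectionAdminRequest.Backup("books", "books-backup-1");
      backup.setLocation("/backups/solr");
      // COPY_FILES_STRATEGY copies the index files; NO_INDEX_BACKUP_STRATEGY stores only meta-data.
      backup.setIndexBackupStrategy(CollectionAdminParams.COPY_FILES_STRATEGY);
      CollectionAdminResponse resp = backup.process(client);
      if (resp.getStatus() != 0) {
        throw new IllegalStateException("BACKUP request failed with status " + resp.getStatus());
      }
    }
  }
}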
@@ -19,6 +19,7 @@ package org.apache.solr.cloud;
|
|||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
@@ -572,24 +573,44 @@ public class RecoveryStrategy extends Thread implements Closeable {
private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice)
throws SolrServerException, IOException, InterruptedException, ExecutionException {

try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) {
client.setConnectionTimeout(30000);
WaitForState prepCmd = new WaitForState();
prepCmd.setCoreName(leaderCoreName);
prepCmd.setNodeName(zkController.getNodeName());
prepCmd.setCoreNodeName(coreZkNodeName);
prepCmd.setState(Replica.State.RECOVERING);
prepCmd.setCheckLive(true);
prepCmd.setOnlyIfLeader(true);
final Slice.State state = slice.getState();
if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) {
prepCmd.setOnlyIfLeaderActive(true);
WaitForState prepCmd = new WaitForState();
prepCmd.setCoreName(leaderCoreName);
prepCmd.setNodeName(zkController.getNodeName());
prepCmd.setCoreNodeName(coreZkNodeName);
prepCmd.setState(Replica.State.RECOVERING);
prepCmd.setCheckLive(true);
prepCmd.setOnlyIfLeader(true);
final Slice.State state = slice.getState();
if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) {
prepCmd.setOnlyIfLeaderActive(true);
}

final int maxTries = 30;
for (int numTries = 0; numTries < maxTries; numTries++) {
try {
sendPrepRecoveryCmd(leaderBaseUrl, prepCmd);
break;
} catch (ExecutionException e) {
SolrServerException solrException = (SolrServerException) e.getCause();
if (solrException.getRootCause() instanceof SocketTimeoutException && numTries < maxTries) {
LOG.warn("Socket timeout when send prep recovery cmd, retrying.. ");
continue;
}
throw e;
}
}
}

private void sendPrepRecoveryCmd(String leaderBaseUrl, WaitForState prepCmd)
throws SolrServerException, IOException, InterruptedException, ExecutionException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) {
client.setConnectionTimeout(10000);
client.setSoTimeout(10000);
HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);
prevSendPreRecoveryHttpUriRequest = mrr.httpUriRequest;


LOG.info("Sending prep recovery command to [{}]; [{}]", leaderBaseUrl, prepCmd.toString());


mrr.future.get();
}
}
@@ -87,7 +87,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
|
||||
URI backupPath = repository.resolve(location, backupName);
|
||||
ZkStateReader zkStateReader = ocmh.zkStateReader;
|
||||
BackupManager backupMgr = new BackupManager(repository, zkStateReader, restoreCollectionName);
|
||||
BackupManager backupMgr = new BackupManager(repository, zkStateReader);
|
||||
|
||||
Properties properties = backupMgr.readBackupProperties(location, backupName);
|
||||
String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP);
|
||||
@ -110,7 +110,6 @@ import org.apache.solr.response.RubyResponseWriter;
|
|||
import org.apache.solr.response.SchemaXmlResponseWriter;
|
||||
import org.apache.solr.response.SmileResponseWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.response.SortingResponseWriter;
|
||||
import org.apache.solr.response.XMLResponseWriter;
|
||||
import org.apache.solr.response.transform.TransformerFactory;
|
||||
import org.apache.solr.rest.ManagedResourceStorage;
|
||||
|
@ -2332,7 +2331,6 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
m.put("raw", new RawResponseWriter());
|
||||
m.put(CommonParams.JAVABIN, new BinaryResponseWriter());
|
||||
m.put("csv", new CSVResponseWriter());
|
||||
m.put("xsort", new SortingResponseWriter());
|
||||
m.put("schema.xml", new SchemaXmlResponseWriter());
|
||||
m.put("smile", new SmileResponseWriter());
|
||||
m.put(ReplicationHandler.FILE_STREAM, getFileStreamWriter());
|
||||
|
@@ -2350,12 +2348,21 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
@Override
public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse response) throws IOException {
RawWriter rawWriter = (RawWriter) response.getValues().get(ReplicationHandler.FILE_STREAM);
if(rawWriter!=null) rawWriter.write(out);
if (rawWriter != null) {
rawWriter.write(out);
if (rawWriter instanceof Closeable) ((Closeable) rawWriter).close();
}

}

@Override
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
RawWriter rawWriter = (RawWriter) response.getValues().get(ReplicationHandler.FILE_STREAM);
if (rawWriter != null) {
return rawWriter.getContentType();
} else {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
}
}
};
}

@@ -2365,6 +2372,9 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
}

public interface RawWriter {
default String getContentType() {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
}
void write(OutputStream os) throws IOException;
}
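A minimal sketch of implementing the new SolrCore.RawWriter hook defined above; the class is illustrative only. A handler would put such an object into the response under ReplicationHandler.FILE_STREAM, as ExportHandler does further down in this commit.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.solr.core.SolrCore;

public class PlainTextRawWriter implements SolrCore.RawWriter {
  @Override
  public String getContentType() {
    // Override the javabin default declared by the interface.
    return "text/plain";
  }

  @Override
  public void write(OutputStream os) throws IOException {
    os.write("streamed by a RawWriter".getBytes(StandardCharsets.UTF_8));
  }
}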
@@ -68,7 +68,7 @@ public class BackupManager {
|
|||
protected final ZkStateReader zkStateReader;
|
||||
protected final BackupRepository repository;
|
||||
|
||||
public BackupManager(BackupRepository repository, ZkStateReader zkStateReader, String collectionName) {
|
||||
public BackupManager(BackupRepository repository, ZkStateReader zkStateReader) {
|
||||
this.repository = Objects.requireNonNull(repository);
|
||||
this.zkStateReader = Objects.requireNonNull(zkStateReader);
|
||||
}
|
||||
|
@ -126,6 +126,7 @@ public class BackupManager {
|
|||
*
|
||||
* @param backupLoc The base path used to store the backup data.
|
||||
* @param backupId The unique name for the backup.
|
||||
* @param collectionName The name of the collection whose meta-data is to be returned.
|
||||
* @return the meta-data information for the backed-up collection.
|
||||
* @throws IOException in case of errors.
|
||||
*/
|
||||
@@ -0,0 +1,468 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.core.snapshots;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.CommandLineParser;
|
||||
import org.apache.commons.cli.HelpFormatter;
|
||||
import org.apache.commons.cli.Option;
|
||||
import org.apache.commons.cli.Options;
|
||||
import org.apache.commons.cli.ParseException;
|
||||
import org.apache.commons.cli.PosixParser;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.params.CollectionAdminParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* This class provides utility functions required for Solr snapshots functionality.
|
||||
*/
|
||||
public class SolrSnapshotsTool implements Closeable {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private static final DateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z", Locale.getDefault());
|
||||
|
||||
private static final String CREATE = "create";
|
||||
private static final String DELETE = "delete";
|
||||
private static final String LIST = "list";
|
||||
private static final String DESCRIBE = "describe";
|
||||
private static final String PREPARE_FOR_EXPORT = "prepare-snapshot-export";
|
||||
private static final String EXPORT_SNAPSHOT = "export";
|
||||
private static final String HELP = "help";
|
||||
private static final String COLLECTION = "c";
|
||||
private static final String TEMP_DIR = "t";
|
||||
private static final String DEST_DIR = "d";
|
||||
private static final String SOLR_ZK_ENSEMBLE = "z";
|
||||
private static final String HDFS_PATH_PREFIX = "p";
|
||||
private static final String BACKUP_REPO_NAME = "r";
|
||||
private static final String ASYNC_REQ_ID = "i";
|
||||
private static final List<String> OPTION_HELP_ORDER = Arrays.asList(CREATE, DELETE, LIST, DESCRIBE,
|
||||
PREPARE_FOR_EXPORT, EXPORT_SNAPSHOT, HELP, SOLR_ZK_ENSEMBLE, COLLECTION, DEST_DIR, BACKUP_REPO_NAME,
|
||||
ASYNC_REQ_ID, TEMP_DIR, HDFS_PATH_PREFIX);
|
||||
|
||||
private final CloudSolrClient solrClient;
|
||||
|
||||
public SolrSnapshotsTool(String solrZkEnsemble) {
|
||||
solrClient = (new CloudSolrClient.Builder()).withZkHost(solrZkEnsemble).build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (solrClient != null) {
|
||||
solrClient.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void createSnapshot(String collectionName, String snapshotName) {
|
||||
CollectionAdminRequest.CreateSnapshot createSnap = new CollectionAdminRequest.CreateSnapshot(collectionName, snapshotName);
|
||||
CollectionAdminResponse resp;
|
||||
try {
|
||||
resp = createSnap.process(solrClient);
|
||||
Preconditions.checkState(resp.getStatus() == 0, "The CREATESNAPSHOT request failed. The status code is " + resp.getStatus());
|
||||
System.out.println("Successfully created snapshot with name " + snapshotName + " for collection " + collectionName);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to create a snapshot with name " + snapshotName + " for collection " + collectionName, e);
|
||||
System.out.println("Failed to create a snapshot with name " + snapshotName + " for collection " + collectionName
|
||||
+" due to following error : "+e.getLocalizedMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void deleteSnapshot(String collectionName, String snapshotName) {
|
||||
CollectionAdminRequest.DeleteSnapshot deleteSnap = new CollectionAdminRequest.DeleteSnapshot(collectionName, snapshotName);
|
||||
CollectionAdminResponse resp;
|
||||
try {
|
||||
resp = deleteSnap.process(solrClient);
|
||||
Preconditions.checkState(resp.getStatus() == 0, "The DELETESNAPSHOT request failed. The status code is " + resp.getStatus());
|
||||
System.out.println("Successfully deleted snapshot with name " + snapshotName + " for collection " + collectionName);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to delete a snapshot with name " + snapshotName + " for collection " + collectionName, e);
|
||||
System.out.println("Failed to delete a snapshot with name " + snapshotName + " for collection " + collectionName
|
||||
+" due to following error : "+e.getLocalizedMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
public void listSnapshots(String collectionName) {
|
||||
CollectionAdminRequest.ListSnapshots listSnaps = new CollectionAdminRequest.ListSnapshots(collectionName);
|
||||
CollectionAdminResponse resp;
|
||||
try {
|
||||
resp = listSnaps.process(solrClient);
|
||||
Preconditions.checkState(resp.getStatus() == 0, "The LISTSNAPSHOTS request failed. The status code is " + resp.getStatus());
|
||||
|
||||
NamedList apiResult = (NamedList) resp.getResponse().get(SolrSnapshotManager.SNAPSHOTS_INFO);
|
||||
for (int i = 0; i < apiResult.size(); i++) {
|
||||
System.out.println(apiResult.getName(i));
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to list snapshots for collection " + collectionName, e);
|
||||
System.out.println("Failed to list snapshots for collection " + collectionName
|
||||
+" due to following error : "+e.getLocalizedMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void describeSnapshot(String collectionName, String snapshotName) {
|
||||
try {
|
||||
Collection<CollectionSnapshotMetaData> snaps = listCollectionSnapshots(collectionName);
|
||||
for (CollectionSnapshotMetaData m : snaps) {
|
||||
if (snapshotName.equals(m.getName())) {
|
||||
System.out.println("Name: " + m.getName());
|
||||
System.out.println("Status: " + m.getStatus());
|
||||
System.out.println("Time of creation: " + dateFormat.format(m.getCreationDate()));
|
||||
System.out.println("Total number of cores with snapshot: " + m.getReplicaSnapshots().size());
|
||||
System.out.println("-----------------------------------");
|
||||
for (CoreSnapshotMetaData n : m.getReplicaSnapshots()) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("Core [name=");
|
||||
builder.append(n.getCoreName());
|
||||
builder.append(", leader=");
|
||||
builder.append(n.isLeader());
|
||||
builder.append(", generation=");
|
||||
builder.append(n.getGenerationNumber());
|
||||
builder.append(", indexDirPath=");
|
||||
builder.append(n.getIndexDirPath());
|
||||
builder.append("]\n");
|
||||
System.out.println(builder.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to fetch snapshot details", e);
|
||||
System.out.println("Failed to fetch snapshot details due to following error : " + e.getLocalizedMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String, List<String>> getIndexFilesPathForSnapshot(String collectionName, String snapshotName, Optional<String> pathPrefix)
|
||||
throws SolrServerException, IOException {
|
||||
Map<String, List<String>> result = new HashMap<>();
|
||||
|
||||
Collection<CollectionSnapshotMetaData> snaps = listCollectionSnapshots(collectionName);
|
||||
Optional<CollectionSnapshotMetaData> meta = Optional.empty();
|
||||
for (CollectionSnapshotMetaData m : snaps) {
|
||||
if (snapshotName.equals(m.getName())) {
|
||||
meta = Optional.of(m);
|
||||
}
|
||||
}
|
||||
|
||||
if (!meta.isPresent()) {
|
||||
throw new IllegalArgumentException("The snapshot named " + snapshotName
|
||||
+ " is not found for collection " + collectionName);
|
||||
}
|
||||
|
||||
DocCollection collectionState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
|
||||
for (Slice s : collectionState.getSlices()) {
|
||||
List<CoreSnapshotMetaData> replicaSnaps = meta.get().getReplicaSnapshotsForShard(s.getName());
|
||||
// Prepare a list of *existing* replicas (since one or more replicas could have been deleted after the snapshot creation).
|
||||
List<CoreSnapshotMetaData> availableReplicas = new ArrayList<>();
|
||||
for (CoreSnapshotMetaData m : replicaSnaps) {
|
||||
if (isReplicaAvailable(s, m.getCoreName())) {
|
||||
availableReplicas.add(m);
|
||||
}
|
||||
}
|
||||
|
||||
if (availableReplicas.isEmpty()) {
|
||||
throw new IllegalArgumentException(
|
||||
"The snapshot named " + snapshotName + " not found for shard "
|
||||
+ s.getName() + " of collection " + collectionName);
|
||||
}
|
||||
|
||||
// Prefer a leader replica (at the time when the snapshot was created).
|
||||
CoreSnapshotMetaData coreSnap = availableReplicas.get(0);
|
||||
for (CoreSnapshotMetaData m : availableReplicas) {
|
||||
if (m.isLeader()) {
|
||||
coreSnap = m;
|
||||
}
|
||||
}
|
||||
|
||||
String indexDirPath = coreSnap.getIndexDirPath();
|
||||
if (pathPrefix.isPresent()) {
|
||||
// If the path prefix is specified, rebuild the path to the index directory.
|
||||
Path t = new Path(coreSnap.getIndexDirPath());
|
||||
indexDirPath = (new Path(pathPrefix.get(), t.toUri().getPath())).toString();
|
||||
}
|
||||
|
||||
List<String> paths = new ArrayList<>();
|
||||
for (String fileName : coreSnap.getFiles()) {
|
||||
Path p = new Path(indexDirPath, fileName);
|
||||
paths.add(p.toString());
|
||||
}
|
||||
|
||||
result.put(s.getName(), paths);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
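The map returned above is keyed by shard name, with the absolute paths of that shard's snapshotted index files as values. A short illustrative sketch of consuming it; the ZK address, collection and snapshot names are placeholders.

import java.util.List;
import java.util.Map;
import java.util.Optional;

import org.apache.solr.core.snapshots.SolrSnapshotsTool;

public class SnapshotFileListing {
  public static void main(String[] args) throws Exception {
    try (SolrSnapshotsTool tool = new SolrSnapshotsTool("localhost:2181")) {
      Map<String, List<String>> filesPerShard =
          tool.getIndexFilesPathForSnapshot("books", "snapshot-1", Optional.empty());
      for (Map.Entry<String, List<String>> entry : filesPerShard.entrySet()) {
        System.out.println(entry.getKey() + ": " + entry.getValue().size() + " index files");
      }
    }
  }
}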
|
||||
|
||||
public void buildCopyListings(String collectionName, String snapshotName, String localFsPath, Optional<String> pathPrefix)
|
||||
throws SolrServerException, IOException {
|
||||
Map<String, List<String>> paths = getIndexFilesPathForSnapshot(collectionName, snapshotName, pathPrefix);
|
||||
for (Map.Entry<String,List<String>> entry : paths.entrySet()) {
|
||||
StringBuilder filesBuilder = new StringBuilder();
|
||||
for (String filePath : entry.getValue()) {
|
||||
filesBuilder.append(filePath);
|
||||
filesBuilder.append("\n");
|
||||
}
|
||||
|
||||
String files = filesBuilder.toString().trim();
|
||||
try (Writer w = new OutputStreamWriter(new FileOutputStream(new File(localFsPath, entry.getKey())), StandardCharsets.UTF_8)) {
|
||||
w.write(files);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void backupCollectionMetaData(String collectionName, String snapshotName, String backupLoc) throws SolrServerException, IOException {
|
||||
// Backup the collection meta-data
|
||||
CollectionAdminRequest.Backup backup = new CollectionAdminRequest.Backup(collectionName, snapshotName);
|
||||
backup.setIndexBackupStrategy(CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY);
|
||||
backup.setLocation(backupLoc);
|
||||
CollectionAdminResponse resp = backup.process(solrClient);
|
||||
Preconditions.checkState(resp.getStatus() == 0, "The request failed. The status code is " + resp.getStatus());
|
||||
}
|
||||
|
||||
public void prepareForExport(String collectionName, String snapshotName, String localFsPath, Optional<String> pathPrefix, String destPath) {
|
||||
try {
|
||||
buildCopyListings(collectionName, snapshotName, localFsPath, pathPrefix);
|
||||
System.out.println("Successfully prepared copylisting for the snapshot export.");
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to prepare a copylisting for snapshot with name " + snapshotName + " for collection "
|
||||
+ collectionName, e);
|
||||
System.out.println("Failed to prepare a copylisting for snapshot with name " + snapshotName + " for collection "
|
||||
+ collectionName + " due to following error : " + e.getLocalizedMessage());
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
try {
|
||||
backupCollectionMetaData(collectionName, snapshotName, destPath);
|
||||
System.out.println("Successfully backed up collection meta-data");
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to backup collection meta-data for collection " + collectionName, e);
|
||||
System.out.println("Failed to backup collection meta-data for collection " + collectionName
|
||||
+ " due to following error : " + e.getLocalizedMessage());
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
public void exportSnapshot(String collectionName, String snapshotName, String destPath, Optional<String> backupRepo,
|
||||
Optional<String> asyncReqId) {
|
||||
try {
|
||||
CollectionAdminRequest.Backup backup = new CollectionAdminRequest.Backup(collectionName, snapshotName);
|
||||
backup.setIndexBackupStrategy(CollectionAdminParams.COPY_FILES_STRATEGY);
|
||||
backup.setLocation(destPath);
|
||||
if (backupRepo.isPresent()) {
|
||||
backup.setRepositoryName(backupRepo.get());
|
||||
}
|
||||
if (asyncReqId.isPresent()) {
|
||||
backup.setAsyncId(asyncReqId.get());
|
||||
}
|
||||
CollectionAdminResponse resp = backup.process(solrClient);
|
||||
Preconditions.checkState(resp.getStatus() == 0, "The request failed. The status code is " + resp.getStatus());
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to backup collection meta-data for collection " + collectionName, e);
|
||||
System.out.println("Failed to backup collection meta-data for collection " + collectionName
|
||||
+ " due to following error : " + e.getLocalizedMessage());
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
CommandLineParser parser = new PosixParser();
|
||||
Options options = new Options();
|
||||
|
||||
options.addOption(null, CREATE, true, "This command will create a snapshot with the specified name");
|
||||
options.addOption(null, DELETE, true, "This command will delete a snapshot with the specified name");
|
||||
options.addOption(null, LIST, false, "This command will list all the named snapshots for the specified collection.");
|
||||
options.addOption(null, DESCRIBE, true, "This command will print details for a named snapshot for the specified collection.");
|
||||
options.addOption(null, PREPARE_FOR_EXPORT, true, "This command will prepare copylistings for the specified snapshot."
|
||||
+ " This command should only be used only if Solr is deployed with Hadoop and collection index files are stored on a shared"
|
||||
+ " file-system e.g. HDFS");
|
||||
options.addOption(null, EXPORT_SNAPSHOT, true, "This command will create a backup for the specified snapshot.");
|
||||
options.addOption(null, HELP, false, "This command will print the help message for the snapshots related commands.");
|
||||
options.addOption(TEMP_DIR, true, "This parameter specifies the path of a temporary directory on local filesystem"
|
||||
+ " during prepare-snapshot-export command.");
|
||||
options.addOption(DEST_DIR, true, "This parameter specifies the path on shared file-system (e.g. HDFS) where the snapshot related"
|
||||
+ " information should be stored.");
|
||||
options.addOption(COLLECTION, true, "This parameter specifies the name of the collection to be used during snapshot operation");
|
||||
options.addOption(SOLR_ZK_ENSEMBLE, true, "This parameter specifies the Solr Zookeeper ensemble address");
|
||||
options.addOption(HDFS_PATH_PREFIX, true, "This parameter specifies the HDFS URI prefix to be used"
|
||||
+ " during snapshot export preparation. This is applicable only if the Solr collection index files are stored on HDFS.");
|
||||
options.addOption(BACKUP_REPO_NAME, true, "This parameter specifies the name of the backup repository to be used"
|
||||
+ " during snapshot export preparation");
|
||||
options.addOption(ASYNC_REQ_ID, true, "This parameter specifies the async request identifier to be used"
|
||||
+ " during snapshot export preparation");
|
||||
|
||||
CommandLine cmd = null;
|
||||
try {
|
||||
cmd = parser.parse(options, args);
|
||||
} catch (ParseException e) {
|
||||
System.out.println(e.getLocalizedMessage());
|
||||
printHelp(options);
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
if (cmd.hasOption(CREATE) || cmd.hasOption(DELETE) || cmd.hasOption(LIST) || cmd.hasOption(DESCRIBE)
|
||||
|| cmd.hasOption(PREPARE_FOR_EXPORT) || cmd.hasOption(EXPORT_SNAPSHOT)) {
|
||||
try (SolrSnapshotsTool tool = new SolrSnapshotsTool(cmd.getOptionValue(SOLR_ZK_ENSEMBLE))) {
|
||||
if (cmd.hasOption(CREATE)) {
|
||||
String snapshotName = cmd.getOptionValue(CREATE);
|
||||
String collectionName = cmd.getOptionValue(COLLECTION);
|
||||
tool.createSnapshot(collectionName, snapshotName);
|
||||
|
||||
} else if (cmd.hasOption(DELETE)) {
|
||||
String snapshotName = cmd.getOptionValue(DELETE);
|
||||
String collectionName = cmd.getOptionValue(COLLECTION);
|
||||
tool.deleteSnapshot(collectionName, snapshotName);
|
||||
|
||||
} else if (cmd.hasOption(LIST)) {
|
||||
String collectionName = cmd.getOptionValue(COLLECTION);
|
||||
tool.listSnapshots(collectionName);
|
||||
|
||||
} else if (cmd.hasOption(DESCRIBE)) {
|
||||
String snapshotName = cmd.getOptionValue(DESCRIBE);
|
||||
String collectionName = cmd.getOptionValue(COLLECTION);
|
||||
tool.describeSnapshot(collectionName, snapshotName);
|
||||
|
||||
} else if (cmd.hasOption(PREPARE_FOR_EXPORT)) {
|
||||
String snapshotName = cmd.getOptionValue(PREPARE_FOR_EXPORT);
|
||||
String collectionName = cmd.getOptionValue(COLLECTION);
|
||||
String localFsDir = requiredArg(options, cmd, TEMP_DIR);
|
||||
String hdfsOpDir = requiredArg(options, cmd, DEST_DIR);
|
||||
Optional<String> pathPrefix = Optional.ofNullable(cmd.getOptionValue(HDFS_PATH_PREFIX));
|
||||
|
||||
if (pathPrefix.isPresent()) {
|
||||
try {
|
||||
new URI(pathPrefix.get());
|
||||
} catch (URISyntaxException e) {
|
||||
System.out.println(
|
||||
"The specified File system path prefix " + pathPrefix.get()
|
||||
+ " is invalid. The error is " + e.getLocalizedMessage());
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
tool.prepareForExport(collectionName, snapshotName, localFsDir, pathPrefix, hdfsOpDir);
|
||||
|
||||
} else if (cmd.hasOption(EXPORT_SNAPSHOT)) {
|
||||
String snapshotName = cmd.getOptionValue(EXPORT_SNAPSHOT);
|
||||
String collectionName = cmd.getOptionValue(COLLECTION);
|
||||
String destDir = requiredArg(options, cmd, DEST_DIR);
|
||||
Optional<String> backupRepo = Optional.ofNullable(cmd.getOptionValue(BACKUP_REPO_NAME));
|
||||
Optional<String> asyncReqId = Optional.ofNullable(cmd.getOptionValue(ASYNC_REQ_ID));
|
||||
|
||||
tool.exportSnapshot(collectionName, snapshotName, destDir, backupRepo, asyncReqId);
|
||||
}
|
||||
}
|
||||
} else if (cmd.hasOption(HELP)) {
|
||||
printHelp(options);
|
||||
} else {
|
||||
System.out.println("Unknown command specified.");
|
||||
printHelp(options);
|
||||
}
|
||||
}
|
||||
|
||||
private static String requiredArg(Options options, CommandLine cmd, String optVal) {
|
||||
if (!cmd.hasOption(optVal)) {
|
||||
System.out.println("Please specify the value for option " + optVal);
|
||||
printHelp(options);
|
||||
System.exit(1);
|
||||
}
|
||||
return cmd.getOptionValue(optVal);
|
||||
}
|
||||
|
||||
private static boolean isReplicaAvailable (Slice s, String coreName) {
|
||||
for (Replica r: s.getReplicas()) {
|
||||
if (coreName.equals(r.getCoreName())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private Collection<CollectionSnapshotMetaData> listCollectionSnapshots(String collectionName)
|
||||
throws SolrServerException, IOException {
|
||||
CollectionAdminRequest.ListSnapshots listSnapshots = new CollectionAdminRequest.ListSnapshots(collectionName);
|
||||
CollectionAdminResponse resp = listSnapshots.process(solrClient);
|
||||
|
||||
Preconditions.checkState(resp.getStatus() == 0);
|
||||
|
||||
NamedList apiResult = (NamedList) resp.getResponse().get(SolrSnapshotManager.SNAPSHOTS_INFO);
|
||||
|
||||
Collection<CollectionSnapshotMetaData> result = new ArrayList<>();
|
||||
for (int i = 0; i < apiResult.size(); i++) {
|
||||
result.add(new CollectionSnapshotMetaData((NamedList<Object>)apiResult.getVal(i)));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void printHelp(Options options) {
|
||||
StringBuilder helpFooter = new StringBuilder();
|
||||
helpFooter.append("Examples: \n");
|
||||
helpFooter.append("snapshotscli.sh --create snapshot-1 -c books -z localhost:2181 \n");
|
||||
helpFooter.append("snapshotscli.sh --list -c books -z localhost:2181 \n");
|
||||
helpFooter.append("snapshotscli.sh --describe snapshot-1 -c books -z localhost:2181 \n");
|
||||
helpFooter.append("snapshotscli.sh --export snapshot-1 -c books -z localhost:2181 -b repo -l backupPath -i req_0 \n");
|
||||
helpFooter.append("snapshotscli.sh --delete snapshot-1 -c books -z localhost:2181 \n");
|
||||
|
||||
HelpFormatter formatter = new HelpFormatter();
|
||||
formatter.setOptionComparator(new OptionComarator<>());
|
||||
formatter.printHelp("SolrSnapshotsTool", null, options, helpFooter.toString(), false);
|
||||
}
|
||||
|
||||
private static class OptionComarator<T extends Option> implements Comparator<T> {
|
||||
|
||||
public int compare(T o1, T o2) {
|
||||
String s1 = o1.hasLongOpt() ? o1.getLongOpt() : o1.getOpt();
|
||||
String s2 = o2.hasLongOpt() ? o2.getLongOpt() : o2.getOpt();
|
||||
return OPTION_HELP_ORDER.indexOf(s1) - OPTION_HELP_ORDER.indexOf(s2);
|
||||
}
|
||||
}
|
||||
|
||||
}
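A hedged sketch of driving the tool programmatically instead of through snapshotscli.sh, using only the public methods defined above; the ZK ensemble, collection name and destination path are placeholders.

import java.util.Optional;

import org.apache.solr.core.snapshots.SolrSnapshotsTool;

public class SnapshotToolExample {
  public static void main(String[] args) throws Exception {
    try (SolrSnapshotsTool tool = new SolrSnapshotsTool("zk1:2181,zk2:2181/solr")) {
      tool.createSnapshot("books", "snapshot-1");   // CREATESNAPSHOT
      tool.listSnapshots("books");                  // LISTSNAPSHOTS
      tool.describeSnapshot("books", "snapshot-1"); // per-core snapshot details
      // Exports the snapshot's index files plus collection meta-data to the destination.
      tool.exportSnapshot("books", "snapshot-1", "/backups/books", Optional.empty(), Optional.empty());
    }
  }
}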
@@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler;
|
||||
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.handler.component.SearchHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
|
||||
public class ExportHandler extends SearchHandler {
|
||||
@Override
|
||||
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
|
||||
try {
|
||||
super.handleRequestBody(req, rsp);
|
||||
} catch (Exception e) {
|
||||
rsp.setException(e);
|
||||
}
|
||||
String wt = req.getParams().get(CommonParams.WT, JSON);
|
||||
if("xsort".equals(wt)) wt = JSON;
|
||||
Map<String, String> map = new HashMap<>(1);
|
||||
map.put(CommonParams.WT, ReplicationHandler.FILE_STREAM);
|
||||
req.setParams(SolrParams.wrapDefaults(new MapSolrParams(map),req.getParams()));
|
||||
rsp.add(ReplicationHandler.FILE_STREAM, new ExportWriter(req, rsp, wt));
|
||||
}
|
||||
}
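The handler above swaps the response writer to ReplicationHandler.FILE_STREAM so the ExportWriter below can stream the full sorted result set. A sketch of calling it over HTTP, assuming the handler is registered at /export and, as is usual for export requests, that an explicit sort and a docValues-backed field list are supplied; host, collection and field names are placeholders.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class ExportRequestExample {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:8983/solr/books/export?q=*:*&sort=id+asc&fl=id");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    // The export response can be large, so read it as a stream rather than buffering it.
    try (BufferedReader in = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
      String line;
      while ((line = in.readLine()) != null) {
        System.out.println(line);
      }
    } finally {
      conn.disconnect();
    }
  }
}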
@@ -14,17 +14,21 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.response;
|
||||
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.Writer;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
|
@ -40,11 +44,18 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
|
||||
import org.apache.solr.common.IteratorWriter;
|
||||
import org.apache.solr.common.MapWriter;
|
||||
import org.apache.solr.common.MapWriter.EntryWriter;
|
||||
import org.apache.solr.common.PushWriter;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.response.JSONResponseWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.BoolField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
|
@ -61,24 +72,65 @@ import org.apache.solr.search.SyntaxError;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static java.util.Collections.singletonList;
|
||||
import static java.util.Collections.singletonMap;
|
||||
import static org.apache.solr.common.util.Utils.makeMap;
|
||||
|
||||
public class SortingResponseWriter implements QueryResponseWriter {
|
||||
|
||||
public class ExportWriter implements SolrCore.RawWriter, Closeable {
|
||||
private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private OutputStreamWriter respWriter;
|
||||
final SolrQueryRequest req;
|
||||
final SolrQueryResponse res;
|
||||
FieldWriter[] fieldWriters;
|
||||
int totalHits = 0;
|
||||
FixedBitSet[] sets = null;
|
||||
PushWriter writer;
|
||||
private String wt;
|
||||
|
||||
|
||||
ExportWriter(SolrQueryRequest req, SolrQueryResponse res, String wt) {
|
||||
this.req = req;
|
||||
this.res = res;
|
||||
this.wt = wt;
|
||||
|
||||
public void init(NamedList args) {
|
||||
/* NOOP */
|
||||
}
|
||||
|
||||
public String getContentType(SolrQueryRequest req, SolrQueryResponse res) {
|
||||
return "application/json";
|
||||
@Override
|
||||
public String getContentType() {
|
||||
if ("javabin".equals(wt)) {
|
||||
return BinaryResponseParser.BINARY_CONTENT_TYPE;
|
||||
} else return "json";
|
||||
}
|
||||
|
||||
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse res) throws IOException {
|
||||
Exception e1 = res.getException();
|
||||
if(e1 != null) {
|
||||
if(!(e1 instanceof IgnoreException)) {
|
||||
writeException(e1, writer, false);
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (writer != null) writer.close();
|
||||
if (respWriter != null) {
|
||||
respWriter.flush();
|
||||
respWriter.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected void writeException(Exception e, PushWriter w, boolean log) throws IOException {
|
||||
w.writeMap(mw -> {
|
||||
mw.put("responseHeader", singletonMap("status", 400))
|
||||
.put("response", makeMap(
|
||||
"numFound", 0,
|
||||
"docs", singletonList(singletonMap("EXCEPTION", e.getMessage()))));
|
||||
});
|
||||
if (log) {
|
||||
SolrException.log(logger, e);
|
||||
}
|
||||
}
|
||||
|
||||
public void write(OutputStream os) throws IOException {
|
||||
respWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8);
|
||||
writer = JSONResponseWriter.getPushWriter(respWriter, req, res);
|
||||
Exception exception = res.getException();
|
||||
if (exception != null) {
|
||||
if (!(exception instanceof IgnoreException)) {
|
||||
writeException(exception, writer, false);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -113,8 +165,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
// You'll have to uncomment the if below to hit the null pointer exception.
// This is such an unusual case (i.e. an empty index) that catching this condition here is probably OK.
// This came to light in the very artificial case of indexing a single doc to Cloud.
|
||||
int totalHits = 0;
|
||||
FixedBitSet[] sets = null;
|
||||
if (req.getContext().get("totalHits") != null) {
|
||||
totalHits = ((Integer)req.getContext().get("totalHits")).intValue();
|
||||
sets = (FixedBitSet[]) req.getContext().get("export");
|
||||
|
@ -145,8 +195,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
}
|
||||
}
|
||||
|
||||
FieldWriter[] fieldWriters = null;
|
||||
|
||||
try {
|
||||
fieldWriters = getFieldWriters(fields, req.getSearcher());
|
||||
} catch (Exception e) {
|
||||
|
@ -154,9 +202,17 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
return;
|
||||
}
|
||||
|
||||
writer.write("{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":"+totalHits+", \"docs\":[");
|
||||
writer.writeMap(m -> {
|
||||
m.put("responseHeader", singletonMap("status", 0));
|
||||
m.put("response", (MapWriter) mw -> {
|
||||
mw.put("numFound", totalHits);
|
||||
mw.put("docs", (IteratorWriter) iw -> writeDocs(req, iw, sort));
|
||||
});
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
protected void writeDocs(SolrQueryRequest req, IteratorWriter.ItemWriter writer, Sort sort) throws IOException {
|
||||
//Write the data.
|
||||
List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
|
||||
SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort());
|
||||
|
@ -165,7 +221,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
SortQueue queue = new SortQueue(queueSize, sortDoc);
|
||||
SortDoc[] outDocs = new SortDoc[queueSize];
|
||||
|
||||
boolean commaNeeded = false;
|
||||
while(count < totalHits) {
|
||||
//long begin = System.nanoTime();
|
||||
queue.reset();
|
||||
|
@ -192,19 +247,17 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
}
|
||||
}
|
||||
|
||||
//long end = System.nanoTime();
|
||||
//long end = System.nanoTime();
|
||||
|
||||
count += (outDocsIndex+1);
|
||||
|
||||
try {
|
||||
for(int i=outDocsIndex; i>=0; --i) {
|
||||
SortDoc s = outDocs[i];
|
||||
if(commaNeeded){writer.write(',');}
|
||||
writer.write('{');
|
||||
writeDoc(s, leaves, fieldWriters, sets, writer);
|
||||
writer.write('}');
|
||||
commaNeeded = true;
|
||||
s.reset();
|
||||
writer.add((MapWriter) ew -> {
|
||||
writeDoc(s, leaves, ew);
|
||||
s.reset();
|
||||
});
|
||||
}
|
||||
} catch(Throwable e) {
|
||||
Throwable ex = e;
|
||||
|
@ -224,54 +277,24 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
//System.out.println("Sort Time 2:"+Long.toString(total/1000000));
|
||||
writer.write("]}}");
|
||||
writer.flush();
|
||||
}
|
||||
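The export path above drains the full result set in fixed-size batches: each pass refills the SortQueue, pops it into outDocs (the queue pops worst-first), then walks outDocs in reverse so documents stream out in sort order, each one emitted through the IteratorWriter.ItemWriter as a MapWriter. A condensed sketch of that loop, with the batch-collection step elided and helper calls assumed rather than verbatim:

  while (count < totalHits) {
    queue.reset();
    // ... collect the next batch of matching docs into the queue via the per-segment FixedBitSets ...
    int outDocsIndex = -1;
    while (queue.size() > 0) {
      outDocs[++outDocsIndex] = queue.pop();      // worst-first out of the priority queue
    }
    count += outDocsIndex + 1;
    for (int i = outDocsIndex; i >= 0; --i) {     // reverse: best-first to the client
      SortDoc s = outDocs[i];
      writer.add((MapWriter) ew -> {
        writeDoc(s, leaves, ew);
        s.reset();
      });
    }
  }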
|
||||
public static class IgnoreException extends IOException {
|
||||
public void printStackTrace(PrintWriter pw) {
|
||||
pw.print("Early Client Disconnect");
|
||||
|
||||
}
|
||||
|
||||
public String getMessage() {
|
||||
return "Early Client Disconnect";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void writeDoc(SortDoc sortDoc,
|
||||
List<LeafReaderContext> leaves,
|
||||
FieldWriter[] fieldWriters,
|
||||
FixedBitSet[] sets,
|
||||
Writer out) throws IOException{
|
||||
EntryWriter ew) throws IOException {
|
||||
|
||||
int ord = sortDoc.ord;
|
||||
FixedBitSet set = sets[ord];
|
||||
set.clear(sortDoc.docId);
|
||||
LeafReaderContext context = leaves.get(ord);
|
||||
int fieldIndex = 0;
|
||||
for(FieldWriter fieldWriter : fieldWriters) {
|
||||
if(fieldWriter.write(sortDoc.docId, context.reader(), out, fieldIndex)){
|
||||
for (FieldWriter fieldWriter : fieldWriters) {
|
||||
if (fieldWriter.write(sortDoc.docId, context.reader(), ew, fieldIndex)) {
|
||||
++fieldIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeException(Exception e, Writer out, boolean log) throws IOException{
|
||||
out.write("{\"responseHeader\": {\"status\": 400}, \"response\":{\"numFound\":0, \"docs\":[");
|
||||
out.write("{\"EXCEPTION\":\"");
|
||||
writeStr(e.getMessage(), out);
|
||||
out.write("\"}");
|
||||
out.write("]}}");
|
||||
out.flush();
|
||||
if(log) {
|
||||
SolrException.log(logger, e);
|
||||
}
|
||||
}
|
||||
|
||||
protected FieldWriter[] getFieldWriters(String[] fields, SolrIndexSearcher searcher) throws IOException {
|
||||
IndexSchema schema = searcher.getSchema();
|
||||
FieldWriter[] writers = new FieldWriter[fields.length];
|
||||
|
@ -291,50 +314,49 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
|
||||
boolean multiValued = schemaField.multiValued();
|
||||
FieldType fieldType = schemaField.getType();
|
||||
if(fieldType instanceof TrieIntField) {
|
||||
if(multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, true);
|
||||
if (fieldType instanceof TrieIntField) {
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
|
||||
} else {
|
||||
writers[i] = new IntFieldWriter(field);
|
||||
}
|
||||
} else if (fieldType instanceof TrieLongField) {
|
||||
if(multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, true);
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
|
||||
} else {
|
||||
writers[i] = new LongFieldWriter(field);
|
||||
}
|
||||
} else if (fieldType instanceof TrieFloatField) {
|
||||
if(multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, true);
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
|
||||
} else {
|
||||
writers[i] = new FloatFieldWriter(field);
|
||||
}
|
||||
} else if(fieldType instanceof TrieDoubleField) {
|
||||
if(multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, true);
|
||||
} else if (fieldType instanceof TrieDoubleField) {
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
|
||||
} else {
|
||||
writers[i] = new DoubleFieldWriter(field);
|
||||
}
|
||||
} else if(fieldType instanceof StrField) {
|
||||
if(multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, false);
|
||||
} else if (fieldType instanceof StrField) {
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
|
||||
} else {
|
||||
writers[i] = new StringFieldWriter(field, fieldType);
|
||||
}
|
||||
} else if (fieldType instanceof TrieDateField) {
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, false);
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
|
||||
} else {
|
||||
writers[i] = new DateFieldWriter(field);
|
||||
}
|
||||
} else if(fieldType instanceof BoolField) {
|
||||
if(multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, true);
|
||||
} else if (fieldType instanceof BoolField) {
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
|
||||
} else {
|
||||
writers[i] = new BoolFieldWriter(field, fieldType);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean");
|
||||
}
|
||||
}
|
||||
|
@ -398,8 +420,8 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
// _and_ since "F" happens to sort before "T" (thus false sorts "less" than true)
|
||||
// we can just use the existing StringValue here.
|
||||
LeafReader reader = searcher.getSlowAtomicReader();
|
||||
SortedDocValues vals = reader.getSortedDocValues(field);
|
||||
if(reverse) {
|
||||
SortedDocValues vals = reader.getSortedDocValues(field);
|
||||
if (reverse) {
|
||||
sortValues[i] = new StringValue(vals, field, new IntDesc());
|
||||
} else {
|
||||
sortValues[i] = new StringValue(vals, field, new IntAsc());
|
||||
|
@ -439,8 +461,8 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
private void populate() {
|
||||
Object[] heap = getHeapArray();
|
||||
cache = new SortDoc[heap.length];
|
||||
for(int i=1; i<heap.length; i++) {
|
||||
cache[i] = heap[i] = proto.copy();
|
||||
for (int i = 1; i < heap.length; i++) {
|
||||
cache[i] = heap[i] = proto.copy();
|
||||
}
|
||||
size = maxSize;
|
||||
}
|
||||
|
@ -470,7 +492,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
|
||||
public void setNextReader(LeafReaderContext context) throws IOException {
|
||||
this.ord = context.ord;
|
||||
for(SortValue value : sortValues) {
|
||||
for (SortValue value : sortValues) {
|
||||
value.setNextReader(context);
|
||||
}
|
||||
}
|
||||
|
@ -1295,7 +1317,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
}
|
||||
|
||||
protected abstract class FieldWriter {
|
||||
public abstract boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException;
|
||||
public abstract boolean write(int docId, LeafReader reader, EntryWriter out, int fieldIndex) throws IOException;
|
||||
}
|
||||
|
||||
class IntFieldWriter extends FieldWriter {
|
||||
|
@ -1305,7 +1327,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
this.field = field;
|
||||
}
|
||||
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
|
||||
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
|
||||
int val;
|
||||
if (vals.advance(docId) == docId) {
|
||||
|
@ -1313,14 +1335,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
} else {
|
||||
val = 0;
|
||||
}
|
||||
if(fieldIndex>0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(':');
|
||||
out.write(Integer.toString(val));
|
||||
ew.put(this.field, val);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
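With the EntryWriter-based contract above, adding a writer for another field type is mostly boilerplate. A minimal sketch follows; the field semantics (epoch seconds stored in numeric docValues) and the class name are invented for illustration, and the class is assumed to sit alongside the other writers inside this response writer so the usual Lucene and java.util imports are in scope:

  class EpochSecondsFieldWriter extends FieldWriter {
    private final String field;

    public EpochSecondsFieldWriter(String field) {
      this.field = field;
    }

    public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
      NumericDocValues vals = DocValues.getNumeric(reader, this.field);
      if (vals.advance(docId) != docId) {
        return false;                                        // no value for this document
      }
      ew.put(this.field, new Date(vals.longValue() * 1000L)); // seconds -> java.util.Date
      return true;
    }
  }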
|
@ -1328,57 +1343,31 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
class MultiFieldWriter extends FieldWriter {
|
||||
private String field;
|
||||
private FieldType fieldType;
|
||||
private SchemaField schemaField;
|
||||
private boolean numeric;
|
||||
private CharsRefBuilder cref = new CharsRefBuilder();
|
||||
|
||||
public MultiFieldWriter(String field, FieldType fieldType, boolean numeric) {
|
||||
public MultiFieldWriter(String field, FieldType fieldType, SchemaField schemaField, boolean numeric) {
|
||||
this.field = field;
|
||||
this.fieldType = fieldType;
|
||||
this.schemaField = schemaField;
|
||||
this.numeric = numeric;
|
||||
}
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter out, int fieldIndex) throws IOException {
|
||||
SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field);
|
||||
List<Long> ords;
|
||||
if (vals.advance(docId) == docId) {
|
||||
ords = new ArrayList();
|
||||
long o = -1;
|
||||
while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
ords.add(o);
|
||||
}
|
||||
assert ords.size() > 0;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if(fieldIndex>0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(':');
|
||||
out.write('[');
|
||||
int v = 0;
|
||||
for(long ord : ords) {
|
||||
BytesRef ref = vals.lookupOrd(ord);
|
||||
fieldType.indexedToReadable(ref, cref);
|
||||
if(v > 0) {
|
||||
out.write(',');
|
||||
}
|
||||
|
||||
if(!numeric) {
|
||||
out.write('"');
|
||||
}
|
||||
|
||||
writeStr(cref.toString(), out);
|
||||
|
||||
if(!numeric) {
|
||||
out.write('"');
|
||||
}
|
||||
++v;
|
||||
}
|
||||
out.write("]");
|
||||
if (vals.advance(docId) != docId) return false;
|
||||
out.put(this.field,
|
||||
(IteratorWriter) w -> {
|
||||
long o;
|
||||
while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
BytesRef ref = vals.lookupOrd(o);
|
||||
fieldType.indexedToReadable(ref, cref);
|
||||
IndexableField f = fieldType.createField(schemaField, cref.toString(), 1.0f);
|
||||
if (f == null) w.add(cref.toString());
|
||||
else w.add(fieldType.toObject(f));
|
||||
}
|
||||
});
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1390,7 +1379,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
this.field = field;
|
||||
}
|
||||
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
|
||||
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
|
||||
long val;
|
||||
if (vals.advance(docId) == docId) {
|
||||
|
@ -1398,14 +1387,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
} else {
|
||||
val = 0;
|
||||
}
|
||||
if(fieldIndex > 0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(':');
|
||||
out.write(Long.toString(val));
|
||||
ew.put(field, val);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1417,7 +1399,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
this.field = field;
|
||||
}
|
||||
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
|
||||
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
|
||||
long val;
|
||||
if (vals.advance(docId) == docId) {
|
||||
|
@ -1425,17 +1407,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
} else {
|
||||
val = 0;
|
||||
}
|
||||
|
||||
if (fieldIndex > 0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(':');
|
||||
out.write('"');
|
||||
writeStr(new Date(val).toInstant().toString(), out);
|
||||
out.write('"');
|
||||
ew.put(this.field, new Date(val));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1450,7 +1422,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
this.fieldType = fieldType;
|
||||
}
|
||||
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
|
||||
SortedDocValues vals = DocValues.getSorted(reader, this.field);
|
||||
if (vals.advance(docId) != docId) {
|
||||
return false;
|
||||
|
@ -1459,17 +1431,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
|
||||
BytesRef ref = vals.lookupOrd(ord);
|
||||
fieldType.indexedToReadable(ref, cref);
|
||||
|
||||
if (fieldIndex > 0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(':');
|
||||
//out.write('"');
|
||||
writeStr(cref.toString(), out);
|
||||
//out.write('"');
|
||||
ew.put(this.field, "true".equals(cref.toString()));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1481,7 +1443,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
this.field = field;
|
||||
}
|
||||
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
|
||||
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
|
||||
int val;
|
||||
if (vals.advance(docId) == docId) {
|
||||
|
@ -1489,14 +1451,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
} else {
|
||||
val = 0;
|
||||
}
|
||||
if(fieldIndex > 0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(':');
|
||||
out.write(Float.toString(Float.intBitsToFloat(val)));
|
||||
ew.put(this.field, Float.intBitsToFloat(val));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1508,7 +1463,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
this.field = field;
|
||||
}
|
||||
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
|
||||
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
|
||||
long val;
|
||||
if (vals.advance(docId) == docId) {
|
||||
|
@ -1516,14 +1471,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
} else {
|
||||
val = 0;
|
||||
}
|
||||
if(fieldIndex > 0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(':');
|
||||
out.write(Double.toString(Double.longBitsToDouble(val)));
|
||||
ew.put(this.field, Double.longBitsToDouble(val));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1538,7 +1486,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
this.fieldType = fieldType;
|
||||
}
|
||||
|
||||
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
|
||||
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
|
||||
SortedDocValues vals = DocValues.getSorted(reader, this.field);
|
||||
if (vals.advance(docId) != docId) {
|
||||
return false;
|
||||
|
@ -1547,64 +1495,11 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
|
||||
BytesRef ref = vals.lookupOrd(ord);
|
||||
fieldType.indexedToReadable(ref, cref);
|
||||
if(fieldIndex > 0) {
|
||||
out.write(',');
|
||||
}
|
||||
out.write('"');
|
||||
out.write(this.field);
|
||||
out.write('"');
|
||||
out.write(":");
|
||||
out.write('"');
|
||||
writeStr(cref.toString(), out);
|
||||
out.write('"');
|
||||
ew.put(this.field, cref.toString());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private void writeStr(String val, Writer writer) throws IOException {
|
||||
for (int i=0; i<val.length(); i++) {
|
||||
char ch = val.charAt(i);
|
||||
if ((ch > '#' && ch != '\\' && ch < '\u2028') || ch == ' ') { // fast path
|
||||
writer.write(ch);
|
||||
continue;
|
||||
}
|
||||
switch(ch) {
|
||||
case '"':
|
||||
case '\\':
|
||||
writer.write('\\');
|
||||
writer.write(ch);
|
||||
break;
|
||||
case '\r': writer.write('\\'); writer.write('r'); break;
|
||||
case '\n': writer.write('\\'); writer.write('n'); break;
|
||||
case '\t': writer.write('\\'); writer.write('t'); break;
|
||||
case '\b': writer.write('\\'); writer.write('b'); break;
|
||||
case '\f': writer.write('\\'); writer.write('f'); break;
|
||||
case '\u2028': // fallthrough
|
||||
case '\u2029':
|
||||
unicodeEscape(writer,ch);
|
||||
break;
|
||||
// case '/':
|
||||
default: {
|
||||
if (ch <= 0x1F) {
|
||||
unicodeEscape(writer,ch);
|
||||
} else {
|
||||
writer.write(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static char[] hexdigits = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
|
||||
protected static void unicodeEscape(Appendable out, int ch) throws IOException {
|
||||
out.append('\\');
|
||||
out.append('u');
|
||||
out.append(hexdigits[(ch>>>12) ]);
|
||||
out.append(hexdigits[(ch>>>8) & 0xf]);
|
||||
out.append(hexdigits[(ch>>>4) & 0xf]);
|
||||
out.append(hexdigits[(ch) & 0xf]);
|
||||
}
|
||||
|
||||
public abstract class PriorityQueue<T> {
|
||||
protected int size = 0;
|
||||
protected final int maxSize;
|
||||
|
@ -1802,4 +1697,15 @@ public class SortingResponseWriter implements QueryResponseWriter {
|
|||
return (Object[]) heap;
|
||||
}
|
||||
}
|
||||
|
||||
public class IgnoreException extends IOException {
|
||||
public void printStackTrace(PrintWriter pw) {
|
||||
pw.print("Early Client Disconnect");
|
||||
}
|
||||
|
||||
public String getMessage() {
|
||||
return "Early Client Disconnect";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -734,8 +734,14 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
|||
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existence of " + uri + ". Is it valid?", ex);
|
||||
}
|
||||
|
||||
String strategy = req.getParams().get(CollectionAdminParams.INDEX_BACKUP_STRATEGY, CollectionAdminParams.COPY_FILES_STRATEGY);
|
||||
if (!CollectionAdminParams.INDEX_BACKUP_STRATEGIES.contains(strategy)) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown index backup strategy " + strategy);
|
||||
}
|
||||
|
||||
Map<String, Object> params = req.getParams().getAll(null, NAME, COLLECTION_PROP, CoreAdminParams.COMMIT_NAME);
|
||||
params.put(CoreAdminParams.BACKUP_LOCATION, location);
|
||||
params.put(CollectionAdminParams.INDEX_BACKUP_STRATEGY, strategy);
|
||||
return params;
|
||||
}),
|
||||
RESTORE_OP(RESTORE, (req, rsp, h) -> {
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.solr.request.LocalSolrQueryRequest;
|
|||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.update.CommitUpdateCommand;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.apache.solr.util.TestInjection;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -46,6 +47,8 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
|
|||
|
||||
@Override
|
||||
public void execute(CallInfo it) throws Exception {
|
||||
assert TestInjection.injectPrepRecoveryOpPauseForever();
|
||||
|
||||
final SolrParams params = it.req.getParams();
|
||||
|
||||
String cname = params.get(CoreAdminParams.CORE);
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.solr.handler.component;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.ConnectException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
@ -116,7 +117,7 @@ public class HttpShardHandler extends ShardHandler {
|
|||
private List<String> getURLs(String shard, String preferredHostAddress) {
|
||||
List<String> urls = shardToURLs.get(shard);
|
||||
if (urls == null) {
|
||||
urls = httpShardHandlerFactory.makeURLList(shard);
|
||||
urls = httpShardHandlerFactory.buildURLList(shard);
|
||||
if (preferredHostAddress != null && urls.size() > 1) {
|
||||
preferCurrentHostForDistributedReq(preferredHostAddress, urls);
|
||||
}
|
||||
|
@ -320,6 +321,8 @@ public class HttpShardHandler extends ShardHandler {
|
|||
}
|
||||
}
|
||||
|
||||
final ReplicaListTransformer replicaListTransformer = httpShardHandlerFactory.getReplicaListTransformer(req);
|
||||
|
||||
if (shards != null) {
|
||||
List<String> lst = StrUtils.splitSmart(shards, ",", true);
|
||||
rb.shards = lst.toArray(new String[lst.size()]);
|
||||
|
@ -404,7 +407,11 @@ public class HttpShardHandler extends ShardHandler {
|
|||
|
||||
|
||||
for (int i=0; i<rb.shards.length; i++) {
|
||||
if (rb.shards[i] == null) {
|
||||
final List<String> shardUrls;
|
||||
if (rb.shards[i] != null) {
|
||||
shardUrls = StrUtils.splitSmart(rb.shards[i], "|", true);
|
||||
replicaListTransformer.transform(shardUrls);
|
||||
} else {
|
||||
if (clusterState == null) {
|
||||
clusterState = zkController.getClusterState();
|
||||
slices = clusterState.getSlicesMap(cloudDescriptor.getCollectionName());
|
||||
|
@ -421,26 +428,25 @@ public class HttpShardHandler extends ShardHandler {
|
|||
// throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no such shard: " + sliceName);
|
||||
}
|
||||
|
||||
Map<String, Replica> sliceShards = slice.getReplicasMap();
|
||||
|
||||
// For now, recreate the | delimited list of equivalent servers
|
||||
StringBuilder sliceShardsStr = new StringBuilder();
|
||||
boolean first = true;
|
||||
for (Replica replica : sliceShards.values()) {
|
||||
final Collection<Replica> allSliceReplicas = slice.getReplicasMap().values();
|
||||
final List<Replica> eligibleSliceReplicas = new ArrayList<>(allSliceReplicas.size());
|
||||
for (Replica replica : allSliceReplicas) {
|
||||
if (!clusterState.liveNodesContain(replica.getNodeName())
|
||||
|| replica.getState() != Replica.State.ACTIVE) {
|
||||
continue;
|
||||
}
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
sliceShardsStr.append('|');
|
||||
}
|
||||
String url = ZkCoreNodeProps.getCoreUrl(replica);
|
||||
sliceShardsStr.append(url);
|
||||
eligibleSliceReplicas.add(replica);
|
||||
}
|
||||
|
||||
if (sliceShardsStr.length() == 0) {
|
||||
replicaListTransformer.transform(eligibleSliceReplicas);
|
||||
|
||||
shardUrls = new ArrayList<>(eligibleSliceReplicas.size());
|
||||
for (Replica replica : eligibleSliceReplicas) {
|
||||
String url = ZkCoreNodeProps.getCoreUrl(replica);
|
||||
shardUrls.add(url);
|
||||
}
|
||||
|
||||
if (shardUrls.isEmpty()) {
|
||||
boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false);
|
||||
if (!tolerant) {
|
||||
// stop the check when there are no replicas available for a shard
|
||||
|
@ -448,9 +454,19 @@ public class HttpShardHandler extends ShardHandler {
|
|||
"no servers hosting shard: " + rb.slices[i]);
|
||||
}
|
||||
}
|
||||
|
||||
rb.shards[i] = sliceShardsStr.toString();
|
||||
}
|
||||
// And now recreate the | delimited list of equivalent servers
|
||||
final StringBuilder sliceShardsStr = new StringBuilder();
|
||||
boolean first = true;
|
||||
for (String shardUrl : shardUrls) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
sliceShardsStr.append('|');
|
||||
}
|
||||
sliceShardsStr.append(shardUrl);
|
||||
}
|
||||
rb.shards[i] = sliceShardsStr.toString();
|
||||
}
|
||||
}
|
||||
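After the transform, each rb.shards[i] is rebuilt as a '|'-delimited list of equivalent core URLs for that slice. With invented hostnames, a two-replica shard would come out something like http://host1:8983/solr/collection1_shard1_replica1/|http://host2:8983/solr/collection1_shard1_replica2/, and buildURLList() later splits the string back apart on the same delimiter.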
String shards_rows = params.get(ShardParams.SHARDS_ROWS);
|
||||
|
|
|
@ -31,13 +31,13 @@ import org.apache.solr.common.util.StrUtils;
|
|||
import org.apache.solr.common.util.URLUtil;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.update.UpdateShardHandlerConfig;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
|
@ -84,6 +84,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
|
|||
|
||||
private final Random r = new Random();
|
||||
|
||||
private final ReplicaListTransformer shufflingReplicaListTransformer = new ShufflingReplicaListTransformer(r);
|
||||
|
||||
// URL scheme to be used in distributed search.
|
||||
static final String INIT_URL_SCHEME = "urlScheme";
|
||||
|
||||
|
@ -227,12 +229,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates a randomized list of urls for the given shard.
|
||||
* Creates a list of urls for the given shard.
|
||||
*
|
||||
* @param shard the urls for the shard, separated by '|'
|
||||
* @return A list of valid urls (including protocol) that are replicas for the shard
|
||||
*/
|
||||
public List<String> makeURLList(String shard) {
|
||||
public List<String> buildURLList(String shard) {
|
||||
List<String> urls = StrUtils.splitSmart(shard, "|", true);
|
||||
|
||||
// convert shard to URL
|
||||
|
@ -240,17 +242,14 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
|
|||
urls.set(i, buildUrl(urls.get(i)));
|
||||
}
|
||||
|
||||
//
|
||||
// Shuffle the list instead of using round-robin by default.
|
||||
// This prevents accidental synchronization where multiple shards could get in sync
|
||||
// and query the same replica at the same time.
|
||||
//
|
||||
if (urls.size() > 1)
|
||||
Collections.shuffle(urls, r);
|
||||
|
||||
return urls;
|
||||
}
|
||||
|
||||
ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req)
|
||||
{
|
||||
return shufflingReplicaListTransformer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new completion service for use by a single set of distributed requests.
|
||||
*/
|
||||
|
|
|
@ -973,8 +973,7 @@ public class QueryComponent extends SearchComponent
|
|||
|
||||
// Merge the docs via a priority queue so we don't have to sort *all* of the
|
||||
// documents... we only need to order the top (rows+start)
|
||||
ShardFieldSortedHitQueue queue;
|
||||
queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
|
||||
final ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
|
||||
|
||||
NamedList<Object> shardInfo = null;
|
||||
if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
|
||||
interface ReplicaListTransformer {
|
||||
|
||||
/**
|
||||
* Transforms the passed in list of choices. Transformations can include (but are not limited to)
|
||||
* reordering of elements (e.g. via shuffling) and removal of elements (i.e. filtering).
|
||||
*
|
||||
* @param choices - a list of choices to transform, typically the choices are {@link Replica} objects but choices
|
||||
* can also be {@link String} objects such as URLs passed in via the {@link ShardParams#SHARDS} parameter.
|
||||
*/
|
||||
public void transform(List<?> choices);
|
||||
|
||||
}
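Implementations can reorder or filter the list in place. A minimal sketch of a custom transformer (not part of this change; the class name and filtering policy are invented) that drops replicas hosted on one node, assuming the usual java.util.Iterator/List and Replica imports:

  class ExcludeNodeReplicaListTransformer implements ReplicaListTransformer {
    private final String excludedNodeName;

    ExcludeNodeReplicaListTransformer(String excludedNodeName) {
      this.excludedNodeName = excludedNodeName;
    }

    public void transform(List<?> choices) {
      Iterator<?> it = choices.iterator();
      while (it.hasNext()) {
        Object choice = it.next();
        if (choice instanceof Replica
            && excludedNodeName.equals(((Replica) choice).getNodeName())) {
          it.remove();                 // drop replicas on the excluded node
        }
      }
    }
  }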
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
class ShufflingReplicaListTransformer implements ReplicaListTransformer {
|
||||
|
||||
private final Random r;
|
||||
|
||||
public ShufflingReplicaListTransformer(Random r)
|
||||
{
|
||||
this.r = r;
|
||||
}
|
||||
|
||||
public void transform(List<?> choices)
|
||||
{
|
||||
if (choices.size() > 1) {
|
||||
Collections.shuffle(choices, r);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -173,17 +173,18 @@ public class DocValuesFacets {
|
|||
int min=mincount-1; // the smallest value in the top 'N' values
|
||||
for (int i=(startTermIndex==-1)?1:0; i<nTerms; i++) {
|
||||
int c = counts[i];
|
||||
if (contains != null) {
|
||||
final BytesRef term = si.lookupOrd(startTermIndex+i);
|
||||
if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (c>min) {
|
||||
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
|
||||
// index order, so we already know that the keys are ordered. This can be very
|
||||
// important if a lot of the counts are repeated (like zero counts would be).
|
||||
|
||||
if (contains != null) {
|
||||
final BytesRef term = si.lookupOrd(startTermIndex+i);
|
||||
if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// smaller term numbers sort higher, so subtract the term number instead
|
||||
long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);
|
||||
boolean displaced = queue.insert(pair);
|
||||
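The packed long above puts the count in the high 32 bits and (Integer.MAX_VALUE - i) in the low 32 bits, so a plain numeric comparison orders buckets by count and, on ties, prefers the smaller (earlier) term index. A small illustration with invented values:

  long pack(int count, int termIndex) {
    return (((long) count) << 32) + (Integer.MAX_VALUE - termIndex);
  }
  // pack(5, 10) > pack(4, 2)  -> a higher count always wins
  // pack(5, 2)  > pack(5, 10) -> equal counts: the earlier term (smaller index) wins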
|
|
|
@ -71,6 +71,8 @@ public class MacroExpander {
|
|||
newValues.add(vv);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (newValues != null) {
|
||||
newValues.add(newV);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,11 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.solr.common.IteratorWriter;
|
||||
import org.apache.solr.common.MapWriter.EntryWriter;
|
||||
import org.apache.solr.common.PushWriter;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.MapWriter;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
|
@ -74,6 +78,11 @@ public class JSONResponseWriter implements QueryResponseWriter {
|
|||
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
|
||||
return contentType;
|
||||
}
|
||||
|
||||
public static PushWriter getPushWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) {
|
||||
return new JSONWriter(writer, req, rsp);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class JSONWriter extends TextResponseWriter {
|
||||
|
@ -507,6 +516,53 @@ class JSONWriter extends TextResponseWriter {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeIterator(IteratorWriter val) throws IOException {
|
||||
writeArrayOpener(-1);
|
||||
incLevel();
|
||||
val.writeIter(new IteratorWriter.ItemWriter() {
|
||||
boolean first = true;
|
||||
|
||||
@Override
|
||||
public IteratorWriter.ItemWriter add(Object o) throws IOException {
|
||||
if (!first) {
|
||||
JSONWriter.this.indent();
|
||||
JSONWriter.this.writeArraySeparator();
|
||||
}
|
||||
JSONWriter.this.writeVal(null, o);
|
||||
first = false;
|
||||
return this;
|
||||
}
|
||||
});
|
||||
decLevel();
|
||||
writeArrayCloser();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeMap(MapWriter val)
|
||||
throws IOException {
|
||||
writeMapOpener(-1);
|
||||
incLevel();
|
||||
|
||||
val.writeMap(new EntryWriter() {
|
||||
boolean isFirst = true;
|
||||
|
||||
@Override
|
||||
public EntryWriter put(String k, Object v) throws IOException {
|
||||
if (isFirst) {
|
||||
isFirst = false;
|
||||
} else {
|
||||
JSONWriter.this.writeMapSeparator();
|
||||
}
|
||||
if (doIndent) JSONWriter.this.indent();
|
||||
JSONWriter.this.writeKey(k, true);
|
||||
JSONWriter.this.writeVal(k, v);
|
||||
return this;
|
||||
}
|
||||
});
|
||||
decLevel();
|
||||
writeMapCloser();
|
||||
}
|
||||
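Together these two overrides let handlers stream structured output as lambdas instead of materializing NamedLists first. A minimal usage sketch, illustrative only, assuming a Writer plus request/response objects in scope and a static import of Collections.singletonMap:

  PushWriter pw = JSONResponseWriter.getPushWriter(respWriter, req, rsp);
  pw.writeMap(ew -> ew
      .put("numFound", 42)                              // plain scalar entry
      .put("docs", (IteratorWriter) iw -> {             // lazily written array
        iw.add(singletonMap("id", "doc1"));
        iw.add(singletonMap("id", "doc2"));
      }));
  // emits roughly: {"numFound":42,"docs":[{"id":"doc1"},{"id":"doc2"}]}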
|
||||
@Override
|
||||
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
|
||||
|
@ -544,12 +600,14 @@ class JSONWriter extends TextResponseWriter {
|
|||
public void writeArray(String name, List l) throws IOException {
|
||||
writeArrayOpener(l.size());
|
||||
writeJsonIter(l.iterator());
|
||||
writeArrayCloser();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeArray(String name, Iterator val) throws IOException {
|
||||
writeArrayOpener(-1); // no trivial way to determine array size
|
||||
writeJsonIter(val);
|
||||
writeArrayCloser();
|
||||
}
|
||||
|
||||
private void writeJsonIter(Iterator val) throws IOException {
|
||||
|
@ -564,7 +622,6 @@ class JSONWriter extends TextResponseWriter {
|
|||
first=false;
|
||||
}
|
||||
decLevel();
|
||||
writeArrayCloser();
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -634,11 +691,6 @@ class ArrayOfNamedValuePairJSONWriter extends JSONWriter {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeArray(String name, List l) throws IOException {
|
||||
writeArray(name, l.iterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeNamedList(String name, NamedList val) throws IOException {
|
||||
|
||||
|
|
|
@ -31,9 +31,12 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.EnumFieldValue;
|
||||
import org.apache.solr.common.IteratorWriter;
|
||||
import org.apache.solr.common.MapSerializable;
|
||||
import org.apache.solr.common.PushWriter;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.MapWriter;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -48,7 +51,7 @@ import org.apache.solr.util.FastWriter;
|
|||
*
|
||||
*
|
||||
*/
|
||||
public abstract class TextResponseWriter {
|
||||
public abstract class TextResponseWriter implements PushWriter {
|
||||
|
||||
// indent up to 40 spaces
|
||||
static final char[] indentChars = new char[81];
|
||||
|
@ -138,19 +141,19 @@ public abstract class TextResponseWriter {
|
|||
writeStr(name, f.stringValue(), true);
|
||||
}
|
||||
} else if (val instanceof Number) {
|
||||
writeNumber(name, (Number)val);
|
||||
writeNumber(name, (Number) val);
|
||||
} else if (val instanceof Boolean) {
|
||||
writeBool(name, (Boolean)val);
|
||||
writeBool(name, (Boolean) val);
|
||||
} else if (val instanceof Date) {
|
||||
writeDate(name,(Date)val);
|
||||
writeDate(name, (Date) val);
|
||||
} else if (val instanceof Document) {
|
||||
SolrDocument doc = DocsStreamer.getDoc((Document) val, schema);
|
||||
writeSolrDocument(name, doc,returnFields, 0 );
|
||||
writeSolrDocument(name, doc, returnFields, 0);
|
||||
} else if (val instanceof SolrDocument) {
|
||||
writeSolrDocument(name, (SolrDocument)val,returnFields, 0);
|
||||
writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
|
||||
} else if (val instanceof ResultContext) {
|
||||
// requires access to IndexReader
|
||||
writeDocuments(name, (ResultContext)val);
|
||||
writeDocuments(name, (ResultContext) val);
|
||||
} else if (val instanceof DocList) {
|
||||
// Should not happen normally
|
||||
ResultContext ctx = new BasicResultContext((DocList)val, returnFields, null, null, req);
|
||||
|
@ -168,6 +171,8 @@ public abstract class TextResponseWriter {
|
|||
writeNamedList(name, (NamedList)val);
|
||||
} else if (val instanceof Path) {
|
||||
writeStr(name, ((Path) val).toAbsolutePath().toString(), true);
|
||||
} else if (val instanceof IteratorWriter) {
|
||||
writeIterator((IteratorWriter) val);
|
||||
} else if (val instanceof Iterable) {
|
||||
writeArray(name,((Iterable)val).iterator());
|
||||
} else if (val instanceof Object[]) {
|
||||
|
@ -184,6 +189,8 @@ public abstract class TextResponseWriter {
|
|||
writeStr(name, val.toString(), true);
|
||||
} else if (val instanceof WriteableValue) {
|
||||
((WriteableValue)val).write(name, this);
|
||||
} else if (val instanceof MapWriter) {
|
||||
writeMap((MapWriter) val);
|
||||
} else if (val instanceof MapSerializable) {
|
||||
//todo find a better way to reuse the map more efficiently
|
||||
writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
|
||||
|
@ -192,6 +199,15 @@ public abstract class TextResponseWriter {
|
|||
writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public void writeMap(MapWriter mw) throws IOException {
|
||||
//todo
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeIterator(IteratorWriter iw) throws IOException {
|
||||
/*todo*/
|
||||
}
|
||||
|
||||
protected void writeBool(String name , Boolean val) throws IOException {
|
||||
writeBool(name, val.toString());
|
||||
|
|
|
@ -94,20 +94,58 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
List<Query> qlist = new ArrayList<>(freq.domain.filters.size());
|
||||
// TODO: prevent parsing filters each time!
|
||||
for (Object rawFilter : freq.domain.filters) {
|
||||
Query symbolicFilter;
|
||||
if (rawFilter instanceof String) {
|
||||
QParser parser = null;
|
||||
try {
|
||||
parser = QParser.getParser((String)rawFilter, fcontext.req);
|
||||
symbolicFilter = parser.getQuery();
|
||||
Query symbolicFilter = parser.getQuery();
|
||||
qlist.add(symbolicFilter);
|
||||
} catch (SyntaxError syntaxError) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
|
||||
}
|
||||
} else if (rawFilter instanceof Map) {
|
||||
|
||||
Map<String,Object> m = (Map<String, Object>) rawFilter;
|
||||
String type;
|
||||
Object args;
|
||||
|
||||
if (m.size() == 1) {
|
||||
Map.Entry<String, Object> entry = m.entrySet().iterator().next();
|
||||
type = entry.getKey();
|
||||
args = entry.getValue();
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't convert map to query:" + rawFilter);
|
||||
}
|
||||
|
||||
if (!"param".equals(type)) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown type. Can't convert map to query:" + rawFilter);
|
||||
}
|
||||
|
||||
String tag;
|
||||
if (!(args instanceof String)) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't retrieve non-string param:" + args);
|
||||
}
|
||||
tag = (String)args;
|
||||
|
||||
String[] qstrings = fcontext.req.getParams().getParams(tag);
|
||||
|
||||
if (qstrings != null) {
|
||||
for (String qstring : qstrings) {
|
||||
QParser parser = null;
|
||||
try {
|
||||
parser = QParser.getParser((String) qstring, fcontext.req);
|
||||
Query symbolicFilter = parser.getQuery();
|
||||
qlist.add(symbolicFilter);
|
||||
} catch (SyntaxError syntaxError) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad query (expected a string):" + rawFilter);
|
||||
}
|
||||
|
||||
qlist.add(symbolicFilter);
|
||||
}
|
||||
|
||||
this.filter = fcontext.searcher.getDocSet(qlist);
|
||||
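The loop above accepts each entry of domain.filters either as a plain query string or as a single-key map of the form {param:&lt;name&gt;}, which pulls one or more query strings out of the request parameters. A rough illustration of the request shape being parsed (field and parameter names invented):

  json.facet={ categories : { type:terms, field:cat,
                              domain : { filter : [ "inStock:true", {param:fqs} ] } } }
  &fqs=popularity:[5 TO *]
  &fqs=price:[* TO 100]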
|
@ -363,24 +401,29 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
|
||||
void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
|
||||
|
||||
// TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
|
||||
// should we check for domain-altering exclusions, or even ask the sub-facet for
|
||||
// its domain and then only skip it if it's 0?
|
||||
|
||||
if (domain == null || domain.size() == 0 && !freq.processEmpty) {
|
||||
return;
|
||||
}
|
||||
boolean emptyDomain = domain == null || domain.size() == 0;
|
||||
|
||||
for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
|
||||
FacetRequest subRequest = sub.getValue();
|
||||
|
||||
// This includes a static check if a sub-facet can possibly produce something from
|
||||
// an empty domain. Should this be changed to a dynamic check as well? That would
|
||||
// probably require actually executing the facet anyway, and dropping it at the
|
||||
// end if it was unproductive.
|
||||
if (emptyDomain && !freq.processEmpty && !subRequest.canProduceFromEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// make a new context for each sub-facet since they can change the domain
|
||||
FacetContext subContext = fcontext.sub(filter, domain);
|
||||
FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
|
||||
FacetProcessor subProcessor = subRequest.createFacetProcessor(subContext);
|
||||
|
||||
if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true
|
||||
FacetDebugInfo fdebug = new FacetDebugInfo();
|
||||
subContext.setDebugInfo(fdebug);
|
||||
fcontext.getDebugInfo().addChild(fdebug);
|
||||
|
||||
fdebug.setReqDescription(sub.getValue().getFacetDescription());
|
||||
fdebug.setReqDescription(subRequest.getFacetDescription());
|
||||
fdebug.setProcessor(subProcessor.getClass().getSimpleName());
|
||||
if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());
|
||||
|
||||
|
|
|
@ -88,6 +88,16 @@ public abstract class FacetRequest {
|
|||
public boolean toChildren;
|
||||
public String parents; // identifies the parent filter... the full set of parent documents for any block join operation
|
||||
public List<Object> filters; // list of symbolic filters (JSON query format)
|
||||
|
||||
// True if a starting set of documents can be mapped onto a different set of documents not originally in the starting set.
|
||||
public boolean canTransformDomain() {
|
||||
return toParent || toChildren || excludeTags != null;
|
||||
}
|
||||
|
||||
// Can this domain become non-empty if the input domain is empty? This does not check any sub-facets (see canProduceFromEmpty for that)
|
||||
public boolean canBecomeNonEmpty() {
|
||||
return excludeTags != null;
|
||||
}
|
||||
}
|
||||
|
||||
public FacetRequest() {
|
||||
|
@ -119,6 +129,15 @@ public abstract class FacetRequest {
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Returns true if this facet, or any sub-facets can produce results from an empty domain. */
|
||||
public boolean canProduceFromEmpty() {
|
||||
if (domain != null && domain.canBecomeNonEmpty()) return true;
|
||||
for (FacetRequest freq : subFacets.values()) {
|
||||
if (freq.canProduceFromEmpty()) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public void addStat(String key, AggValueSource stat) {
|
||||
facetStats.put(key, stat);
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Random;
|
|||
import java.util.Set;
|
||||
import java.util.Timer;
|
||||
import java.util.TimerTask;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -110,6 +111,8 @@ public class TestInjection {
|
|||
|
||||
public static String updateRandomPause = null;
|
||||
|
||||
public static String prepRecoveryOpPauseForever = null;
|
||||
|
||||
public static String randomDelayInCoreCreation = null;
|
||||
|
||||
public static int randomDelayMaxInCoreCreationInSec = 10;
|
||||
|
@ -118,6 +121,8 @@ public class TestInjection {
|
|||
|
||||
private static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>());
|
||||
|
||||
private static AtomicInteger countPrepRecoveryOpPauseForever = new AtomicInteger(0);
|
||||
|
||||
public static void reset() {
|
||||
nonGracefullClose = null;
|
||||
failReplicaRequests = null;
|
||||
|
@ -127,6 +132,8 @@ public class TestInjection {
|
|||
updateRandomPause = null;
|
||||
randomDelayInCoreCreation = null;
|
||||
splitFailureBeforeReplicaCreation = null;
|
||||
prepRecoveryOpPauseForever = null;
|
||||
countPrepRecoveryOpPauseForever = new AtomicInteger(0);
|
||||
|
||||
for (Timer timer : timers) {
|
||||
timer.cancel();
|
||||
|
@ -289,6 +296,31 @@ public class TestInjection {
|
|||
return true;
|
||||
}
|
||||
|
||||
public static boolean injectPrepRecoveryOpPauseForever() {
|
||||
if (prepRecoveryOpPauseForever != null) {
|
||||
Random rand = random();
|
||||
if (null == rand) return true;
|
||||
|
||||
Pair<Boolean,Integer> pair = parseValue(prepRecoveryOpPauseForever);
|
||||
boolean enabled = pair.first();
|
||||
int chanceIn100 = pair.second();
|
||||
// Prevent the pause-forever injection from repeating indefinitely
|
||||
if (enabled && rand.nextInt(100) >= (100 - chanceIn100) && countPrepRecoveryOpPauseForever.get() < 2) {
|
||||
countPrepRecoveryOpPauseForever.incrementAndGet();
|
||||
log.info("inject pause forever for prep recovery op");
|
||||
try {
|
||||
Thread.sleep(Integer.MAX_VALUE);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
} else {
|
||||
countPrepRecoveryOpPauseForever.set(0);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
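The injected string is parsed by parseValue into an enabled flag plus a percentage, and the counter above caps the injection at two pause-forever hits per run. The TestCloudRecovery change later in this commit enables it like so:

  TestInjection.prepRecoveryOpPauseForever = "true:30";   // enabled, fires on roughly 30% of calls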
|
||||
public static boolean injectSplitFailureBeforeReplicaCreation() {
|
||||
if (splitFailureBeforeReplicaCreation != null) {
|
||||
Random rand = random();
|
||||
|
|
|
@ -92,14 +92,16 @@
|
|||
"useParams":"_ADMIN_FILE"
|
||||
},
|
||||
"/export": {
|
||||
"class": "solr.SearchHandler",
|
||||
"class": "solr.ExportHandler",
|
||||
"useParams":"_EXPORT",
|
||||
"components": [
|
||||
"query"
|
||||
],
|
||||
"defaults": {
|
||||
"wt": "json"
|
||||
},
|
||||
"invariants": {
|
||||
"rq": "{!xport}",
|
||||
"wt": "xsort",
|
||||
"distrib": false
|
||||
}
|
||||
},
|
||||
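With /export now backed by solr.ExportHandler, the streaming invariants (rq={!xport}, wt=xsort, distrib=false) stay fixed, so a client still only supplies a query, a sort, and a docValues-backed field list. A hypothetical request (collection and fields invented) would look like: http://localhost:8983/solr/techproducts/export?q=*:*&sort=id+asc&fl=id,price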
|
|
|
@ -38,6 +38,7 @@ import org.apache.solr.common.SolrInputDocument;
|
|||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.ImplicitDocRouter;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.params.CollectionAdminParams;
|
||||
import org.apache.solr.common.params.CoreAdminParams;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -124,9 +125,24 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
|
|||
}
|
||||
|
||||
testBackupAndRestore(getCollectionName());
|
||||
testConfigBackupOnly("conf1", getCollectionName());
|
||||
testInvalidPath(getCollectionName());
|
||||
}
|
||||
|
||||
/**
|
||||
* This test validates the backup of collection configuration using
|
||||
* {@linkplain CollectionAdminParams#NO_INDEX_BACKUP_STRATEGY}.
|
||||
*
|
||||
* @param configName The config name for the collection to be backed up.
|
||||
* @param collectionName The name of the collection to be backed up.
|
||||
* @throws Exception in case of errors.
|
||||
*/
|
||||
protected void testConfigBackupOnly(String configName, String collectionName) throws Exception {
|
||||
// This is deliberately a no-op since we want to run this test only for one of the backup repository
|
||||
// implementations (mainly to avoid redundant test execution). Currently the HDFS backup repository test
|
||||
// implements this.
|
||||
}
|
||||
|
||||
// This test verifies the system behavior when the backup location cluster property is configured with an invalid
|
||||
// value for the specified repository (and the default backup location is not configured in solr.xml).
|
||||
private void testInvalidPath(String collectionName) throws Exception {
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.apache.solr.common.params.ModifiableSolrParams;
|
|||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.update.DirectUpdateHandler2;
|
||||
import org.apache.solr.update.UpdateLog;
|
||||
import org.apache.solr.util.TestInjection;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -47,6 +49,7 @@ public class TestCloudRecovery extends SolrCloudTestCase {
|
|||
|
||||
@BeforeClass
|
||||
public static void setupCluster() throws Exception {
|
||||
TestInjection.prepRecoveryOpPauseForever = "true:30";
|
||||
System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
|
||||
System.setProperty("solr.ulog.numRecordsToKeep", "1000");
|
||||
|
||||
|
@ -62,6 +65,11 @@ public class TestCloudRecovery extends SolrCloudTestCase {
|
|||
false, true, 30);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() {
|
||||
TestInjection.reset();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void resetCollection() throws IOException, SolrServerException {
|
||||
cluster.getSolrClient().deleteByQuery(COLLECTION, "*:*");
|
||||
|
|
|
@ -16,10 +16,18 @@
|
|||
*/
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF;
|
||||
import static org.apache.solr.core.backup.BackupManager.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
@ -28,7 +36,14 @@ import org.apache.hadoop.fs.FileSystem;
|
|||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.cloud.hdfs.HdfsTestUtil;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.params.CollectionAdminParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.backup.BackupManager;
|
||||
import org.apache.solr.core.backup.repository.HdfsBackupRepository;
|
||||
import org.apache.solr.util.BadHdfsThreadsFilter;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -144,4 +159,45 @@ public class TestHdfsCloudBackupRestore extends AbstractCloudBackupRestoreTestCa
|
|||
public String getBackupLocation() {
|
||||
return null;
|
||||
}
|
||||
|
||||
protected void testConfigBackupOnly(String configName, String collectionName) throws Exception {
|
||||
String backupName = "configonlybackup";
|
||||
CloudSolrClient solrClient = cluster.getSolrClient();
|
||||
|
||||
CollectionAdminRequest.Backup backup = CollectionAdminRequest.backupCollection(collectionName, backupName)
|
||||
.setRepositoryName(getBackupRepoName())
|
||||
.setIndexBackupStrategy(CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY);
|
||||
backup.process(solrClient);
|
||||
|
||||
Map<String,String> params = new HashMap<>();
|
||||
params.put("location", "/backup");
|
||||
params.put("solr.hdfs.home", hdfsUri + "/solr");
|
||||
|
||||
HdfsBackupRepository repo = new HdfsBackupRepository();
|
||||
repo.init(new NamedList<>(params));
|
||||
BackupManager mgr = new BackupManager(repo, solrClient.getZkStateReader());
|
||||
|
||||
URI baseLoc = repo.createURI("/backup");
|
||||
|
||||
Properties props = mgr.readBackupProperties(baseLoc, backupName);
|
||||
assertNotNull(props);
|
||||
assertEquals(collectionName, props.getProperty(COLLECTION_NAME_PROP));
|
||||
assertEquals(backupName, props.getProperty(BACKUP_NAME_PROP));
|
||||
assertEquals(configName, props.getProperty(COLL_CONF));
|
||||
|
||||
DocCollection collectionState = mgr.readCollectionState(baseLoc, backupName, collectionName);
|
||||
assertNotNull(collectionState);
|
||||
assertEquals(collectionName, collectionState.getName());
|
||||
|
||||
URI configDirLoc = repo.resolve(baseLoc, backupName, ZK_STATE_DIR, CONFIG_STATE_DIR, configName);
|
||||
assertTrue(repo.exists(configDirLoc));
|
||||
|
||||
Collection<String> expected = Arrays.asList(BACKUP_PROPS_FILE, ZK_STATE_DIR);
|
||||
URI backupLoc = repo.resolve(baseLoc, backupName);
|
||||
String[] dirs = repo.listAll(backupLoc);
|
||||
for (String d : dirs) {
|
||||
assertTrue(expected.contains(d));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -97,7 +97,7 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
|
|||
++ihCount; assertEquals(pathToClassMap.get("/admin/system"), "solr.SystemInfoHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/admin/threads"), "solr.ThreadDumpHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/config"), "solr.SolrConfigHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.SearchHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.ExportHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/terms"), "solr.SearchHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/get"), "solr.RealTimeGetHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get(ReplicationHandler.PATH), "solr.ReplicationHandler");
|
||||
|
|
|
@ -0,0 +1,163 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.component;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.Test;

public class ReplicaListTransformerTest extends LuceneTestCase {

  // A transformer that keeps only matching choices
  private static class ToyMatchingReplicaListTransformer implements ReplicaListTransformer {

    private final String regex;

    public ToyMatchingReplicaListTransformer(String regex)
    {
      this.regex = regex;
    }

    public void transform(List<?> choices)
    {
      Iterator<?> it = choices.iterator();
      while (it.hasNext()) {
        Object choice = it.next();
        final String url;
        if (choice instanceof String) {
          url = (String)choice;
        }
        else if (choice instanceof Replica) {
          url = ((Replica)choice).getCoreUrl();
        } else {
          url = null;
        }
        if (url == null || !url.matches(regex)) {
          it.remove();
        }
      }
    }

  }

  // A transformer that makes no transformation
  private static class ToyNoOpReplicaListTransformer implements ReplicaListTransformer {

    public ToyNoOpReplicaListTransformer()
    {
    }

    public void transform(List<?> choices)
    {
      // no-op
    }

  }

  @Test
  public void testTransform() throws Exception {

    final String regex = ".*" + random().nextInt(10) + ".*";

    final ReplicaListTransformer transformer;
    if (random().nextBoolean()) {

      transformer = new ToyMatchingReplicaListTransformer(regex);

    } else {

      transformer = new HttpShardHandlerFactory() {

        @Override
        ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req)
        {
          final SolrParams params = req.getParams();

          if (params.getBool("toyNoTransform", false)) {
            return new ToyNoOpReplicaListTransformer();
          }

          final String regex = params.get("toyRegEx");
          if (regex != null) {
            return new ToyMatchingReplicaListTransformer(regex);
          }

          return super.getReplicaListTransformer(req);
        }

      }.getReplicaListTransformer(
          new LocalSolrQueryRequest(null,
              new ModifiableSolrParams().add("toyRegEx", regex)));
    }

    final List<Replica> inputs = new ArrayList<>();
    final List<Replica> expectedTransformed = new ArrayList<>();

    final List<String> urls = createRandomUrls();
    for (int ii=0; ii<urls.size(); ++ii) {

      final String name = "replica"+(ii+1);
      final String url = urls.get(ii);
      final Map<String,Object> propMap = new HashMap<String,Object>();
      propMap.put("base_url", url);
      // a skeleton replica, good enough for this test's purposes
      final Replica replica = new Replica(name, propMap);

      inputs.add(replica);
      if (url.matches(regex)) {
        expectedTransformed.add(replica);
      }
    }

    final List<Replica> actualTransformed = new ArrayList<>(inputs);
    transformer.transform(actualTransformed);

    assertEquals(expectedTransformed.size(), actualTransformed.size());
    for (int ii=0; ii<expectedTransformed.size(); ++ii) {
      assertEquals("mismatch for ii="+ii, expectedTransformed.get(ii), actualTransformed.get(ii));
    }
  }

  private final List<String> createRandomUrls() throws Exception {
    final List<String> urls = new ArrayList<>();
    maybeAddUrl(urls, "a"+random().nextDouble());
    maybeAddUrl(urls, "bb"+random().nextFloat());
    maybeAddUrl(urls, "ccc"+random().nextGaussian());
    maybeAddUrl(urls, "dddd"+random().nextInt());
    maybeAddUrl(urls, "eeeee"+random().nextLong());
    Collections.shuffle(urls, random());
    return urls;
  }

  private final void maybeAddUrl(final List<String> urls, final String url) {
    if (random().nextBoolean()) {
      urls.add(url);
    }
  }

}
@ -0,0 +1,76 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.component;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.cloud.Replica;
import org.junit.Test;

public class ShufflingReplicaListTransformerTest extends LuceneTestCase {

  private final ShufflingReplicaListTransformer transformer = new ShufflingReplicaListTransformer(random());

  @Test
  public void testTransformReplicas() throws Exception {
    final List<Replica> replicas = new ArrayList<>();
    for (final String url : createRandomUrls()) {
      replicas.add(new Replica(url, new HashMap<String,Object>()));
    }
    implTestTransform(replicas);
  }

  @Test
  public void testTransformUrls() throws Exception {
    final List<String> urls = createRandomUrls();
    implTestTransform(urls);
  }

  // Shuffling may reorder the elements, but it must not add or remove any.
  private <TYPE> void implTestTransform(List<TYPE> inputs) throws Exception {
    final List<TYPE> transformedInputs = new ArrayList<>(inputs);
    transformer.transform(transformedInputs);

    final Set<TYPE> inputSet = new HashSet<>(inputs);
    final Set<TYPE> transformedSet = new HashSet<>(transformedInputs);

    assertTrue(inputSet.equals(transformedSet));
  }

  private final List<String> createRandomUrls() throws Exception {
    final List<String> urls = new ArrayList<>();
    maybeAddUrl(urls, "a"+random().nextDouble());
    maybeAddUrl(urls, "bb"+random().nextFloat());
    maybeAddUrl(urls, "ccc"+random().nextGaussian());
    maybeAddUrl(urls, "dddd"+random().nextInt());
    maybeAddUrl(urls, "eeeee"+random().nextLong());
    Collections.shuffle(urls, random());
    return urls;
  }

  private final void maybeAddUrl(final List<String> urls, final String url) {
    if (random().nextBoolean()) {
      urls.add(url);
    }
  }

}
@ -113,4 +113,17 @@ public class TestMacroExpander extends LuceneTestCase {
    }
  }

  @Test
  public void testMap() { // see SOLR-9740, the second fq param was being dropped.
    final Map<String,String[]> request = new HashMap<>();
    request.put("fq", new String[] {"zero", "${one_ref}", "two", "${three_ref}"});
    request.put("one_ref", new String[] {"one"});
    request.put("three_ref", new String[] {"three"});
    Map expanded = MacroExpander.expand(request);
    assertEquals("zero", ((String[]) expanded.get("fq"))[0]);
    assertEquals("one", ((String[]) expanded.get("fq"))[1]);
    assertEquals("two", ((String[]) expanded.get("fq"))[2]);
    assertEquals("three", ((String[]) expanded.get("fq"))[3]);
  }

}