Merge branch 'apache-https-master' into jira/solr-8593

Kevin Risden 2016-11-10 16:15:06 -05:00
commit c3400e8a2e
123 changed files with 3681 additions and 1496 deletions

View File

@ -55,7 +55,30 @@ Other
* LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)
======================= Lucene 6.4.0 =======================
(No Changes)
New features
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
Improvements
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
PhraseQuery or MultiPhraseQuery when the word automaton is simple
(Mike McCandless)
* LUCENE-7431: Allow a certain amount of overlap to be specified between the include
and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
(Marc Morissette via David Smiley)
* LUCENE-7544: UnifiedHighlighter: add extension points for handling custom queries.
(Michael Braun, David Smiley)
* LUCENE-7538: Asking IndexWriter to store a too-massive text field
now throws IllegalArgumentException instead of a cryptic exception
that closes your IndexWriter (Steve Chen via Mike McCandless)
* LUCENE-7524: Added more detailed explanation of how IDF is computed in
ClassicSimilarity and BM25Similarity. (Adrien Grand)
======================= Lucene 6.3.0 =======================

View File

@ -27,7 +27,7 @@
<dependencies>
<dependency org="mecab" name="mecab-ipadic" rev="${/mecab/mecab-ipadic}" conf="ipadic">
<artifact name="ipadic" type=".tar.gz" url="http://mecab.googlecode.com/files/mecab-ipadic-2.7.0-20070801.tar.gz"/>
<artifact name="ipadic" type=".tar.gz" url="http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz"/>
</dependency>
<dependency org="mecab" name="mecab-naist-jdic" rev="${/mecab/mecab-naist-jdic}" conf="naist">
<artifact name="mecab-naist-jdic" type=".tar.gz" url="http://sourceforge.jp/frs/redir.php?m=iij&amp;f=/naist-jdic/53500/mecab-naist-jdic-0.6.3b-20111013.tar.gz"/>

View File

@ -28,7 +28,6 @@ import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BitUtil;
public final class ConnectionCostsWriter {

View File

@ -33,12 +33,10 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.packed.PackedInts;
import com.ibm.icu.text.Normalizer2;
@ -133,7 +131,7 @@ public class TokenInfoDictionaryBuilder {
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, PackedInts.DEFAULT, true, 15);
Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, 15);
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = -1; // first ord will be 0
String lastValue = null;

View File

@ -231,7 +231,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"6.2.0-cfs",
"6.2.0-nocfs",
"6.2.1-cfs",
"6.2.1-nocfs"
"6.2.1-nocfs",
"6.3.0-cfs",
"6.3.0-nocfs"
};
final String[] unsupportedNames = {

View File

@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@ -363,8 +362,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
final Builder<Output> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
FST_OUTPUTS, false,
PackedInts.COMPACT, true, 15);
FST_OUTPUTS, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}

View File

@ -81,9 +81,6 @@ import org.apache.lucene.util.packed.PackedInts;
// loads itself in ram?
public final class MemoryPostingsFormat extends PostingsFormat {
private final boolean doPackFST;
private final float acceptableOverheadRatio;
public MemoryPostingsFormat() {
this(false, PackedInts.DEFAULT);
}
@ -97,13 +94,11 @@ public final class MemoryPostingsFormat extends PostingsFormat {
*/
public MemoryPostingsFormat(boolean doPackFST, float acceptableOverheadRatio) {
super("Memory");
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
}
@Override
public String toString() {
return "PostingsFormat(name=" + getName() + " doPackFST= " + doPackFST + ")";
return "PostingsFormat(name=" + getName() + ")";
}
private final static class TermsWriter {
@ -111,16 +106,12 @@ public final class MemoryPostingsFormat extends PostingsFormat {
private final FieldInfo field;
private final Builder<BytesRef> builder;
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
private final boolean doPackFST;
private final float acceptableOverheadRatio;
private int termCount;
public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
public TermsWriter(IndexOutput out, FieldInfo field) {
this.out = out;
this.field = field;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doPackFST, acceptableOverheadRatio, true, 15);
builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
}
private class PostingsWriter {
@ -307,8 +298,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
TermsEnum termsEnum = terms.iterator();
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
TermsWriter termsWriter = new TermsWriter(out, fieldInfo,
doPackFST, acceptableOverheadRatio);
TermsWriter termsWriter = new TermsWriter(out, fieldInfo);
FixedBitSet docsSeen = new FixedBitSet(state.segmentInfo.maxDoc());
long sumTotalTermFreq = 0;

View File

@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@ -456,8 +455,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
outputs, false,
PackedInts.COMPACT, true, 15);
outputs, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}

View File

@ -64,7 +64,7 @@ public final class GrowableByteArrayDataOutput extends DataOutput {
@Override
public void writeString(String string) throws IOException {
int maxLen = string.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
int maxLen = UnicodeUtil.maxUTF8Length(string.length());
if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) {
// string is small enough that we don't need to save memory by falling back to double-pass approach
// this is just an optimized writeString() that re-uses scratchBytes.

View File

@ -24,11 +24,9 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -48,7 +46,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
private final Counter iwBytesUsed;
private final PackedLongValues.Builder lengths;
private FixedBitSet docsWithField;
private DocsWithFieldSet docsWithField;
private final FieldInfo fieldInfo;
private long bytesUsed;
private int lastDocID = -1;
@ -60,7 +58,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
this.bytesOut = bytes.getDataOutput();
this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
this.iwBytesUsed = iwBytesUsed;
this.docsWithField = new FixedBitSet(64);
this.docsWithField = new DocsWithFieldSet();
this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@ -84,8 +82,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
// Should never happen!
throw new RuntimeException(ioe);
}
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
docsWithField.add(docID);
updateBytesUsed();
lastDocID = docID;
@ -112,7 +109,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField);
return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField.iterator());
}
});
}
@ -124,12 +121,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
final DocIdSetIterator docsWithField;
final DataInput bytesIterator;
BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, FixedBitSet docsWithFields) {
BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, DocIdSetIterator docsWithFields) {
this.value = new BytesRefBuilder();
this.value.grow(maxLength);
this.lengthsIterator = lengths.iterator();
this.bytesIterator = bytesIterator;
this.docsWithField = new BitSetIterator(docsWithFields, lengths.size());
this.docsWithField = docsWithFields;
}
@Override

View File

@ -430,6 +430,10 @@ final class DefaultIndexingChain extends DocConsumer {
fp = getOrAddField(fieldName, fieldType, false);
}
if (fieldType.stored()) {
String value = field.stringValue();
if (value != null && value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
throw new IllegalArgumentException("stored field \"" + field.name() + "\" is too large (" + value.length() + " characters) to store");
}
try {
storedFieldsWriter.writeField(fp.fieldInfo, field);
} catch (Throwable th) {

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/** Accumulator for documents that have a value for a field. This is optimized
* for the case that all documents have a value. */
final class DocsWithFieldSet extends DocIdSet {
private static long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DocsWithFieldSet.class);
private FixedBitSet set;
private int cost = 0;
private int lastDocId = -1;
void add(int docID) {
if (docID <= lastDocId) {
throw new IllegalArgumentException("Out of order doc ids: last=" + lastDocId + ", next=" + docID);
}
if (set != null) {
set = FixedBitSet.ensureCapacity(set, docID);
set.set(docID);
} else if (docID != cost) {
// migrate to a sparse encoding using a bit set
set = new FixedBitSet(docID + 1);
set.set(0, cost);
set.set(docID);
}
lastDocId = docID;
cost++;
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + (set == null ? 0 : set.ramBytesUsed());
}
@Override
public DocIdSetIterator iterator() {
return set != null ? new BitSetIterator(set, cost) : DocIdSetIterator.all(cost);
}
}
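A minimal usage sketch for context, mirroring how the doc-values and norms writers later in this commit call add() with increasing doc IDs and then hand out iterator(). The demo class name is hypothetical, and since DocsWithFieldSet is package-private the sketch assumes it sits in org.apache.lucene.index as well.

package org.apache.lucene.index; // DocsWithFieldSet is package-private

import org.apache.lucene.search.DocIdSetIterator;

final class DocsWithFieldSetDemo {
  public static void main(String[] args) throws Exception {
    DocsWithFieldSet docsWithField = new DocsWithFieldSet();
    docsWithField.add(0);
    docsWithField.add(1);
    docsWithField.add(2);  // still the dense case: no FixedBitSet allocated yet
    docsWithField.add(7);  // a gap: the accumulator switches to a FixedBitSet internally

    DocIdSetIterator it = docsWithField.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      System.out.println("doc with a value: " + doc);
    }
  }
}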

View File

@ -62,6 +62,7 @@ import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.RateLimitedIndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
@ -70,6 +71,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.Version;
/**
@ -258,6 +260,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* IndexWriterConfig#setInfoStream(InfoStream)}).
*/
public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;
/**
* Maximum length string for a stored field.
*/
public final static int MAX_STORED_STRING_LENGTH = ArrayUtil.MAX_ARRAY_LENGTH / UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
// when unrecoverable disaster strikes, we populate this with the reason that we had to close IndexWriter
volatile Throwable tragedy;
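A short sketch of how an application might fail fast against the new limit before indexing; the class and helper names are hypothetical, and the check simply mirrors the one added to DefaultIndexingChain earlier in this commit.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;

final class StoredFieldLimitCheck {
  /** Hypothetical helper: reject oversized stored strings up front, the same way
   *  DefaultIndexingChain now does inside addDocument(). */
  static void addStoredChecked(Document doc, String name, String value) {
    if (value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
      throw new IllegalArgumentException("stored field \"" + name + "\" is too large ("
          + value.length() + " characters) to store");
    }
    doc.add(new StoredField(name, value));
  }
}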

View File

@ -22,9 +22,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -32,7 +30,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
* segment flushes. */
class NormValuesWriter {
private FixedBitSet docsWithField;
private DocsWithFieldSet docsWithField;
private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed;
@ -40,7 +38,7 @@ class NormValuesWriter {
private int lastDocID = -1;
public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
docsWithField = new FixedBitSet(64);
docsWithField = new DocsWithFieldSet();
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
this.fieldInfo = fieldInfo;
@ -54,8 +52,7 @@ class NormValuesWriter {
}
pending.add(value);
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
docsWithField.add(docID);
updateBytesUsed();
@ -82,7 +79,7 @@ class NormValuesWriter {
if (fieldInfo != NormValuesWriter.this.fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
return new BufferedNorms(values, docsWithField);
return new BufferedNorms(values, docsWithField.iterator());
}
@Override
@ -108,9 +105,9 @@ class NormValuesWriter {
final DocIdSetIterator docsWithField;
private long value;
BufferedNorms(PackedLongValues values, FixedBitSet docsWithFields) {
BufferedNorms(PackedLongValues values, DocIdSetIterator docsWithFields) {
this.iter = values.iterator();
this.docsWithField = new BitSetIterator(docsWithFields, values.size());
this.docsWithField = docsWithFields;
}
@Override

View File

@ -21,9 +21,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -34,13 +32,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed;
private FixedBitSet docsWithField;
private DocsWithFieldSet docsWithField;
private final FieldInfo fieldInfo;
private int lastDocID = -1;
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
docsWithField = new FixedBitSet(64);
docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
@ -53,8 +51,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
}
pending.add(value);
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
docsWithField.add(docID);
updateBytesUsed();
@ -83,7 +80,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
return new BufferedNumericDocValues(values, docsWithField);
return new BufferedNumericDocValues(values, docsWithField.iterator());
}
});
}
@ -94,9 +91,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
final DocIdSetIterator docsWithField;
private long value;
BufferedNumericDocValues(PackedLongValues values, FixedBitSet docsWithFields) {
BufferedNumericDocValues(PackedLongValues values, DocIdSetIterator docsWithFields) {
this.iter = values.iterator();
this.docsWithField = new BitSetIterator(docsWithFields, values.size());
this.docsWithField = docsWithFields;
}
@Override

View File

@ -22,13 +22,11 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -37,7 +35,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
class SortedDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
private PackedLongValues.Builder pending;
private FixedBitSet docsWithField;
private DocsWithFieldSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this currently only tracks differences in 'pending'
private final FieldInfo fieldInfo;
@ -52,7 +50,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
docsWithField = new FixedBitSet(64);
docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@ -69,8 +67,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
}
addOneValue(value);
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
docsWithField.add(docID);
lastDocID = docID;
}
@ -121,7 +118,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField);
return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField.iterator());
}
});
}
@ -136,13 +133,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
final PackedLongValues.Iterator iter;
final DocIdSetIterator docsWithField;
public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, FixedBitSet docsWithField) {
public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, DocIdSetIterator docsWithField) {
this.hash = hash;
this.valueCount = valueCount;
this.sortedValues = sortedValues;
this.iter = docToOrd.iterator();
this.ordMap = ordMap;
this.docsWithField = new BitSetIterator(docsWithField, docToOrd.size());
this.docsWithField = docsWithField;
}
@Override

View File

@ -23,9 +23,7 @@ import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -34,7 +32,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
class SortedNumericDocValuesWriter extends DocValuesWriter {
private PackedLongValues.Builder pending; // stream of all values
private PackedLongValues.Builder pendingCounts; // count of values per doc
private FixedBitSet docsWithField;
private DocsWithFieldSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
@ -47,7 +45,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
this.iwBytesUsed = iwBytesUsed;
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
docsWithField = new FixedBitSet(64);
docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
iwBytesUsed.addAndGet(bytesUsed);
}
@ -76,8 +74,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
pendingCounts.add(currentUpto);
currentUpto = 0;
docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
docsWithField.set(currentDoc);
docsWithField.add(currentDoc);
}
@Override
@ -112,7 +109,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField);
return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField.iterator());
}
});
}
@ -124,10 +121,10 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
private int valueCount;
private int valueUpto;
public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, FixedBitSet docsWithField) {
public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, DocIdSetIterator docsWithField) {
valuesIter = values.iterator();
valueCountsIter = valueCounts.iterator();
this.docsWithField = new BitSetIterator(docsWithField, values.size());
this.docsWithField = docsWithField;
}
@Override

View File

@ -24,13 +24,11 @@ import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -40,7 +38,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
private PackedLongValues.Builder pending; // stream of all termIDs
private PackedLongValues.Builder pendingCounts; // termIDs per doc
private FixedBitSet docsWithField;
private DocsWithFieldSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
@ -59,7 +57,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
docsWithField = new FixedBitSet(64);
docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@ -103,8 +101,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
pendingCounts.add(count);
maxCount = Math.max(maxCount, count);
currentUpto = 0;
docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
docsWithField.set(currentDoc);
docsWithField.add(currentDoc);
}
@Override
@ -158,7 +155,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField);
return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator());
}
});
}
@ -176,14 +173,14 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
private int ordCount;
private int ordUpto;
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, FixedBitSet docsWithField) {
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, DocIdSetIterator docsWithField) {
this.currentDoc = new int[maxCount];
this.sortedValues = sortedValues;
this.ordMap = ordMap;
this.hash = hash;
this.ordsIter = ords.iterator();
this.ordCountsIter = ordCounts.iterator();
this.docsWithField = new BitSetIterator(docsWithField, ordCounts.size());
this.docsWithField = docsWithField;
}
@Override

View File

@ -175,7 +175,9 @@ public class BM25Similarity extends Similarity {
final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
final float idf = idf(df, docCount);
return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
return Explanation.match(idf, "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
Explanation.match(df, "docFreq"),
Explanation.match(docCount, "docCount"));
}
/**
@ -192,16 +194,14 @@ public class BM25Similarity extends Similarity {
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
float idf = 0.0f;
double idf = 0d; // sum into a double before casting into a float
List<Explanation> details = new ArrayList<>();
for (final TermStatistics stat : termStats ) {
final long df = stat.docFreq();
final float termIdf = idf(df, docCount);
details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
idf += termIdf;
Explanation idfExplain = idfExplain(collectionStats, stat);
details.add(idfExplain);
idf += idfExplain.getValue();
}
return Explanation.match(idf, "idf(), sum of:", details);
return Explanation.match((float) idf, "idf(), sum of:", details);
}
@Override
@ -303,7 +303,7 @@ public class BM25Similarity extends Similarity {
subs.add(Explanation.match(0, "parameter b (norms omitted for field)"));
return Explanation.match(
(freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
"tfNorm, computed from:", subs);
"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1) from:", subs);
} else {
byte norm;
if (norms.advanceExact(doc)) {
@ -317,7 +317,7 @@ public class BM25Similarity extends Similarity {
subs.add(Explanation.match(doclen, "fieldLength"));
return Explanation.match(
(freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)),
"tfNorm, computed from:", subs);
"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:", subs);
}
}
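To make the new Explanation text concrete, a small sketch that evaluates the same formulas with made-up statistics (docFreq, docCount, freq and the field lengths are arbitrary; k1=1.2 and b=0.75 are the BM25Similarity defaults).

final class Bm25ExplainDemo {
  public static void main(String[] args) {
    long docFreq = 5, docCount = 100;                 // arbitrary example statistics
    float k1 = 1.2f, b = 0.75f;                       // BM25Similarity defaults
    float freq = 3f, fieldLength = 20f, avgFieldLength = 28f;

    // idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5))
    double idf = Math.log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5));

    // tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength))
    double tfNorm = (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength));

    System.out.printf("idf=%.4f tfNorm=%.4f%n", idf, tfNorm);
  }
}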

View File

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.similarities;
import java.io.IOException;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;
/**
* Simple similarity that gives terms a score that is equal to their query
* boost. This similarity is typically used with disabled norms since neither
* document statistics nor index statistics are used for scoring. That said,
* if norms are enabled, they will be computed the same way as
* {@link SimilarityBase} and {@link BM25Similarity} with
* {@link SimilarityBase#setDiscountOverlaps(boolean) discounted overlaps}
* so that the {@link Similarity} can be changed after the index has been
* created.
*/
public class BooleanSimilarity extends Similarity {
private static final Similarity BM25_SIM = new BM25Similarity();
/** Sole constructor */
public BooleanSimilarity() {}
@Override
public long computeNorm(FieldInvertState state) {
return BM25_SIM.computeNorm(state);
}
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new BooleanWeight(boost);
}
private static class BooleanWeight extends SimWeight {
final float boost;
BooleanWeight(float boost) {
this.boost = boost;
}
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
final float boost = ((BooleanWeight) weight).boost;
return new SimScorer() {
@Override
public float score(int doc, float freq) throws IOException {
return boost;
}
@Override
public Explanation explain(int doc, Explanation freq) throws IOException {
Explanation queryBoostExpl = Explanation.match(boost, "query boost");
return Explanation.match(
queryBoostExpl.getValue(),
"score(" + getClass().getSimpleName() + ", doc=" + doc + "), computed from:",
queryBoostExpl);
}
@Override
public float computeSlopFactor(int distance) {
return 1f;
}
@Override
public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
return 1f;
}
};
}
}
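A usage sketch under the assumption of a vanilla setup (RAMDirectory, StandardAnalyzer, hypothetical field names); per the class javadoc above, a matching term scores exactly its query boost, so the boosted term query below should score 2.0.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.store.RAMDirectory;

final class BooleanSimilarityDemo {
  public static void main(String[] args) throws Exception {
    try (RAMDirectory dir = new RAMDirectory()) {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer())
          .setSimilarity(new BooleanSimilarity());        // index-time norms (see javadoc above)
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        Document doc = new Document();
        doc.add(new StringField("id", "1", Store.YES));
        writer.addDocument(doc);
      }
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new BooleanSimilarity());  // query-time scoring
        TopDocs hits = searcher.search(new BoostQuery(new TermQuery(new Term("id", "1")), 2f), 10);
        System.out.println(hits.scoreDocs[0].score);      // score == query boost
      }
    }
  }
}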

View File

@ -18,6 +18,9 @@ package org.apache.lucene.search.similarities;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SmallFloat;
@ -121,6 +124,16 @@ public class ClassicSimilarity extends TFIDFSimilarity {
return 1;
}
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
final float idf = idf(df, docCount);
return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
Explanation.match(df, "docFreq"),
Explanation.match(docCount, "docCount"));
}
/** Implemented as <code>log((docCount+1)/(docFreq+1)) + 1</code>. */
@Override
public float idf(long docFreq, long docCount) {
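The same formula evaluated once with arbitrary numbers, to illustrate what the new per-term Explanation reports.

final class ClassicIdfDemo {
  public static void main(String[] args) {
    long docFreq = 5, docCount = 100;   // arbitrary example statistics
    // idf, computed as log((docCount+1)/(docFreq+1)) + 1
    float idf = (float) (Math.log((docCount + 1) / (double) (docFreq + 1)) + 1.0);
    System.out.println("idf = " + idf); // roughly 3.82
  }
}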

View File

@ -484,16 +484,14 @@ public abstract class TFIDFSimilarity extends Similarity {
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
float idf = 0.0f;
double idf = 0d; // sum into a double before casting into a float
List<Explanation> subs = new ArrayList<>();
for (final TermStatistics stat : termStats ) {
final long df = stat.docFreq();
final float termIdf = idf(df, docCount);
subs.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
idf += termIdf;
Explanation idfExplain = idfExplain(collectionStats, stat);
subs.add(idfExplain);
idf += idfExplain.getValue();
}
return Explanation.match(idf, "idf(), sum of:", subs);
return Explanation.match((float) idf, "idf(), sum of:", subs);
}
/** Computes a score factor based on a term's document frequency (the number

View File

@ -49,19 +49,23 @@ public final class SpanNotQuery extends SpanQuery {
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
* <code>dist</code> tokens of <code>include</code>. */
* <code>dist</code> tokens of <code>include</code>. Inversely, a negative
* <code>dist</code> value may be used to specify a certain amount of allowable
* overlap. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
this(include, exclude, dist, dist);
}
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
* <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
* <code>pre</code> tokens before or <code>post</code> tokens of
* <code>include</code>. Inversely, negative values for <code>pre</code> and/or
* <code>post</code> allow a certain amount of overlap to occur. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
this.include = Objects.requireNonNull(include);
this.exclude = Objects.requireNonNull(exclude);
this.pre = (pre >=0) ? pre : 0;
this.post = (post >= 0) ? post : 0;
this.pre = pre;
this.post = post;
if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))
throw new IllegalArgumentException("Clauses must have same field.");
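A construction sketch with made-up field and terms: the first query keeps the pre-existing behavior (no overlap allowed), while the second uses the newly permitted negative pre/post values to tolerate up to one position of overlap on either side of the include span.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

final class SpanNotOverlapDemo {
  public static void main(String[] args) {
    SpanQuery include = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term("body", "quick")),
        new SpanTermQuery(new Term("body", "fox"))
    }, 1, true);                                            // "quick ... fox", in order, slop 1
    SpanQuery exclude = new SpanTermQuery(new Term("body", "brown"));

    SpanNotQuery strict = new SpanNotQuery(include, exclude, 0, 0);    // no overlap allowed
    SpanNotQuery lenient = new SpanNotQuery(include, exclude, -1, -1); // one position of overlap allowed

    System.out.println(strict);
    System.out.println(lenient);
  }
}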

View File

@ -84,7 +84,7 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
* unicode text, with no unpaired surrogates.
*/
public BytesRef(CharSequence text) {
this(new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * text.length()]);
this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
}

View File

@ -143,7 +143,7 @@ public class BytesRefBuilder {
* represent the provided text.
*/
public void copyChars(CharSequence text, int off, int len) {
grow(len * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
grow(UnicodeUtil.maxUTF8Length(len));
ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
}
@ -152,7 +152,7 @@ public class BytesRefBuilder {
* represent the provided text.
*/
public void copyChars(char[] text, int off, int len) {
grow(len * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
grow(UnicodeUtil.maxUTF8Length(len));
ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
}

View File

@ -613,6 +613,11 @@ public final class UnicodeUtil {
return out_offset;
}
/** Returns the maximum number of utf8 bytes required to encode a utf16 (e.g., java char[], String) */
public static int maxUTF8Length(int utf16Length) {
return Math.multiplyExact(utf16Length, MAX_UTF8_BYTES_PER_CHAR);
}
/**
* Utility method for {@link #UTF8toUTF16(byte[], int, int, char[])}
* @see #UTF8toUTF16(byte[], int, int, char[])
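A small sketch of the intended call pattern (the string is arbitrary): callers size their scratch buffer with the new overflow-checked helper instead of multiplying by MAX_UTF8_BYTES_PER_CHAR themselves, as BytesRef, BytesRefBuilder and GrowableByteArrayDataOutput now do in this commit.

import org.apache.lucene.util.UnicodeUtil;

final class MaxUtf8LengthDemo {
  public static void main(String[] args) {
    String s = "hello wörld";
    // worst-case UTF-8 size; Math.multiplyExact throws on int overflow instead of
    // silently producing a negative size for extremely long inputs
    byte[] scratch = new byte[UnicodeUtil.maxUTF8Length(s.length())];
    int len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch);
    System.out.println(len + " bytes used of at most " + scratch.length);
  }
}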

View File

@ -23,7 +23,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
import org.apache.lucene.util.packed.PackedInts;
// TODO: could we somehow stream an FST to disk while we
// build it?
@ -70,10 +69,6 @@ public class Builder<T> {
private final IntsRefBuilder lastInput = new IntsRefBuilder();
// for packing
private final boolean doPackFST;
private final float acceptableOverheadRatio;
// NOTE: cutting this over to ArrayList instead loses ~6%
// in build performance on 9.8M Wikipedia terms; so we
// left this as an array:
@ -99,11 +94,10 @@ public class Builder<T> {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
* boolean, int, Outputs, boolean, float,
* boolean, int)} with pruning options turned off.
* boolean, int, Outputs, boolean, int)} with pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, false, PackedInts.COMPACT, true, 15);
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
}
/**
@ -143,11 +137,6 @@ public class Builder<T> {
* FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
* singleton output object.
*
* @param doPackFST Pass true to create a packed FST.
*
* @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
* is only relevant when doPackFST is true. @see PackedInts#getMutable(int, int, float)
*
* @param allowArrayArcs Pass false to disable the array arc optimization
* while building the FST; this will make the resulting
* FST smaller but slower to traverse.
@ -159,16 +148,13 @@ public class Builder<T> {
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
int bytesPageBits) {
boolean allowArrayArcs, int bytesPageBits) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.doShareNonSingletonNodes = doShareNonSingletonNodes;
this.shareMaxTailLength = shareMaxTailLength;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
this.allowArrayArcs = allowArrayArcs;
fst = new FST<>(inputType, outputs, doPackFST, acceptableOverheadRatio, bytesPageBits);
fst = new FST<>(inputType, outputs, bytesPageBits);
bytes = fst.bytes;
assert bytes != null;
if (doShareSuffix) {
@ -496,12 +482,8 @@ public class Builder<T> {
//if (DEBUG) System.out.println(" builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
fst.finish(compileNode(root, lastInput.length()).node);
if (doPackFST) {
return fst.pack(this, 3, Math.max(10, (int) (getNodeCount()/4)), acceptableOverheadRatio);
} else {
return fst;
}
}
private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException {
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
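A sketch of the shortcut constructor after the packing options were removed; the terms and outputs are made up, and inputs must be added in sorted order. This is essentially the usual Builder/Util example, minus doPackFST and acceptableOverheadRatio.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

final class FstBuilderDemo {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    // the shortcut now delegates to the 9-argument constructor; no packing options
    Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

    IntsRefBuilder scratch = new IntsRefBuilder();
    builder.add(Util.toIntsRef(new BytesRef("cat"), scratch), 5L);    // sorted input order
    builder.add(Util.toIntsRef(new BytesRef("dog"), scratch), 7L);
    builder.add(Util.toIntsRef(new BytesRef("dogs"), scratch), 12L);

    FST<Long> fst = builder.finish();
    System.out.println(Util.get(fst, new BytesRef("dog")));           // 7
  }
}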

View File

@ -24,13 +24,9 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
@ -38,13 +34,9 @@ import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
// TODO: break this into WritableFST and ReadOnlyFST.. then
// we can have subclasses of ReadOnlyFST to handle the
@ -90,14 +82,6 @@ public final class FST<T> implements Accountable {
static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;
// Arcs are stored as fixed-size (per entry) array, so
// that we can find an arc using binary search. We do
// this when number of arcs is > NUM_ARCS_ARRAY:
// If set, the target node is delta coded vs current
// position:
private static final int BIT_TARGET_DELTA = 1 << 6;
// We use this as a marker (because this one flag is
// illegal by itself ...):
private static final byte ARCS_AS_FIXED_ARRAY = BIT_ARC_HAS_FINAL_OUTPUT;
@ -137,7 +121,9 @@ public final class FST<T> implements Accountable {
/** Don't store arcWithOutputCount anymore */
private static final int VERSION_NO_NODE_ARC_COUNTS = 5;
private static final int VERSION_CURRENT = VERSION_NO_NODE_ARC_COUNTS;
private static final int VERSION_PACKED_REMOVED = 6;
private static final int VERSION_CURRENT = VERSION_PACKED_REMOVED;
// Never serialized; just used to represent the virtual
// final node w/ no arcs:
@ -168,9 +154,6 @@ public final class FST<T> implements Accountable {
public final Outputs<T> outputs;
private final boolean packed;
private PackedInts.Reader nodeRefToAddress;
private Arc<T> cachedRootArcs[];
/** Represents a single arc. */
@ -273,18 +256,11 @@ public final class FST<T> implements Accountable {
return (flags & bit) != 0;
}
private GrowableWriter nodeAddress;
// TODO: we could be smarter here, and prune periodically
// as we go; high in-count nodes will "usually" become
// clear early on:
private GrowableWriter inCounts;
private final int version;
// make a new empty FST, for building; Builder invokes
// this ctor
FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio, int bytesPageBits) {
FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
this.inputType = inputType;
this.outputs = outputs;
version = VERSION_CURRENT;
@ -293,17 +269,8 @@ public final class FST<T> implements Accountable {
// pad: ensure no node gets address 0 which is reserved to mean
// the stop state w/ no arcs
bytes.writeByte((byte) 0);
if (willPackFST) {
nodeAddress = new GrowableWriter(15, 8, acceptableOverheadRatio);
inCounts = new GrowableWriter(1, 8, acceptableOverheadRatio);
} else {
nodeAddress = null;
inCounts = null;
}
emptyOutput = null;
packed = false;
nodeRefToAddress = null;
}
public static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28;
@ -324,8 +291,12 @@ public final class FST<T> implements Accountable {
// NOTE: only reads most recent format; we don't have
// back-compat promise for FSTs (they are experimental):
version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_NO_NODE_ARC_COUNTS);
packed = in.readByte() == 1;
version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_CURRENT);
if (version < VERSION_PACKED_REMOVED) {
if (in.readByte() == 1) {
throw new CorruptIndexException("Cannot read packed FSTs anymore", in);
}
}
if (in.readByte() == 1) {
// accepts empty string
// 1 KB blocks:
@ -334,18 +305,13 @@ public final class FST<T> implements Accountable {
emptyBytes.copyBytes(in, numBytes);
// De-serialize empty-string output:
BytesReader reader;
if (packed) {
reader = emptyBytes.getForwardReader();
} else {
reader = emptyBytes.getReverseReader();
BytesReader reader = emptyBytes.getReverseReader();
// NoOutputs uses 0 bytes when writing its output,
// so we have to check here else BytesStore gets
// angry:
if (numBytes > 0) {
reader.setPosition(numBytes-1);
}
}
emptyOutput = outputs.readFinalOutput(reader);
} else {
emptyOutput = null;
@ -364,11 +330,6 @@ public final class FST<T> implements Accountable {
default:
throw new IllegalStateException("invalid input type " + t);
}
if (packed) {
nodeRefToAddress = PackedInts.getReader(in);
} else {
nodeRefToAddress = null;
}
startNode = in.readVLong();
if (version < VERSION_NO_NODE_ARC_COUNTS) {
in.readVLong();
@ -424,31 +385,13 @@ public final class FST<T> implements Accountable {
} else {
size += bytes.ramBytesUsed();
}
if (packed) {
size += nodeRefToAddress.ramBytesUsed();
} else if (nodeAddress != null) {
size += nodeAddress.ramBytesUsed();
size += inCounts.ramBytesUsed();
}
size += cachedArcsBytesUsed;
return size;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
if (packed) {
resources.add(Accountables.namedAccountable("node ref to address", nodeRefToAddress));
} else if (nodeAddress != null) {
resources.add(Accountables.namedAccountable("node addresses", nodeAddress));
resources.add(Accountables.namedAccountable("in counts", inCounts));
}
return resources;
}
@Override
public String toString() {
return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs + ",packed=" + packed;
return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs;
}
void finish(long newStartNode) throws IOException {
@ -464,16 +407,6 @@ public final class FST<T> implements Accountable {
cacheRootArcs();
}
private long getNodeAddress(long node) {
if (nodeAddress != null) {
// Deref
return nodeAddress.get((int) node);
} else {
// Straight
return node;
}
}
// Optionally caches first 128 labels
@SuppressWarnings({"rawtypes","unchecked"})
private void cacheRootArcs() throws IOException {
@ -527,18 +460,7 @@ public final class FST<T> implements Accountable {
if (startNode == -1) {
throw new IllegalStateException("call finish first");
}
if (nodeAddress != null) {
throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
}
if (packed && !(nodeRefToAddress instanceof PackedInts.Mutable)) {
throw new IllegalStateException("cannot save a FST which has been loaded from disk ");
}
CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
if (packed) {
out.writeByte((byte) 1);
} else {
out.writeByte((byte) 0);
}
// TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here:
if (emptyOutput != null) {
@ -552,7 +474,6 @@ public final class FST<T> implements Accountable {
byte[] emptyOutputBytes = new byte[(int) ros.getFilePointer()];
ros.writeTo(emptyOutputBytes, 0);
if (!packed) {
// reverse
final int stopAt = emptyOutputBytes.length/2;
int upto = 0;
@ -562,7 +483,6 @@ public final class FST<T> implements Accountable {
emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
upto++;
}
}
out.writeVInt(emptyOutputBytes.length);
out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
} else {
@ -577,9 +497,6 @@ public final class FST<T> implements Accountable {
t = 2;
}
out.writeByte(t);
if (packed) {
((PackedInts.Mutable) nodeRefToAddress).save(out);
}
out.writeVLong(startNode);
if (bytes != null) {
long numBytes = bytes.getPosition();
@ -705,8 +622,6 @@ public final class FST<T> implements Accountable {
if (!targetHasArcs) {
flags += BIT_STOP_NODE;
} else if (inCounts != null) {
inCounts.set((int) target.node, inCounts.get((int) target.node) + 1);
}
if (arc.output != NO_OUTPUT) {
@ -810,30 +725,8 @@ public final class FST<T> implements Accountable {
builder.bytes.reverse(startAddress, thisNodeAddress);
// PackedInts uses int as the index, so we cannot handle
// > 2.1B nodes when packing:
if (nodeAddress != null && builder.nodeCount == Integer.MAX_VALUE) {
throw new IllegalStateException("cannot create a packed FST with more than 2.1 billion nodes");
}
builder.nodeCount++;
final long node;
if (nodeAddress != null) {
// Nodes are addressed by 1+ord:
if ((int) builder.nodeCount == nodeAddress.size()) {
nodeAddress = nodeAddress.resize(ArrayUtil.oversize(nodeAddress.size() + 1, nodeAddress.getBitsPerValue()));
inCounts = inCounts.resize(ArrayUtil.oversize(inCounts.size() + 1, inCounts.getBitsPerValue()));
}
nodeAddress.set((int) builder.nodeCount, thisNodeAddress);
// System.out.println(" write nodeAddress[" + nodeCount + "] = " + endAddress);
node = builder.nodeCount;
} else {
node = thisNodeAddress;
}
//System.out.println(" ret node=" + node + " address=" + thisNodeAddress + " nodeAddress=" + nodeAddress);
return node;
return thisNodeAddress;
}
/** Fills virtual 'start' arc, ie, an empty incoming arc to
@ -876,13 +769,13 @@ public final class FST<T> implements Accountable {
arc.flags = BIT_LAST_ARC;
return arc;
} else {
in.setPosition(getNodeAddress(follow.target));
in.setPosition(follow.target);
arc.node = follow.target;
final byte b = in.readByte();
if (b == ARCS_AS_FIXED_ARRAY) {
// array: jump straight to end
arc.numArcs = in.readVInt();
if (packed || version >= VERSION_VINT_TARGET) {
if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
@ -906,8 +799,6 @@ public final class FST<T> implements Accountable {
}
if (arc.flag(BIT_STOP_NODE)) {
} else if (arc.flag(BIT_TARGET_NEXT)) {
} else if (packed) {
in.readVLong();
} else {
readUnpackedNodeTarget(in);
}
@ -964,7 +855,7 @@ public final class FST<T> implements Accountable {
}
public Arc<T> readFirstRealTargetArc(long node, Arc<T> arc, final BytesReader in) throws IOException {
final long address = getNodeAddress(node);
final long address = node;
in.setPosition(address);
//System.out.println(" readFirstRealTargtArc address="
//+ address);
@ -975,7 +866,7 @@ public final class FST<T> implements Accountable {
//System.out.println(" fixedArray");
// this is first arc in a fixed-array
arc.numArcs = in.readVInt();
if (packed || version >= VERSION_VINT_TARGET) {
if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
@ -1002,7 +893,7 @@ public final class FST<T> implements Accountable {
if (!targetHasArcs(follow)) {
return false;
} else {
in.setPosition(getNodeAddress(follow.target));
in.setPosition(follow.target);
return in.readByte() == ARCS_AS_FIXED_ARRAY;
}
}
@ -1029,7 +920,7 @@ public final class FST<T> implements Accountable {
//System.out.println(" nextArc fake " +
//arc.nextArc);
long pos = getNodeAddress(arc.nextArc);
long pos = arc.nextArc;
in.setPosition(pos);
final byte b = in.readByte();
@ -1038,7 +929,7 @@ public final class FST<T> implements Accountable {
in.readVInt();
// Skip bytesPerArc:
if (packed || version >= VERSION_VINT_TARGET) {
if (version >= VERSION_VINT_TARGET) {
in.readVInt();
} else {
in.readInt();
@ -1107,7 +998,6 @@ public final class FST<T> implements Accountable {
arc.nextArc = in.getPosition();
// TODO: would be nice to make this lazy -- maybe
// caller doesn't need the target and is scanning arcs...
if (nodeAddress == null) {
if (!arc.flag(BIT_LAST_ARC)) {
if (arc.bytesPerArc == 0) {
// must scan
@ -1118,30 +1008,8 @@ public final class FST<T> implements Accountable {
}
}
arc.target = in.getPosition();
} else {
arc.target = arc.node - 1;
assert arc.target > 0;
}
} else {
if (packed) {
final long pos = in.getPosition();
final long code = in.readVLong();
if (arc.flag(BIT_TARGET_DELTA)) {
// Address is delta-coded from current address:
arc.target = pos + code;
//System.out.println(" delta pos=" + pos + " delta=" + code + " target=" + arc.target);
} else if (code < nodeRefToAddress.size()) {
// Deref
arc.target = nodeRefToAddress.get((int) code);
//System.out.println(" deref code=" + code + " target=" + arc.target);
} else {
// Absolute
arc.target = code;
//System.out.println(" abs code=" + code);
}
} else {
arc.target = readUnpackedNodeTarget(in);
}
arc.nextArc = in.getPosition();
}
return arc;
@ -1228,7 +1096,7 @@ public final class FST<T> implements Accountable {
return null;
}
in.setPosition(getNodeAddress(follow.target));
in.setPosition(follow.target);
arc.node = follow.target;
@ -1237,7 +1105,7 @@ public final class FST<T> implements Accountable {
if (in.readByte() == ARCS_AS_FIXED_ARRAY) {
// Arcs are full array; do binary search:
arc.numArcs = in.readVInt();
if (packed || version >= VERSION_VINT_TARGET) {
if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
@ -1303,12 +1171,8 @@ public final class FST<T> implements Accountable {
}
if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) {
if (packed) {
in.readVLong();
} else {
readUnpackedNodeTarget(in);
}
}
if (flag(flags, BIT_LAST_ARC)) {
return;
@ -1340,20 +1204,12 @@ public final class FST<T> implements Accountable {
/** Returns a {@link BytesReader} for this FST, positioned at
* position 0. */
public BytesReader getBytesReader() {
if (packed) {
if (bytesArray != null) {
return new ForwardBytesReader(bytesArray);
} else {
return bytes.getForwardReader();
}
} else {
if (bytesArray != null) {
return new ReverseBytesReader(bytesArray);
} else {
return bytes.getReverseReader();
}
}
}
/** Reads bytes stored in an FST. */
public static abstract class BytesReader extends DataInput {
@ -1476,395 +1332,4 @@ public final class FST<T> implements Accountable {
}
*/
// Creates a packed FST
private FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
version = VERSION_CURRENT;
packed = true;
this.inputType = inputType;
bytesArray = null;
bytes = new BytesStore(bytesPageBits);
this.outputs = outputs;
}
/** Expert: creates an FST by packing this one. This
* process requires substantial additional RAM (currently
* up to ~8 bytes per node depending on
* <code>acceptableOverheadRatio</code>), but then should
* produce a smaller FST.
*
* <p>The implementation of this method uses ideas from
* <a target="_blank" href="http://www.cs.put.poznan.pl/dweiss/site/publications/download/fsacomp.pdf">Smaller Representation of Finite State Automata</a>,
* which describes techniques to reduce the size of a FST.
* However, this is not a strict implementation of the
* algorithms described in this paper.
*/
FST<T> pack(Builder<T> builder, int minInCountDeref, int maxDerefNodes, float acceptableOverheadRatio) throws IOException {
// NOTE: maxDerefNodes is intentionally int: we cannot
// support > 2.1B deref nodes
// TODO: other things to try
// - renumber the nodes to get more next / better locality?
// - allow multiple input labels on an arc, so
// singular chain of inputs can take one arc (on
// wikipedia terms this could save another ~6%)
// - in the ord case, the output '1' is presumably
// very common (after NO_OUTPUT)... maybe use a bit
// for it..?
// - use spare bits in flags.... for top few labels /
// outputs / targets
if (nodeAddress == null) {
throw new IllegalArgumentException("this FST was not built with willPackFST=true");
}
T NO_OUTPUT = outputs.getNoOutput();
Arc<T> arc = new Arc<>();
final BytesReader r = getBytesReader();
final int topN = Math.min(maxDerefNodes, inCounts.size());
// Find top nodes with highest number of incoming arcs:
NodeQueue q = new NodeQueue(topN);
// TODO: we could use more RAM efficient selection algo here...
NodeAndInCount bottom = null;
for(int node=0; node<inCounts.size(); node++) {
if (inCounts.get(node) >= minInCountDeref) {
if (bottom == null) {
q.add(new NodeAndInCount(node, (int) inCounts.get(node)));
if (q.size() == topN) {
bottom = q.top();
}
} else if (inCounts.get(node) > bottom.count) {
q.insertWithOverflow(new NodeAndInCount(node, (int) inCounts.get(node)));
}
}
}
// Free up RAM:
inCounts = null;
final Map<Integer,Integer> topNodeMap = new HashMap<>();
for(int downTo=q.size()-1;downTo>=0;downTo--) {
NodeAndInCount n = q.pop();
topNodeMap.put(n.node, downTo);
//System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
}
// +1 because node ords start at 1 (0 is reserved as stop node):
final GrowableWriter newNodeAddress = new GrowableWriter(
PackedInts.bitsRequired(builder.bytes.getPosition()), (int) (1 + builder.nodeCount), acceptableOverheadRatio);
// Fill initial coarse guess:
for(int node=1;node<=builder.nodeCount;node++) {
newNodeAddress.set(node, 1 + builder.bytes.getPosition() - nodeAddress.get(node));
}
int absCount;
int deltaCount;
int topCount;
int nextCount;
FST<T> fst;
// Iterate until we converge:
while(true) {
//System.out.println("\nITER");
boolean changed = false;
// for assert:
boolean negDelta = false;
fst = new FST<>(inputType, outputs, builder.bytes.getBlockBits());
final BytesStore writer = fst.bytes;
// Skip 0 byte since 0 is reserved target:
writer.writeByte((byte) 0);
absCount = deltaCount = topCount = nextCount = 0;
int changedCount = 0;
long addressError = 0;
//int totWasted = 0;
// Since we re-reverse the bytes, we now write the
// nodes backwards, so that BIT_TARGET_NEXT is
// unchanged:
for(int node=(int) builder.nodeCount;node>=1;node--) {
final long address = writer.getPosition();
//System.out.println(" node: " + node + " address=" + address);
if (address != newNodeAddress.get(node)) {
addressError = address - newNodeAddress.get(node);
//System.out.println(" change: " + (address - newNodeAddress[node]));
changed = true;
newNodeAddress.set(node, address);
changedCount++;
}
int nodeArcCount = 0;
int bytesPerArc = 0;
boolean retry = false;
// for assert:
boolean anyNegDelta = false;
// Retry loop: possibly iterate more than once, if
// this is an array'd node and bytesPerArc changes:
writeNode:
while(true) { // retry writing this node
//System.out.println(" cycle: retry");
readFirstRealTargetArc(node, arc, r);
final boolean useArcArray = arc.bytesPerArc != 0;
if (useArcArray) {
// Write false first arc:
if (bytesPerArc == 0) {
bytesPerArc = arc.bytesPerArc;
}
writer.writeByte(ARCS_AS_FIXED_ARRAY);
writer.writeVInt(arc.numArcs);
writer.writeVInt(bytesPerArc);
//System.out.println("node " + node + ": " + arc.numArcs + " arcs");
}
int maxBytesPerArc = 0;
//int wasted = 0;
while(true) { // iterate over all arcs for this node
//System.out.println(" cycle next arc");
final long arcStartPos = writer.getPosition();
nodeArcCount++;
byte flags = 0;
if (arc.isLast()) {
flags += BIT_LAST_ARC;
}
/*
if (!useArcArray && nodeUpto < nodes.length-1 && arc.target == nodes[nodeUpto+1]) {
flags += BIT_TARGET_NEXT;
}
*/
if (!useArcArray && node != 1 && arc.target == node-1) {
flags += BIT_TARGET_NEXT;
if (!retry) {
nextCount++;
}
}
if (arc.isFinal()) {
flags += BIT_FINAL_ARC;
if (arc.nextFinalOutput != NO_OUTPUT) {
flags += BIT_ARC_HAS_FINAL_OUTPUT;
}
} else {
assert arc.nextFinalOutput == NO_OUTPUT;
}
if (!targetHasArcs(arc)) {
flags += BIT_STOP_NODE;
}
if (arc.output != NO_OUTPUT) {
flags += BIT_ARC_HAS_OUTPUT;
}
final long absPtr;
final boolean doWriteTarget = targetHasArcs(arc) && (flags & BIT_TARGET_NEXT) == 0;
if (doWriteTarget) {
final Integer ptr = topNodeMap.get(arc.target);
if (ptr != null) {
absPtr = ptr;
} else {
absPtr = topNodeMap.size() + newNodeAddress.get((int) arc.target) + addressError;
}
long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition() - 2;
if (delta < 0) {
//System.out.println("neg: " + delta);
anyNegDelta = true;
delta = 0;
}
if (delta < absPtr) {
flags |= BIT_TARGET_DELTA;
}
} else {
absPtr = 0;
}
assert flags != ARCS_AS_FIXED_ARRAY;
writer.writeByte(flags);
fst.writeLabel(writer, arc.label);
if (arc.output != NO_OUTPUT) {
outputs.write(arc.output, writer);
}
if (arc.nextFinalOutput != NO_OUTPUT) {
outputs.writeFinalOutput(arc.nextFinalOutput, writer);
}
if (doWriteTarget) {
long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition();
if (delta < 0) {
anyNegDelta = true;
//System.out.println("neg: " + delta);
delta = 0;
}
if (flag(flags, BIT_TARGET_DELTA)) {
//System.out.println(" delta");
writer.writeVLong(delta);
if (!retry) {
deltaCount++;
}
} else {
/*
if (ptr != null) {
System.out.println(" deref");
} else {
System.out.println(" abs");
}
*/
writer.writeVLong(absPtr);
if (!retry) {
if (absPtr >= topNodeMap.size()) {
absCount++;
} else {
topCount++;
}
}
}
}
if (useArcArray) {
final int arcBytes = (int) (writer.getPosition() - arcStartPos);
//System.out.println(" " + arcBytes + " bytes");
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
// NOTE: this may in fact go "backwards", if
// somehow (rarely, possibly never) we use
// more bytesPerArc in this rewrite than the
// incoming FST did... but in this case we
// will retry (below) so it's OK to overwrite
// bytes:
//wasted += bytesPerArc - arcBytes;
writer.skipBytes((int) (arcStartPos + bytesPerArc - writer.getPosition()));
}
if (arc.isLast()) {
break;
}
readNextRealArc(arc, r);
}
if (useArcArray) {
if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
// converged
//System.out.println(" bba=" + bytesPerArc + " wasted=" + wasted);
//totWasted += wasted;
break;
}
} else {
break;
}
//System.out.println(" retry this node maxBytesPerArc=" + maxBytesPerArc + " vs " + bytesPerArc);
// Retry:
bytesPerArc = maxBytesPerArc;
writer.truncate(address);
nodeArcCount = 0;
retry = true;
anyNegDelta = false;
}
negDelta |= anyNegDelta;
}
if (!changed) {
// We don't renumber the nodes (just reverse their
// order) so nodes should only point forward to
// other nodes because we only produce acyclic FSTs
// w/ nodes only pointing "forwards":
assert !negDelta;
//System.out.println("TOT wasted=" + totWasted);
// Converged!
break;
}
}
long maxAddress = 0;
for (long key : topNodeMap.keySet()) {
maxAddress = Math.max(maxAddress, newNodeAddress.get((int) key));
}
PackedInts.Mutable nodeRefToAddressIn = PackedInts.getMutable(topNodeMap.size(),
PackedInts.bitsRequired(maxAddress), acceptableOverheadRatio);
for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
nodeRefToAddressIn.set(ent.getValue(), newNodeAddress.get(ent.getKey()));
}
fst.nodeRefToAddress = nodeRefToAddressIn;
fst.startNode = newNodeAddress.get((int) startNode);
//System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
if (emptyOutput != null) {
fst.setEmptyOutput(emptyOutput);
}
fst.bytes.finish();
fst.cacheRootArcs();
//final int size = fst.sizeInBytes();
//System.out.println("nextCount=" + nextCount + " topCount=" + topCount + " deltaCount=" + deltaCount + " absCount=" + absCount);
return fst;
}
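  // A minimal, hedged sketch (not part of this change): how the packing path being
  // removed here used to be reached. The old Builder constructor arguments are taken
  // from the removed call sites elsewhere in this diff; the claim that finish()
  // delegated to pack() when willPackFST was true is an assumption about the
  // pre-removal code, not something this commit shows directly.
  static FST<Long> buildPackedFstSketch() throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
        Integer.MAX_VALUE, outputs, true /* willPackFST */, PackedInts.DEFAULT,
        true /* allowArrayArcs */, 15);
    // Inputs must be added in sorted order:
    builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), 17L);
    builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), 10L);
    // With willPackFST=true the builder produced the smaller, packed representation
    // described in the javadoc above (via pack()); after this commit only the
    // regular representation remains.
    return builder.finish();
  }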
private static class NodeAndInCount implements Comparable<NodeAndInCount> {
final int node;
final int count;
public NodeAndInCount(int node, int count) {
this.node = node;
this.count = count;
}
@Override
public int compareTo(NodeAndInCount other) {
if (count > other.count) {
return 1;
} else if (count < other.count) {
return -1;
} else {
// Tie-break: smaller node compares as greater than
return other.node - node;
}
}
}
private static class NodeQueue extends PriorityQueue<NodeAndInCount> {
public NodeQueue(int topN) {
super(topN, false);
}
@Override
public boolean lessThan(NodeAndInCount a, NodeAndInCount b) {
final int cmp = a.compareTo(b);
assert cmp != 0;
return cmp < 0;
}
}
}

View File

@ -24,7 +24,6 @@
* <li>Fast and low memory overhead construction of the minimal FST
* (but inputs must be provided in sorted order)</li>
* <li>Low object overhead and quick deserialization (byte[] representation)</li>
* <li>Optional two-pass compression: {@link org.apache.lucene.util.fst.FST#pack FST.pack()}</li>
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the
* outputs are in sorted order (e.g., ordinals or file pointers)</li>
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation</li>

View File

@ -37,7 +37,7 @@ public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
// create a small string such that the single pass approach is used
int length = TestUtil.nextInt(random(), 1, minSizeForDoublePass - 1);
String unicode = TestUtil.randomFixedByteLengthUnicodeString(random(), length);
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
@ -61,7 +61,7 @@ public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
int num = atLeast(100);
for (int i = 0; i < num; i++) {
String unicode = TestUtil.randomRealisticUnicodeString(random(), minSizeForDoublePass, 10 * minSizeForDoublePass);
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestDocsWithFieldSet extends LuceneTestCase {
public void testDense() throws IOException {
DocsWithFieldSet set = new DocsWithFieldSet();
DocIdSetIterator it = set.iterator();
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
set.add(0);
it = set.iterator();
assertEquals(0, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
long ramBytesUsed = set.ramBytesUsed();
for (int i = 1; i < 1000; ++i) {
set.add(i);
}
assertEquals(ramBytesUsed, set.ramBytesUsed());
it = set.iterator();
for (int i = 0; i < 1000; ++i) {
assertEquals(i, it.nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
}
public void testSparse() throws IOException {
DocsWithFieldSet set = new DocsWithFieldSet();
int doc = random().nextInt(10000);
set.add(doc);
DocIdSetIterator it = set.iterator();
assertEquals(doc, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
int doc2 = doc + TestUtil.nextInt(random(), 1, 100);
set.add(doc2);
it = set.iterator();
assertEquals(doc, it.nextDoc());
assertEquals(doc2, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
}
public void testDenseThenSparse() throws IOException {
int denseCount = random().nextInt(10000);
int nextDoc = denseCount + random().nextInt(10000);
DocsWithFieldSet set = new DocsWithFieldSet();
for (int i = 0; i < denseCount; ++i) {
set.add(i);
}
set.add(nextDoc);
DocIdSetIterator it = set.iterator();
for (int i = 0; i < denseCount; ++i) {
assertEquals(i, it.nextDoc());
}
assertEquals(nextDoc, it.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
}
}

View File

@ -97,6 +97,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.junit.Ignore;
import org.junit.Test;
public class TestIndexWriter extends LuceneTestCase {
@ -2768,5 +2769,34 @@ public class TestIndexWriter extends LuceneTestCase {
dir.close();
}
@Ignore("requires running tests with biggish heap")
public void testMassiveField() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
final IndexWriter w = new IndexWriter(dir, iwc);
StringBuilder b = new StringBuilder();
while (b.length() <= IndexWriter.MAX_STORED_STRING_LENGTH) {
b.append("x ");
}
final Document doc = new Document();
//doc.add(new TextField("big", b.toString(), Field.Store.YES));
doc.add(new StoredField("big", b.toString()));
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
assertEquals("stored field \"big\" is too large (" + b.length() + " characters) to store", e.getMessage());
// make sure writer is still usable:
Document doc2 = new Document();
doc2.add(new StringField("id", "foo", Field.Store.YES));
w.addDocument(doc2);
DirectoryReader r = DirectoryReader.open(w);
assertEquals(1, r.numDocs());
r.close();
w.close();
dir.close();
}
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.similarities;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestBooleanSimilarity extends LuceneTestCase {
public void testTermScoreIsEqualToBoost() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
newIndexWriterConfig());
Document doc = new Document();
doc.add(new StringField("foo", "bar", Store.NO));
doc.add(new StringField("foo", "baz", Store.NO));
w.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "bar", Store.NO));
doc.add(new StringField("foo", "bar", Store.NO));
w.addDocument(doc);
DirectoryReader reader = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new BooleanSimilarity());
TopDocs topDocs = searcher.search(new TermQuery(new Term("foo", "bar")), 2);
assertEquals(2, topDocs.totalHits);
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
assertEquals(1f, topDocs.scoreDocs[1].score, 0f);
topDocs = searcher.search(new TermQuery(new Term("foo", "baz")), 1);
assertEquals(1, topDocs.totalHits);
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
topDocs = searcher.search(new BoostQuery(new TermQuery(new Term("foo", "baz")), 3f), 1);
assertEquals(1, topDocs.totalHits);
assertEquals(3f, topDocs.scoreDocs[0].score, 0f);
reader.close();
dir.close();
}
public void testPhraseScoreIsEqualToBoost() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
Document doc = new Document();
doc.add(new TextField("foo", "bar baz quux", Store.NO));
w.addDocument(doc);
DirectoryReader reader = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new BooleanSimilarity());
PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");
TopDocs topDocs = searcher.search(query, 2);
assertEquals(1, topDocs.totalHits);
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
topDocs = searcher.search(new BoostQuery(query, 7), 2);
assertEquals(1, topDocs.totalHits);
assertEquals(7f, topDocs.scoreDocs[0].score, 0f);
reader.close();
dir.close();
}
public void testSameNormsAsBM25() {
BooleanSimilarity sim1 = new BooleanSimilarity();
BM25Similarity sim2 = new BM25Similarity();
sim2.setDiscountOverlaps(true);
for (int iter = 0; iter < 100; ++iter) {
final int length = TestUtil.nextInt(random(), 1, 100);
final int position = random().nextInt(length);
final int numOverlaps = random().nextInt(50);
final float boost = random().nextFloat() * 10;
FieldInvertState state = new FieldInvertState("foo", position, length, numOverlaps, 100, boost);
assertEquals(
sim2.computeNorm(state),
sim1.computeNorm(state),
0f);
}
}
}

View File

@ -274,16 +274,38 @@ public class TestBasics extends LuceneTestCase {
assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
}
public void testSpanNotWindowNeg() throws Exception {
public void testSpanNotWindowNegPost() throws Exception {
//test handling of invalid window < 0
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
SpanQuery or = spanOrQuery("field", "forty");
SpanQuery query = spanNotQuery(near, or);
SpanQuery query = spanNotQuery(near, or, 0, -1);
checkHits(query, new int[]
{801, 821, 831, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
query = spanNotQuery(near, or, 0, -2);
checkHits(query, new int[]
{801, 821, 831, 841, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
}
public void testSpanNotWindowNegPre() throws Exception {
//test handling of invalid window < 0
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
SpanQuery or = spanOrQuery("field", "forty");
SpanQuery query = spanNotQuery(near, or, -2, 0);
checkHits(query, new int[]
{801, 821, 831, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
query = spanNotQuery(near, or, -3, 0);
checkHits(query, new int[]
{801, 821, 831, 841, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
}

View File

@ -99,7 +99,6 @@ public class TestSpans extends LuceneTestCase {
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx",
"r1 s11",
"r1 s21"
};
private void checkHits(Query query, int[] results) throws IOException {
@ -407,41 +406,53 @@ public class TestSpans extends LuceneTestCase {
}
public void testSpanNots() throws Throwable{
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
public void testSpanNots() throws Throwable {
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", 0, "s2", 0, 0), 0);
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", 0, "s2", 10, 10), 0);
//focus on behind
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", 0, "s1", 6, 0));
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", 0, "s1", 5, 0));
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", 0, "s1", 3, 0));
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", 0, "s1", 2, 0));
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", 0, "s1", 0, 0));
//focus on both
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", 0, "s1", 3, 1));
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", 0, "s1", 2, 1));
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", 0, "s1", 1, 1));
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", 0, "s1", 10, 10));
//focus on ahead
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", 0, "s2", 10, 10));
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", 0, "s2", 0, 1));
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", 0, "s2", 0, 2));
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", 0, "s2", 0, 3));
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", 0, "s2", 0, 4));
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", 0, "s2", 0, 8));
//exclude doesn't exist
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", 0, "s3", 8, 8));
//include doesn't exist
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", 0, "s1", 8, 8));
// Negative values
assertEquals("SpanNotS2S1NotXXNeg_0_0", 1, spanCount("s2 s1", 10, "xx", 0, 0));
assertEquals("SpanNotS2S1NotXXNeg_1_1", 1, spanCount("s2 s1", 10, "xx", -1, -1));
assertEquals("SpanNotS2S1NotXXNeg_0_2", 2, spanCount("s2 s1", 10, "xx", 0, -2));
assertEquals("SpanNotS2S1NotXXNeg_1_2", 2, spanCount("s2 s1", 10, "xx", -1, -2));
assertEquals("SpanNotS2S1NotXXNeg_2_1", 2, spanCount("s2 s1", 10, "xx", -2, -1));
assertEquals("SpanNotS2S1NotXXNeg_3_1", 2, spanCount("s2 s1", 10, "xx", -3, -1));
assertEquals("SpanNotS2S1NotXXNeg_1_3", 2, spanCount("s2 s1", 10, "xx", -1, -3));
assertEquals("SpanNotS2S1NotXXNeg_2_2", 3, spanCount("s2 s1", 10, "xx", -2, -2));
}
private int spanCount(String include, String exclude, int pre, int post) throws IOException{
SpanQuery iq = spanTermQuery(field, include);
private int spanCount(String include, int slop, String exclude, int pre, int post) throws IOException{
String[] includeTerms = include.split(" +");
SpanQuery iq = includeTerms.length == 1 ? spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms);
SpanQuery eq = spanTermQuery(field, exclude);
SpanQuery snq = spanNotQuery(iq, eq, pre, post);
Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);

View File

@ -111,7 +111,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
int num = atLeast(50000);
for (int i = 0; i < num; i++) {
final String s = TestUtil.randomUnicodeString(random());
final byte[] utf8 = new byte[s.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
final byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(s.length())];
final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8);
assertEquals(s.codePointCount(0, s.length()),
UnicodeUtil.codePointCount(new BytesRef(utf8, 0, utf8Len)));
@ -137,7 +137,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
int num = atLeast(50000);
for (int i = 0; i < num; i++) {
final String s = TestUtil.randomUnicodeString(random());
final byte[] utf8 = new byte[s.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
final byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(s.length())];
final int utf8Len = UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8);
utf32 = ArrayUtil.grow(utf32, utf8Len);
final int utf32Len = UnicodeUtil.UTF8toUTF32(new BytesRef(utf8, 0, utf8Len), utf32);
@ -208,7 +208,7 @@ public class TestUnicodeUtil extends LuceneTestCase {
int num = atLeast(5000);
for (int i = 0; i < num; i++) {
String unicode = TestUtil.randomUnicodeString(random());
byte[] utf8 = new byte[unicode.length() * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR];
byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
assertEquals(len, UnicodeUtil.calcUTF16toUTF8Length(unicode, 0, unicode.length()));
}

View File

@ -41,7 +41,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
private boolean matches(ByteRunAutomaton a, int code) {
char[] chars = Character.toChars(code);
byte[] b = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * chars.length];
byte[] b = new byte[UnicodeUtil.maxUTF8Length(chars.length)];
final int len = UnicodeUtil.UTF16toUTF8(chars, 0, chars.length, b);
return a.run(b, 0, len);
}

View File

@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TimeUnits;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@ -47,16 +46,14 @@ public class Test2BFST extends LuceneTestCase {
Directory dir = new MMapDirectory(createTempDir("2BFST"));
for(int doPackIter=0;doPackIter<2;doPackIter++) {
boolean doPack = doPackIter == 1;
for(int iter=0;iter<1;iter++) {
// Build FST w/ NoOutputs and stop when nodeCount > 2.2B
if (!doPack) {
{
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
Outputs<Object> outputs = NoOutputs.getSingleton();
Object NO_OUTPUT = outputs.getNoOutput();
final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
doPack, PackedInts.COMPACT, true, 15);
true, 15);
int count = 0;
Random r = new Random(seed);
@ -135,10 +132,10 @@ public class Test2BFST extends LuceneTestCase {
// Build FST w/ ByteSequenceOutputs and stop when FST
// size = 3GB
{
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
System.out.println("\nTEST: 3 GB size; outputs=bytes");
Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
doPack, PackedInts.COMPACT, true, 15);
true, 15);
byte[] outputBytes = new byte[20];
BytesRef output = new BytesRef(outputBytes);
@ -212,10 +209,10 @@ public class Test2BFST extends LuceneTestCase {
// Build FST w/ PositiveIntOutputs and stop when FST
// size = 3GB
{
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
System.out.println("\nTEST: 3 GB size; outputs=long");
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
doPack, PackedInts.COMPACT, true, 15);
true, 15);
long output = 1;

View File

@ -76,7 +76,6 @@ import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util.Result;
import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.util.fst.FSTTester.getRandomString;
import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
@ -328,9 +327,7 @@ public class TestFSTs extends LuceneTestCase {
writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final boolean doRewrite = random().nextBoolean();
Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doRewrite, PackedInts.DEFAULT, true, 15);
Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
boolean storeOrd = random().nextBoolean();
if (VERBOSE) {
@ -464,16 +461,14 @@ public class TestFSTs extends LuceneTestCase {
private int inputMode;
private final Outputs<T> outputs;
private final Builder<T> builder;
private final boolean doPack;
public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean doPack, boolean noArcArrays) {
public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean noArcArrays) {
this.dirOut = dirOut;
this.wordsFileIn = wordsFileIn;
this.inputMode = inputMode;
this.outputs = outputs;
this.doPack = doPack;
builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, doPack, PackedInts.DEFAULT, !noArcArrays, 15);
builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, !noArcArrays, 15);
}
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@ -622,7 +617,6 @@ public class TestFSTs extends LuceneTestCase {
boolean storeOrds = false;
boolean storeDocFreqs = false;
boolean verify = true;
boolean doPack = false;
boolean noArcArrays = false;
Path wordsFileIn = null;
Path dirOut = null;
@ -647,8 +641,6 @@ public class TestFSTs extends LuceneTestCase {
storeOrds = true;
} else if (args[idx].equals("-noverify")) {
verify = false;
} else if (args[idx].equals("-pack")) {
doPack = true;
} else if (args[idx].startsWith("-")) {
System.err.println("Unrecognized option: " + args[idx]);
System.exit(-1);
@ -677,7 +669,7 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
final PairOutputs<Long,Long> outputs = new PairOutputs<>(o1, o2);
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
Random rand;
@Override
public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
@ -691,7 +683,7 @@ public class TestFSTs extends LuceneTestCase {
} else if (storeOrds) {
// Store only ords
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
@Override
public Long getOutput(IntsRef input, int ord) {
return (long) ord;
@ -700,7 +692,7 @@ public class TestFSTs extends LuceneTestCase {
} else if (storeDocFreqs) {
// Store only docFreq
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
Random rand;
@Override
public Long getOutput(IntsRef input, int ord) {
@ -714,7 +706,7 @@ public class TestFSTs extends LuceneTestCase {
// Store nothing
final NoOutputs outputs = NoOutputs.getSingleton();
final Object NO_OUTPUT = outputs.getNoOutput();
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
@Override
public Object getOutput(IntsRef input, int ord) {
return NO_OUTPUT;
@ -1118,7 +1110,7 @@ public class TestFSTs extends LuceneTestCase {
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
builder.add(Util.toUTF32("stat", new IntsRefBuilder()), 17L);
builder.add(Util.toUTF32("station", new IntsRefBuilder()), 10L);
final FST<Long> fst = builder.finish();
@ -1132,8 +1124,7 @@ public class TestFSTs extends LuceneTestCase {
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final boolean willRewrite = random().nextBoolean();
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, willRewrite, PackedInts.DEFAULT, true, 15);
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();

View File

@ -19,8 +19,10 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.FilteringTokenFilter;
@ -30,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
@ -50,7 +53,9 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
private final LeafReader leafReader;
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer) {
public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
CharacterRunAutomaton[] automata, Analyzer analyzer,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
super(field, extractedTerms, phraseHelper, automata);
this.analyzer = analyzer;
// Automata (Wildcards / MultiTermQuery):
@ -68,7 +73,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
// preFilter for MemoryIndex
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases);
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases,
multiTermQueryRewrite);
} else {
memoryIndex = null;
leafReader = null;
@ -155,7 +161,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
*/
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
CharacterRunAutomaton[] automata,
PhraseHelper strictPhrases) {
PhraseHelper strictPhrases,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
if (terms.length > 0) {
allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
@ -163,7 +170,7 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
Collections.addAll(allAutomata, automata);
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
Collections.addAll(allAutomata,
MultiTermHighlighting.extractAutomata(spanQuery, field, true));//true==lookInSpan
MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
}
if (allAutomata.size() == 1) {

View File

@ -20,8 +20,10 @@ import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.function.Function;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -69,34 +71,44 @@ class MultiTermHighlighting {
* Extracts all MultiTermQueries for {@code field}, and returns equivalent
* automata that will match terms.
*/
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan) {
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
Function<Query, Collection<Query>> preRewriteFunc) {
List<CharacterRunAutomaton> list = new ArrayList<>();
if (query instanceof BooleanQuery) {
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
if (customSubQueries != null) {
for (Query sub : customSubQueries) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
}
}
} else if (query instanceof ConstantScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanOrQuery) {
for (Query sub : ((SpanOrQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNearQuery) {
for (Query sub : ((SpanNearQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNotQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field, lookInSpan)));
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
lookInSpan, preRewriteFunc)));
} else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query;
if (aq.getField().equals(field)) {

View File

@ -40,7 +40,7 @@ import java.util.function.Function;
public class PhraseHelper {
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
spanQuery -> null, true);
spanQuery -> null, query -> null, true);
//TODO it seems this ought to be a general thing on Spans?
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
@ -69,10 +69,13 @@ public class PhraseHelper {
* {@code rewriteQueryPred} is an extension hook to override the default choice of
* {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
* so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
* Similarly, {@code preExtractRewriteFunction} is an extension hook that allows different queries to be
* substituted before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
*/
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
Function<Query, Collection<Query>> preExtractRewriteFunction,
boolean ignoreQueriesNeedingRewrite) {
this.fieldName = field; // if null then don't require field match
// filter terms to those we want
@ -98,6 +101,18 @@ public class PhraseHelper {
}
}
@Override
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
Collection<Query> newQueriesToExtract = preExtractRewriteFunction.apply(query);
if (newQueriesToExtract != null) {
for (Query newQuery : newQueriesToExtract) {
extract(newQuery, boost, terms);
}
} else {
super.extract(query, boost, terms);
}
}
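  // A hedged sketch (not part of this change) of wiring the new preExtractRewriteFunction
  // hook when constructing PhraseHelper directly. "topLevelQuery" and "MyWrappingQuery"
  // (with its getWrapped() accessor) are hypothetical stand-ins; imports (Collection,
  // Collections, Function) are elided here.
  PhraseHelper phraseHelper = new PhraseHelper(topLevelQuery, "body",
      spanQuery -> null,   // rewriteQueryPred: null keeps the default rewrite decision
      query -> query instanceof MyWrappingQuery
          ? Collections.singletonList(((MyWrappingQuery) query).getWrapped())
          : null,          // unwrap custom queries before span extraction; null means no change
      false);              // don't ignore clauses that need rewriting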
@Override
protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
if (clazz.isAssignableFrom(MultiTermQuery.class)) {

View File

@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
@ -732,7 +733,8 @@ public class UnifiedHighlighter {
OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
switch (offsetSource) {
case ANALYSIS:
return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
this::preMultiTermQueryRewrite);
case NONE_NEEDED:
return NoOpOffsetStrategy.INSTANCE;
case TERM_VECTORS:
@ -776,13 +778,14 @@ public class UnifiedHighlighter {
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
return highlightPhrasesStrictly ?
new PhraseHelper(query, field, this::requiresRewrite, !handleMultiTermQuery) :
new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
PhraseHelper.NONE;
}
protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) {
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES))
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
this::preMultiTermQueryRewrite)
: ZERO_LEN_AUTOMATA_ARRAY;
}
@ -830,6 +833,32 @@ public class UnifiedHighlighter {
return null;
}
/**
* When highlighting phrases accurately, we may need to handle custom queries that aren't supported in the
* {@link org.apache.lucene.search.highlight.WeightedSpanTermExtractor} as called by the {@code PhraseHelper}.
* Should custom query types be needed, this method should be overridden to return a collection of queries if appropriate,
* or null if nothing to do. If the query is not custom, simply returning null will allow the default rules to apply.
*
* @param query Query to be highlighted
* @return A Collection of Query object(s) if it needs to be rewritten, otherwise null.
*/
protected Collection<Query> preSpanQueryRewrite(Query query) {
return null;
}
/**
* When dealing with multi term queries / span queries, we may need to handle custom queries that aren't supported
* by the default automata extraction in {@code MultiTermHighlighting}. This can be overridden to return a collection
* of queries if appropriate, or null if nothing to do. If the query is not custom, simply returning null will allow the
* default rules to apply.
*
* @param query Query to be highlighted
* @return A Collection of Query object(s) if it needs to be rewritten, otherwise null.
*/
protected Collection<Query> preMultiTermQueryRewrite(Query query) {
return null;
}
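  // A hedged sketch (not part of this change): a subclass that wires both extension points
  // to unwrap a hypothetical custom wrapper query ("MyWrappingQuery" with getWrapped() is a
  // stand-in, as are "searcher" and "indexAnalyzer"); the tests later in this diff exercise
  // each hook individually.
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected Collection<Query> preSpanQueryRewrite(Query query) {
      return query instanceof MyWrappingQuery
          ? Collections.singletonList(((MyWrappingQuery) query).getWrapped())
          : null;  // null falls back to the default rules
    }

    @Override
    protected Collection<Query> preMultiTermQueryRewrite(Query query) {
      return preSpanQueryRewrite(query);  // the same unwrapping applies to automata extraction
    }
  };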
private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
return new DocIdSetIterator() {
int idx = -1;

View File

@ -20,6 +20,8 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.Analyzer;
@ -56,6 +58,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@ -933,4 +936,89 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
ir.close();
}
public void testCustomSpanQueryHighlighting() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Document doc = new Document();
doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
doc.add(newTextField("id", "id", Field.Store.YES));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected List<Query> preMultiTermQueryRewrite(Query query) {
if (query instanceof MyWrapperSpanQuery) {
return Collections.singletonList(((MyWrapperSpanQuery) query).originalQuery);
}
return null;
}
};
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
SpanMultiTermQueryWrapper wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);
BooleanQuery query = new BooleanQuery.Builder()
.add(wrappedQuery, BooleanClause.Occur.SHOULD)
.build();
int[] docIds = new int[]{docId};
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
assertEquals(1, snippets.length);
assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
ir.close();
}
private static class MyWrapperSpanQuery extends SpanQuery {
private final SpanQuery originalQuery;
private MyWrapperSpanQuery(SpanQuery originalQuery) {
this.originalQuery = Objects.requireNonNull(originalQuery);
}
@Override
public String getField() {
return originalQuery.getField();
}
@Override
public String toString(String field) {
return "(Wrapper[" + originalQuery.toString(field)+"])";
}
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return originalQuery.createWeight(searcher, needsScores, boost);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query newOriginalQuery = originalQuery.rewrite(reader);
if (newOriginalQuery != originalQuery) {
return new MyWrapperSpanQuery((SpanQuery)newOriginalQuery);
}
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
return originalQuery.equals(((MyWrapperSpanQuery)o).originalQuery);
}
@Override
public int hashCode() {
return originalQuery.hashCode();
}
}
}

View File

@ -17,6 +17,8 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.MockAnalyzer;
@ -29,14 +31,17 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
@ -401,4 +406,76 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
assertEquals(content, o);
}
public void testPreSpanQueryRewrite() throws IOException {
indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
initReaderSearcherHighlighter();
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Collection<Query> preSpanQueryRewrite(Query query) {
if (query instanceof MyQuery) {
return Collections.singletonList(((MyQuery)query).wrapped);
}
return null;
}
};
highlighter.setHighlightPhrasesStrictly(true);
BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
Query oredTerms = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2)
.add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
.build();
Query proximityBoostingQuery = new MyQuery(oredTerms);
Query totalQuery = bqBuilder
.add(phraseQuery, BooleanClause.Occur.SHOULD)
.add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
.build();
TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets);
}
private static class MyQuery extends Query {
private final Query wrapped;
MyQuery(Query wrapped) {
this.wrapped = wrapped;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return wrapped.createWeight(searcher, needsScores, boost);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query newWrapped = wrapped.rewrite(reader);
if (newWrapped != wrapped) {
return new MyQuery(newWrapped);
}
return this;
}
@Override
public String toString(String field) {
return "[[["+wrapped.toString(field)+"]]]";
}
@Override
public boolean equals(Object obj) {
return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery) obj).wrapped);
}
@Override
public int hashCode() {
return wrapped.hashCode();
}
}
}

View File

@ -228,7 +228,7 @@ org.bouncycastle.version = 1.45
/org.carrot2.attributes/attributes-binder = 1.3.1
/org.carrot2.shaded/carrot2-guava = 18.0
/org.carrot2/carrot2-mini = 3.12.0
/org.carrot2/carrot2-mini = 3.15.0
org.carrot2.morfologik.version = 2.1.1
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}

View File

@ -50,7 +50,6 @@ import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@ -354,8 +353,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
final Builder<Pair<BytesRef,Long>> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
FST_OUTPUTS, false,
PackedInts.COMPACT, true, 15);
FST_OUTPUTS, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}

View File

@ -23,9 +23,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@ -34,6 +35,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;
@ -183,6 +185,10 @@ public class TermAutomatonQuery extends Query {
det = Operations.removeDeadStates(Operations.determinize(automaton,
maxDeterminizedStates));
if (det.isAccept(0)) {
throw new IllegalStateException("cannot accept the empty string");
}
}
@Override
@ -396,4 +402,82 @@ public class TermAutomatonQuery extends Query {
return null;
}
}
public Query rewrite(IndexReader reader) throws IOException {
if (Operations.isEmpty(det)) {
return new MatchNoDocsQuery();
}
IntsRef single = Operations.getSingleton(det);
if (single != null && single.length == 1) {
return new TermQuery(new Term(field, idToTerm.get(single.ints[single.offset])));
}
// TODO: can PhraseQuery really handle multiple terms at the same position? If so, why do we even have MultiPhraseQuery?
// Try for either PhraseQuery or MultiPhraseQuery, which only works when the automaton is a sausage:
MultiPhraseQuery.Builder mpq = new MultiPhraseQuery.Builder();
PhraseQuery.Builder pq = new PhraseQuery.Builder();
Transition t = new Transition();
int state = 0;
int pos = 0;
query:
while (true) {
int count = det.initTransition(state, t);
if (count == 0) {
if (det.isAccept(state) == false) {
mpq = null;
pq = null;
}
break;
} else if (det.isAccept(state)) {
mpq = null;
pq = null;
break;
}
int dest = -1;
List<Term> terms = new ArrayList<>();
boolean matchesAny = false;
for(int i=0;i<count;i++) {
det.getNextTransition(t);
if (i == 0) {
dest = t.dest;
} else if (dest != t.dest) {
mpq = null;
pq = null;
break query;
}
matchesAny |= anyTermID >= t.min && anyTermID <= t.max;
if (matchesAny == false) {
for(int termID=t.min;termID<=t.max;termID++) {
terms.add(new Term(field, idToTerm.get(termID)));
}
}
}
if (matchesAny == false) {
mpq.add(terms.toArray(new Term[terms.size()]), pos);
if (pq != null) {
if (terms.size() == 1) {
pq.add(terms.get(0), pos);
} else {
pq = null;
}
}
}
state = dest;
pos++;
}
if (pq != null) {
return pq.build();
} else if (mpq != null) {
return mpq.build();
}
// TODO: we could maybe also rewrite to union of PhraseQuery (pull all finite strings) if it's "worth it"?
return this;
}
}

View File

@ -296,7 +296,6 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
while (scorer instanceof AssertingScorer) {
scorer = ((AssertingScorer) scorer).getIn();
}
assert scorer instanceof TermAutomatonScorer;
}
@Override
@ -683,7 +682,7 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
w.addDocument(doc);
doc = new Document();
doc.add(newTextField("field", "comes here", Field.Store.NO));
doc.add(newTextField("field", "comes foo", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r);
@ -691,9 +690,11 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int init = q.createState();
int s1 = q.createState();
int s2 = q.createState();
q.addTransition(init, s1, "here");
q.addTransition(s1, init, "comes");
q.setAccept(init, true);
q.addTransition(s1, s2, "comes");
q.addTransition(s2, s1, "here");
q.setAccept(s1, true);
q.finish();
assertEquals(1, s.search(q, 1).totalHits);
@ -779,8 +780,186 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
// System.out.println("DOT: " + q.toDot());
assertEquals(0, s.search(q, 1).totalHits);
w.close();
r.close();
dir.close();
IOUtils.close(w, r, dir);
}
public void testEmptyString() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
q.setAccept(initState, true);
try {
q.finish();
fail("did not hit exc");
} catch (IllegalStateException ise) {
// expected
}
}
public void testRewriteNoMatch() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
assertTrue(q.rewrite(r) instanceof MatchNoDocsQuery);
IOUtils.close(w, r, dir);
}
public void testRewriteTerm() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
q.addTransition(initState, s1, "foo");
q.setAccept(s1, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof TermQuery);
assertEquals(new Term("field", "foo"), ((TermQuery) rewrite).getTerm());
IOUtils.close(w, r, dir);
}
public void testRewriteSimplePhrase() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
int s2 = q.createState();
q.addTransition(initState, s1, "foo");
q.addTransition(s1, s2, "bar");
q.setAccept(s2, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof PhraseQuery);
Term[] terms = ((PhraseQuery) rewrite).getTerms();
assertEquals(new Term("field", "foo"), terms[0]);
assertEquals(new Term("field", "bar"), terms[1]);
int[] positions = ((PhraseQuery) rewrite).getPositions();
assertEquals(0, positions[0]);
assertEquals(1, positions[1]);
IOUtils.close(w, r, dir);
}
public void testRewritePhraseWithAny() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
int s2 = q.createState();
int s3 = q.createState();
q.addTransition(initState, s1, "foo");
q.addAnyTransition(s1, s2);
q.addTransition(s2, s3, "bar");
q.setAccept(s3, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof PhraseQuery);
Term[] terms = ((PhraseQuery) rewrite).getTerms();
assertEquals(new Term("field", "foo"), terms[0]);
assertEquals(new Term("field", "bar"), terms[1]);
int[] positions = ((PhraseQuery) rewrite).getPositions();
assertEquals(0, positions[0]);
assertEquals(2, positions[1]);
IOUtils.close(w, r, dir);
}
public void testRewriteSimpleMultiPhrase() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
q.addTransition(initState, s1, "foo");
q.addTransition(initState, s1, "bar");
q.setAccept(s1, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof MultiPhraseQuery);
Term[][] terms = ((MultiPhraseQuery) rewrite).getTermArrays();
assertEquals(1, terms.length);
assertEquals(2, terms[0].length);
assertEquals(new Term("field", "foo"), terms[0][0]);
assertEquals(new Term("field", "bar"), terms[0][1]);
int[] positions = ((MultiPhraseQuery) rewrite).getPositions();
assertEquals(1, positions.length);
assertEquals(0, positions[0]);
IOUtils.close(w, r, dir);
}
public void testRewriteMultiPhraseWithAny() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();
int s1 = q.createState();
int s2 = q.createState();
int s3 = q.createState();
q.addTransition(initState, s1, "foo");
q.addTransition(initState, s1, "bar");
q.addAnyTransition(s1, s2);
q.addTransition(s2, s3, "baz");
q.setAccept(s3, true);
q.finish();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "x y z", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Query rewrite = q.rewrite(r);
assertTrue(rewrite instanceof MultiPhraseQuery);
Term[][] terms = ((MultiPhraseQuery) rewrite).getTermArrays();
assertEquals(2, terms.length);
assertEquals(2, terms[0].length);
assertEquals(new Term("field", "foo"), terms[0][0]);
assertEquals(new Term("field", "bar"), terms[0][1]);
assertEquals(1, terms[1].length);
assertEquals(new Term("field", "baz"), terms[1][0]);
int[] positions = ((MultiPhraseQuery) rewrite).getPositions();
assertEquals(2, positions.length);
assertEquals(0, positions[0]);
assertEquals(2, positions[1]);
IOUtils.close(w, r, dir);
}
}

View File

@ -26,7 +26,6 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.packed.PackedInts;
/**
* Finite state automata based implementation of "autocomplete" functionality.
@ -237,8 +236,7 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
shareMaxTailLength, outputs, false,
PackedInts.DEFAULT, true, 15);
shareMaxTailLength, outputs, true, 15);
BytesRefBuilder scratch = new BytesRefBuilder();
BytesRef entry;

View File

@ -368,8 +368,9 @@ public class CheckHits {
boolean productOf = descr.endsWith("product of:");
boolean sumOf = descr.endsWith("sum of:");
boolean maxOf = descr.endsWith("max of:");
boolean computedOf = descr.matches(".*, computed as .* from:");
boolean maxTimesOthers = false;
if (!(productOf || sumOf || maxOf)) {
if (!(productOf || sumOf || maxOf || computedOf)) {
// maybe 'max plus x times others'
int k1 = descr.indexOf("max plus ");
if (k1>=0) {
@ -387,9 +388,9 @@ public class CheckHits {
// TODO: this is a TERRIBLE assertion!!!!
Assert.assertTrue(
q+": multi valued explanation description=\""+descr
+"\" must be 'max of plus x times others' or end with 'product of'"
+"\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
+" or 'sum of:' or 'max of:' - "+expl,
productOf || sumOf || maxOf || maxTimesOthers);
productOf || sumOf || maxOf || computedOf || maxTimesOthers);
float sum = 0;
float product = 1;
float max = 0;
@ -410,7 +411,8 @@ public class CheckHits {
} else if (maxTimesOthers) {
combined = max + x * (sum - max);
} else {
Assert.assertTrue("should never get here!",false);
Assert.assertTrue("should never get here!", computedOf);
combined = value;
}
Assert.assertEquals(q+": actual subDetails combined=="+combined+
" != value="+value+" Explanation: "+expl,

View File

@ -91,6 +91,7 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
allSims = new ArrayList<>();
allSims.add(new ClassicSimilarity());
allSims.add(new BM25Similarity());
allSims.add(new BooleanSimilarity());
for (BasicModel basicModel : BASIC_MODELS) {
for (AfterEffect afterEffect : AFTER_EFFECTS) {
for (Normalization normalization : NORMALIZATIONS) {

View File

@ -36,7 +36,6 @@ import org.apache.lucene.codecs.lucene70.Lucene70Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.RandomCodec;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.RandomSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@ -213,7 +212,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
TimeZone randomTimeZone = randomTimeZone(random());
timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
TimeZone.setDefault(timeZone);
similarity = random().nextBoolean() ? new ClassicSimilarity() : new RandomSimilarity(random());
similarity = new RandomSimilarity(random());
// Check codec restrictions once at class level.
try {

View File

@ -40,7 +40,6 @@ import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.packed.PackedInts;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@ -273,25 +272,14 @@ public class FSTTester<T> {
System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
}
final boolean willRewrite = random.nextBoolean();
final Builder<T> builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
prune1, prune2,
prune1==0 && prune2==0,
allowRandomSuffixSharing ? random.nextBoolean() : true,
allowRandomSuffixSharing ? TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
outputs,
willRewrite,
PackedInts.DEFAULT,
true,
15);
if (LuceneTestCase.VERBOSE) {
if (willRewrite) {
System.out.println("TEST: packed FST");
} else {
System.out.println("TEST: non-packed FST");
}
}
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) {
@ -306,7 +294,7 @@ public class FSTTester<T> {
}
FST<T> fst = builder.finish();
if (random.nextBoolean() && fst != null && !willRewrite) {
if (random.nextBoolean() && fst != null) {
IOContext context = LuceneTestCase.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context);
fst.save(out);

View File

@ -70,7 +70,7 @@ Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
Versions of Major Components
---------------------
Apache Tika 1.13
Carrot2 3.12.0
Carrot2 3.15.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6
@ -81,6 +81,9 @@ Detailed Change List
New Features
----------------------
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
marker. (Dawid Weiss)
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
any facet command. The filters are applied after any domain change operations.
Example: { type:terms, field:category, filter:"user:yonik" }
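A minimal SolrJ sketch of the per-facet filter described in the SOLR-9681 entry above, assuming an illustrative techproducts collection with category and user fields and a Solr instance at localhost:8983 (the URL, collection, facet, and field names are assumptions, not part of this commit):

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.HttpSolrClient;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class FacetFilterSketch {
      public static void main(String[] args) throws Exception {
        try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
          SolrQuery query = new SolrQuery("*:*");
          // Per the entry above, a facet command may carry its own filter,
          // which is applied after any domain change operations.
          query.set("json.facet",
              "{ categories: { type:terms, field:category, filter:\"user:yonik\" } }");
          QueryResponse rsp = client.query("techproducts", query);
          System.out.println(rsp.getResponse().get("facets"));
        }
      }
    }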
@ -96,11 +99,21 @@ New Features
* SOLR-8542: Adds Solr Learning to Rank (LTR) plugin for reranking results with machine learning models.
(Michael Nilsson, Diego Ceccarelli, Joshua Pantony, Jon Dorando, Naveen Santhapuri, Alessandro Benedetti, David Grohmann, Christine Poerschke)
* SOLR-9055: Make collection backup/restore extensible. (Hrishikesh Gadre, Varun Thacker, Mark Miller)
* SOLR-9682: JSON Facet API: added "param" query type to facet domain filter specification to obtain
filters via query parameters. (yonik)
* SOLR-9038: Add a command-line tool to manage the snapshots functionality (Hrishikesh Gadre via yonik)
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
filters specified by using those filters as acceptDocs. (yonik)
* SOLR-9726: Reduce number of lookupOrd calls made by the DocValuesFacets.getCounts method.
(Jonny Marks via Christine Poerschke)
Bug Fixes
----------------------
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
@ -109,15 +122,43 @@ Bug Fixes
* SOLR-9433: SolrCore clean-up logic uses incorrect path to delete dataDir on failure to create a core.
(Evan Sayer, shalin)
* SOLR-9360: Solr script not properly checking SOLR_PID
(Alessandro Benedetti via Erick Erickson)
* SOLR-9716: RecoveryStrategy sends prep recovery command without setting read time out which can cause
replica recovery to hang indefinitely on network partitions. (Cao Manh Dat, shalin)
* SOLR-9624: In Admin UI, do not attempt to highlight CSV output (Alexandre Rafalovitch)
* SOLR-9005: In files example, add a guard condition to javascript URP script (Alexandre Rafalovitch)
* SOLR-9519: JSON Facet API: don't stop at an empty facet bucket if any sub-facets still have a chance
of matching something due to filter exclusions (which can widen the domain again).
(Michael Sun, yonik)
* SOLR-9740: A bug in macro expansion of multi-valued parameters caused non-expanded values
after the first expanded value in the same multi-valued parameter to be dropped.
(Erik Hatcher, yonik)
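A minimal sketch of the request shape SOLR-9740 refers to, assuming Solr's default ${...} macro expansion of request parameters; the parameter names and values below are illustrative. The fix ensures that plain values following a macro-expanded value in the same multi-valued parameter are no longer dropped.

    import org.apache.solr.client.solrj.SolrQuery;

    public class MacroExpansionSketch {
      public static SolrQuery build() {
        SolrQuery query = new SolrQuery("*:*");
        // Two values for the multi-valued fq parameter: the first uses a ${...} macro,
        // the second is a plain filter that was previously at risk of being dropped.
        query.add("fq", "${userFilter}");
        query.add("fq", "inStock:true");
        // The macro source parameter; name and value are illustrative.
        query.set("userFilter", "popularity:[5 TO *]");
        return query;
      }
    }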
Other Changes
----------------------
* SOLR-7539: Upgrade the clustering plugin to Carrot2 3.15.0. (Dawid Weiss)
* SOLR-9621: Remove several Guava & Apache Commons calls in favor of java 8 alternatives.
(Michael Braun via David Smiley)
* SOLR-9720: Refactor Responsewriters to remove dependencies on TupleStream,
Tuple, Explanation (noble)
* SOLR-9717: Refactor '/export' to not hardcode the JSON output and to use an API (noble)
* SOLR-9739: JavabinCodec implements PushWriter interface (noble)
* SOLR-8332: Factor HttpShardHandler[Factory]'s url shuffling out into a ReplicaListTransformer class.
(Christine Poerschke, Noble Paul)
================== 6.3.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -495,7 +495,7 @@ function solr_pid_by_port() {
# extract the value of the -Djetty.port parameter from a running Solr process
function jetty_port() {
SOLR_PID="$1"
SOLR_PROC=`ps auxww | grep -w $SOLR_PID | grep start\.jar | grep jetty.port`
SOLR_PROC=`ps auxww | grep -w $SOLR_PID | grep start\.jar | grep jetty\.port`
IFS=' ' read -a proc_args <<< "$SOLR_PROC"
for arg in "${proc_args[@]}"
do
@ -543,10 +543,10 @@ function get_info() {
done < <(find "$SOLR_PID_DIR" -name "solr-*.pid" -type f)
else
# no pid files but check using ps just to be sure
numSolrs=`ps auxww | grep start\.jar | grep solr.solr.home | grep -v grep | wc -l | sed -e 's/^[ \t]*//'`
numSolrs=`ps auxww | grep start\.jar | grep solr\.solr\.home | grep -v grep | wc -l | sed -e 's/^[ \t]*//'`
if [ "$numSolrs" != "0" ]; then
echo -e "\nFound $numSolrs Solr nodes: "
PROCESSES=$(ps auxww | grep start\.jar | grep solr.solr.home | grep -v grep | awk '{print $2}' | sort -r)
PROCESSES=$(ps auxww | grep start\.jar | grep solr\.solr\.home | grep -v grep | awk '{print $2}' | sort -r)
for ID in $PROCESSES
do
port=`jetty_port "$ID"`
@ -1345,7 +1345,7 @@ if [[ "$SCRIPT_CMD" == "start" ]]; then
if [ -z "$SOLR_PID" ]; then
# not found using the pid file ... but use ps to ensure not found
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
fi
if [ "$SOLR_PID" != "" ]; then
@ -1358,7 +1358,7 @@ else
SOLR_PID=`solr_pid_by_port "$SOLR_PORT"`
if [ -z "$SOLR_PID" ]; then
# not found using the pid file ... but use ps to ensure not found
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
fi
if [ "$SOLR_PID" != "" ]; then
stop_solr "$SOLR_SERVER_DIR" "$SOLR_PORT" "$STOP_KEY" "$SOLR_PID"
@ -1659,7 +1659,7 @@ function launch_solr() {
exit # subshell!
fi
else
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n"
exit # subshell!
fi
@ -1668,7 +1668,7 @@ function launch_solr() {
else
echo -e "NOTE: Please install lsof as this script needs it to determine if Solr is listening on port $SOLR_PORT."
sleep 10
SOLR_PID=`ps auxww | grep start\.jar | grep -w $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r`
SOLR_PID=`ps auxww | grep start\.jar | grep -w "\-Djetty\.port=$SOLR_PORT" | grep -v grep | awk '{print $2}' | sort -r`
echo -e "\nStarted Solr server on port $SOLR_PORT (pid=$SOLR_PID). Happy searching!\n"
return;
fi
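The escaping added above matters because an unescaped dot in a grep pattern is a regex wildcard, so jetty.port can also match unrelated process arguments. A small Java sketch of the same pitfall, with made-up sample strings (grep uses POSIX regular expressions, but the dot-wildcard behaviour is the same):

    import java.util.regex.Pattern;

    public class DotEscapeSketch {
      public static void main(String[] args) {
        String unrelated = "-Djetty_port_check=true";   // made-up argument that should not match
        String wanted = "-Djetty.port=8983";

        Pattern loose = Pattern.compile("jetty.port");    // '.' matches any character
        Pattern strict = Pattern.compile("jetty\\.port"); // literal dot only

        System.out.println(loose.matcher(unrelated).find());  // true: false positive
        System.out.println(strict.matcher(unrelated).find()); // false
        System.out.println(strict.matcher(wanted).find());    // true
      }
    }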

View File

@ -19,6 +19,7 @@ package org.apache.solr.handler.clustering;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
@ -44,9 +45,6 @@ import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Maps;
/**
* Provides a plugin for performing cluster analysis. This can either be applied to
* search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
@ -68,12 +66,12 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
/**
* Declaration-order list of search clustering engines.
*/
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = Maps.newLinkedHashMap();
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = new LinkedHashMap<>();
/**
* Declaration order list of document clustering engines.
*/
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = Maps.newLinkedHashMap();
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = new LinkedHashMap<>();
/**
* An unmodifiable view of {@link #searchClusteringEngines}.
@ -173,7 +171,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
if (engine != null) {
checkAvailable(name, engine);
DocListAndSet results = rb.getResults();
Map<SolrDocument,Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
Map<SolrDocument,Integer> docIds = new HashMap<>(results.docList.size());
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);

View File

@ -58,6 +58,8 @@ import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.shaded.guava.common.base.MoreObjects;
import org.carrot2.shaded.guava.common.base.Strings;
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
@ -69,12 +71,6 @@ import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
* Search results clustering engine based on Carrot2 clustering algorithms.
*
@ -155,7 +151,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
// of this component. This by-name convention lookup is used to simplify configuring algorithms.
String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
log.info("Initializing Clustering Engine '" +
MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
if (!Strings.isNullOrEmpty(componentName)) {
IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
@ -268,7 +265,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
SolrParams solrParams = sreq.getParams();
HashSet<String> fields = Sets.newHashSet(getFieldsForClustering(sreq));
HashSet<String> fields = new HashSet<>(getFieldsForClustering(sreq));
fields.add(idFieldName);
fields.add(solrParams.get(CarrotParams.URL_FIELD_NAME, "url"));
fields.addAll(getCustomFieldsMap(solrParams).keySet());
@ -295,7 +292,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
+ " must not be blank.");
}
final Set<String> fields = Sets.newHashSet();
final Set<String> fields = new HashSet<>();
fields.addAll(Arrays.asList(titleFieldSpec.split("[, ]")));
fields.addAll(Arrays.asList(snippetFieldSpec.split("[, ]")));
return fields;
@ -319,7 +316,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
Map<String, String> customFields = getCustomFieldsMap(solrParams);
// Parse language code map string into a map
Map<String, String> languageCodeMap = Maps.newHashMap();
Map<String, String> languageCodeMap = new HashMap<>();
if (StringUtils.isNotBlank(languageField)) {
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":");
@ -340,7 +337,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
if (produceSummary) {
highlighter = HighlightComponent.getHighlighter(core);
if (highlighter != null){
Map<String, Object> args = Maps.newHashMap();
Map<String, Object> args = new HashMap<>();
snippetFieldAry = snippetFieldSpec.split("[, ]");
args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true");
@ -466,10 +463,10 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
* custom field names.
*/
private Map<String, String> getCustomFieldsMap(SolrParams solrParams) {
Map<String, String> customFields = Maps.newHashMap();
Map<String, String> customFields = new HashMap<>();
String [] customFieldsSpec = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME);
if (customFieldsSpec != null) {
customFields = Maps.newHashMap();
customFields = new HashMap<>();
for (String customFieldSpec : customFieldsSpec) {
String [] split = customFieldSpec.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
@ -501,7 +498,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
SolrParams solrParams) {
List<NamedList<Object>> result = Lists.newArrayList();
List<NamedList<Object>> result = new ArrayList<>();
clustersToNamedList(carrotClusters, result, solrParams.getBool(
CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt(
CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
@ -534,7 +531,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Add documents
List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments();
List<Object> docList = Lists.newArrayList();
List<Object> docList = new ArrayList<>();
cluster.add("docs", docList);
for (Document doc : docs) {
docList.add(doc.getField(SOLR_DOCUMENT_ID));
@ -542,7 +539,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Add subclusters
if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) {
List<NamedList<Object>> subclusters = Lists.newArrayList();
List<NamedList<Object>> subclusters = new ArrayList<>();
cluster.add("clusters", subclusters);
clustersToNamedList(outCluster.getSubclusters(), subclusters,
outputSubClusters, maxLabels);

View File

@ -16,10 +16,10 @@
*/
package org.apache.solr.handler.clustering.carrot2;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import com.google.common.collect.ImmutableSet;
/**
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
* @lucene.experimental
@ -50,7 +50,7 @@ public final class CarrotParams {
*/
public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
static final Set<String> CARROT_PARAM_NAMES = new HashSet<>(Arrays.asList(
ALGORITHM,
TITLE_FIELD_NAME,
@ -66,7 +66,7 @@ public final class CarrotParams {
NUM_DESCRIPTIONS,
OUTPUT_SUB_CLUSTERS,
RESOURCES_DIR,
LANGUAGE_CODE_MAP);
LANGUAGE_CODE_MAP));
/** No instances. */
private CarrotParams() {}

View File

@ -16,7 +16,9 @@
*/
package org.apache.solr.handler.clustering.carrot2;
import java.util.Collection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
@ -26,6 +28,7 @@ import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Processing;
@ -37,9 +40,6 @@ import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
/**
* An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop
* words from a field's StopFilter to the default stop words used in Carrot2,
@ -67,7 +67,7 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
/**
* A lazily-built cache of stop words per field.
*/
private Multimap<String, CharArraySet> solrStopWords = HashMultimap.create();
private HashMap<String, List<CharArraySet>> solrStopWords = new HashMap<>();
/**
* Carrot2's default lexical resources to use in addition to Solr's stop
@ -79,32 +79,35 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
* Obtains stop words for a field from the associated
* {@link StopFilterFactory}, if any.
*/
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) {
private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
// No need to synchronize here, Carrot2 ensures that instances
// of this class are not used by multiple threads at a time.
synchronized (solrStopWords) {
if (!solrStopWords.containsKey(fieldName)) {
final Analyzer fieldAnalyzer = core.getLatestSchema().getFieldType(fieldName)
.getIndexAnalyzer();
solrStopWords.put(fieldName, new ArrayList<>());
IndexSchema schema = core.getLatestSchema();
final Analyzer fieldAnalyzer = schema.getFieldType(fieldName).getIndexAnalyzer();
if (fieldAnalyzer instanceof TokenizerChain) {
final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer)
.getTokenFilterFactories();
final TokenFilterFactory[] filterFactories =
((TokenizerChain) fieldAnalyzer).getTokenFilterFactories();
for (TokenFilterFactory factory : filterFactories) {
if (factory instanceof StopFilterFactory) {
// StopFilterFactory holds the stop words in a CharArraySet
solrStopWords.put(fieldName,
((StopFilterFactory) factory).getStopWords());
CharArraySet stopWords = ((StopFilterFactory) factory).getStopWords();
solrStopWords.get(fieldName).add(stopWords);
}
if (factory instanceof CommonGramsFilterFactory) {
solrStopWords.put(fieldName,
((CommonGramsFilterFactory) factory)
.getCommonWords());
CharArraySet commonWords = ((CommonGramsFilterFactory) factory).getCommonWords();
solrStopWords.get(fieldName).add(commonWords);
}
}
}
}
return solrStopWords.get(fieldName);
}
}
@Override
public ILexicalData getLexicalData(LanguageCode languageCode) {

View File

@ -17,6 +17,9 @@
package org.apache.solr.handler.clustering.carrot2;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -45,9 +48,6 @@ import org.carrot2.core.LanguageCode;
import org.carrot2.util.attribute.AttributeUtils;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
*
*/
@ -211,7 +211,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
// stoplabels.mt, so we're expecting only one cluster with label "online".
final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine(engineName), 1, params);
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online"));
assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
}
@Test
@ -226,7 +226,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
// only one cluster with label "online".
final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine("lexical-resource-check"), 1, params);
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online"));
assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
}
@Test
@ -243,9 +243,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine("lexical-resource-check"), 2, params);
assertEquals(ImmutableList.of("online"), getLabels(clusters.get(0)));
assertEquals(ImmutableList.of("solrownstopword"),
getLabels(clusters.get(1)));
assertEquals(Collections.singletonList("online"), getLabels(clusters.get(0)));
assertEquals(Collections.singletonList("solrownstopword"), getLabels(clusters.get(1)));
}
@Test
@ -395,8 +394,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals(
Lists.newArrayList("stc", "default", "mock"),
Lists.newArrayList(engines.keySet()));
Arrays.asList("stc", "default", "mock"),
new ArrayList<>(engines.keySet()));
assertEquals(
LingoClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
@ -407,8 +406,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals(
Lists.newArrayList("unavailable", "lingo", "stc", "mock", "default"),
Lists.newArrayList(engines.keySet()));
Arrays.asList("unavailable", "lingo", "stc", "mock", "default"),
new ArrayList<>(engines.keySet()));
assertEquals(
LingoClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
@ -419,8 +418,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals(
Lists.newArrayList("", "default"),
Lists.newArrayList(engines.keySet()));
Arrays.asList("", "default"),
new ArrayList<>(engines.keySet()));
assertEquals(
MockClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());

View File

@ -15,6 +15,7 @@
* limitations under the License.
*/
package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List;
import org.carrot2.core.Cluster;
@ -29,8 +30,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/**
* A mock Carrot2 clustering algorithm that outputs input documents as clusters.
* Useful only in tests.
@ -56,7 +55,7 @@ public class EchoClusteringAlgorithm extends ProcessingComponentBase implements
@Override
public void process() throws ProcessingException {
clusters = Lists.newArrayListWithCapacity(documents.size());
clusters = new ArrayList<>();
for (Document document : documents) {
final Cluster cluster = new Cluster();

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List;
import org.carrot2.core.Cluster;
@ -36,8 +37,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/**
* A mock Carrot2 clustering algorithm that outputs stem of each token of each
* document as a separate cluster. Useful only in tests.
@ -64,7 +63,7 @@ public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase
final AllTokens allTokens = preprocessingContext.allTokens;
final AllWords allWords = preprocessingContext.allWords;
final AllStems allStems = preprocessingContext.allStems;
clusters = Lists.newArrayListWithCapacity(allTokens.image.length);
clusters = new ArrayList<>();
for (int i = 0; i < allTokens.image.length; i++) {
if (allTokens.wordIndex[i] >= 0) {
clusters.add(new Cluster(new String(

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List;
import org.carrot2.core.Cluster;
@ -33,7 +34,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/**
* A mock Carrot2 clustering algorithm that outputs each token of each document
@ -58,8 +58,7 @@ public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase
public void process() throws ProcessingException {
final PreprocessingContext preprocessingContext = preprocessing.preprocess(
documents, "", LanguageCode.ENGLISH);
clusters = Lists
.newArrayListWithCapacity(preprocessingContext.allTokens.image.length);
clusters = new ArrayList<>();
for (char[] token : preprocessingContext.allTokens.image) {
if (token != null) {
clusters.add(new Cluster(new String(token)));

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List;
import org.carrot2.core.Cluster;
@ -33,8 +34,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/**
* A mock implementation of Carrot2 clustering algorithm for testing whether the
* customized lexical resource lookup works correctly. This algorithm ignores
@ -60,7 +59,7 @@ public class LexicalResourcesCheckClusteringAlgorithm extends
@Override
public void process() throws ProcessingException {
clusters = Lists.newArrayList();
clusters = new ArrayList<>();
if (wordsToCheck == null) {
return;
}

View File

@ -15,13 +15,13 @@
* limitations under the License.
*/
package org.apache.solr.handler.clustering.carrot2;
import com.google.common.collect.Lists;
import org.carrot2.core.*;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.IntRange;
import java.util.ArrayList;
import java.util.List;
@Bindable(prefix = "MockClusteringAlgorithm")
@ -62,7 +62,7 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
@Override
public void process() throws ProcessingException {
clusters = Lists.newArrayList();
clusters = new ArrayList<>();
if (documents == null) {
return;
}

View File

@ -26,6 +26,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
@ -35,6 +36,7 @@ import org.apache.solr.common.cloud.Replica.State;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
@ -68,31 +70,13 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr(COLLECTION_PROP);
String backupName = message.getStr(NAME);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
String asyncId = message.getStr(ASYNC);
String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
Optional<CollectionSnapshotMetaData> snapshotMeta = Optional.empty();
if (commitName != null) {
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
if (!snapshotMeta.isPresent()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName
+ " does not exist for collection " + collectionName);
}
if (snapshotMeta.get().getStatus() != SnapshotStatus.Successful) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName
+ " has not completed successfully. The status is " + snapshotMeta.get().getStatus());
}
}
Map<String, String> requestMap = new HashMap<>();
Instant startTime = Instant.now();
CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer();
BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader, collectionName);
BackupManager backupMgr = new BackupManager(repository, ocmh.zkStateReader);
// Backup location
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
@ -106,51 +90,17 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
// Create a directory to store backup details.
repository.createDirectory(backupPath);
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName,
backupPath);
Collection<String> shardsToConsider = Collections.emptySet();
if (snapshotMeta.isPresent()) {
shardsToConsider = snapshotMeta.get().getShards();
String strategy = message.getStr(CollectionAdminParams.INDEX_BACKUP_STRATEGY, CollectionAdminParams.COPY_FILES_STRATEGY);
switch (strategy) {
case CollectionAdminParams.COPY_FILES_STRATEGY: {
copyIndexFiles(backupPath, message, results);
break;
}
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
Replica replica = null;
if (snapshotMeta.isPresent()) {
if (!shardsToConsider.contains(slice.getName())) {
log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.",
slice.getName(), collectionName, snapshotMeta.get().getName());
continue;
}
replica = selectReplicaWithSnapshot(snapshotMeta.get(), slice);
} else {
// Note - Actually this can return a null value when there is no leader for this shard.
replica = slice.getLeader();
if (replica == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + slice.getName() + " of collection " + collectionName);
case CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY: {
break;
}
}
String coreName = replica.getStr(CORE_NAME_PROP);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
params.set(NAME, slice.getName());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString()); // note: index dir will be here then the "snapshot." + slice name
params.set(CORE_NAME_PROP, coreName);
if (snapshotMeta.isPresent()) {
params.set(CoreAdminParams.COMMIT_NAME, snapshotMeta.get().getName());
}
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
}
log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);
ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
log.info("Starting to backup ZK data for backupName={}", backupName);
//Download the configs
@ -168,6 +118,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName);
properties.put(COLL_CONF, configName);
properties.put(BackupManager.START_TIME_PROP, startTime.toString());
properties.put(BackupManager.INDEX_VERSION_PROP, Version.LATEST.toString());
//TODO: Add MD5 of the configset. If during restore the same name configset exists then we can compare checksums to see if they are the same.
//if they are not the same then we can throw an error or have an 'overwriteConfig' flag
//TODO save numDocs for the shardLeader. We can use it to sanity check the restore.
@ -202,4 +153,73 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
return r.get();
}
private void copyIndexFiles(URI backupPath, ZkNodeProps request, NamedList results) throws Exception {
String collectionName = request.getStr(COLLECTION_PROP);
String backupName = request.getStr(NAME);
String asyncId = request.getStr(ASYNC);
String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
Map<String, String> requestMap = new HashMap<>();
String commitName = request.getStr(CoreAdminParams.COMMIT_NAME);
Optional<CollectionSnapshotMetaData> snapshotMeta = Optional.empty();
if (commitName != null) {
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
if (!snapshotMeta.isPresent()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName
+ " does not exist for collection " + collectionName);
}
if (snapshotMeta.get().getStatus() != SnapshotStatus.Successful) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName
+ " has not completed successfully. The status is " + snapshotMeta.get().getStatus());
}
}
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName,
backupPath);
Collection<String> shardsToConsider = Collections.emptySet();
if (snapshotMeta.isPresent()) {
shardsToConsider = snapshotMeta.get().getShards();
}
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
Replica replica = null;
if (snapshotMeta.isPresent()) {
if (!shardsToConsider.contains(slice.getName())) {
log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.",
slice.getName(), collectionName, snapshotMeta.get().getName());
continue;
}
replica = selectReplicaWithSnapshot(snapshotMeta.get(), slice);
} else {
// Note - Actually this can return a null value when there is no leader for this shard.
replica = slice.getLeader();
if (replica == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + slice.getName() + " of collection " + collectionName);
}
}
String coreName = replica.getStr(CORE_NAME_PROP);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
params.set(NAME, slice.getName());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repoName);
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString()); // note: index dir will be here then the "snapshot." + slice name
params.set(CORE_NAME_PROP, coreName);
if (snapshotMeta.isPresent()) {
params.set(CoreAdminParams.COMMIT_NAME, snapshotMeta.get().getName());
}
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
}
log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);
ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
}
}

View File

@ -19,6 +19,7 @@ package org.apache.solr.cloud;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -572,8 +573,6 @@ public class RecoveryStrategy extends Thread implements Closeable {
private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice)
throws SolrServerException, IOException, InterruptedException, ExecutionException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) {
client.setConnectionTimeout(30000);
WaitForState prepCmd = new WaitForState();
prepCmd.setCoreName(leaderCoreName);
prepCmd.setNodeName(zkController.getNodeName());
@ -585,6 +584,28 @@ public class RecoveryStrategy extends Thread implements Closeable {
if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) {
prepCmd.setOnlyIfLeaderActive(true);
}
final int maxTries = 30;
for (int numTries = 0; numTries < maxTries; numTries++) {
try {
sendPrepRecoveryCmd(leaderBaseUrl, prepCmd);
break;
} catch (ExecutionException e) {
SolrServerException solrException = (SolrServerException) e.getCause();
if (solrException.getRootCause() instanceof SocketTimeoutException && numTries < maxTries) {
LOG.warn("Socket timeout when send prep recovery cmd, retrying.. ");
continue;
}
throw e;
}
}
}
private void sendPrepRecoveryCmd(String leaderBaseUrl, WaitForState prepCmd)
throws SolrServerException, IOException, InterruptedException, ExecutionException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) {
client.setConnectionTimeout(10000);
client.setSoTimeout(10000);
HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);
prevSendPreRecoveryHttpUriRequest = mrr.httpUriRequest;

View File

@ -87,7 +87,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
URI backupPath = repository.resolve(location, backupName);
ZkStateReader zkStateReader = ocmh.zkStateReader;
BackupManager backupMgr = new BackupManager(repository, zkStateReader, restoreCollectionName);
BackupManager backupMgr = new BackupManager(repository, zkStateReader);
Properties properties = backupMgr.readBackupProperties(location, backupName);
String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP);

View File

@ -110,7 +110,6 @@ import org.apache.solr.response.RubyResponseWriter;
import org.apache.solr.response.SchemaXmlResponseWriter;
import org.apache.solr.response.SmileResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.SortingResponseWriter;
import org.apache.solr.response.XMLResponseWriter;
import org.apache.solr.response.transform.TransformerFactory;
import org.apache.solr.rest.ManagedResourceStorage;
@ -2332,7 +2331,6 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
m.put("raw", new RawResponseWriter());
m.put(CommonParams.JAVABIN, new BinaryResponseWriter());
m.put("csv", new CSVResponseWriter());
m.put("xsort", new SortingResponseWriter());
m.put("schema.xml", new SchemaXmlResponseWriter());
m.put("smile", new SmileResponseWriter());
m.put(ReplicationHandler.FILE_STREAM, getFileStreamWriter());
@ -2350,13 +2348,22 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
@Override
public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse response) throws IOException {
RawWriter rawWriter = (RawWriter) response.getValues().get(ReplicationHandler.FILE_STREAM);
if(rawWriter!=null) rawWriter.write(out);
if (rawWriter != null) {
rawWriter.write(out);
if (rawWriter instanceof Closeable) ((Closeable) rawWriter).close();
}
}
@Override
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
RawWriter rawWriter = (RawWriter) response.getValues().get(ReplicationHandler.FILE_STREAM);
if (rawWriter != null) {
return rawWriter.getContentType();
} else {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
}
}
};
}
@ -2365,6 +2372,9 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
}
public interface RawWriter {
default String getContentType() {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
}
void write(OutputStream os) throws IOException ;
}
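The default getContentType() added to RawWriter above lets a handler stream raw bytes with a content type other than the javabin default. A minimal sketch of a writer registered under ReplicationHandler.FILE_STREAM, mirroring how the response writer above looks it up; the content type and payload are illustrative, not part of this commit:

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.solr.core.SolrCore;
    import org.apache.solr.handler.ReplicationHandler;
    import org.apache.solr.response.SolrQueryResponse;

    public class RawWriterSketch {
      static void addRawPayload(SolrQueryResponse rsp) {
        rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {
          @Override
          public String getContentType() {
            return "application/json";   // overrides the javabin default
          }
          @Override
          public void write(OutputStream os) throws IOException {
            os.write("{\"ok\":true}".getBytes(StandardCharsets.UTF_8));
          }
        });
      }
    }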

View File

@ -68,7 +68,7 @@ public class BackupManager {
protected final ZkStateReader zkStateReader;
protected final BackupRepository repository;
public BackupManager(BackupRepository repository, ZkStateReader zkStateReader, String collectionName) {
public BackupManager(BackupRepository repository, ZkStateReader zkStateReader) {
this.repository = Objects.requireNonNull(repository);
this.zkStateReader = Objects.requireNonNull(zkStateReader);
}
@ -126,6 +126,7 @@ public class BackupManager {
*
* @param backupLoc The base path used to store the backup data.
* @param backupId The unique name for the backup.
* @param collectionName The name of the collection whose meta-data is to be returned.
* @return the meta-data information for the backed-up collection.
* @throws IOException in case of errors.
*/

View File

@ -0,0 +1,468 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.fs.Path;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
/**
* This class provides utility functions required for Solr snapshots functionality.
*/
public class SolrSnapshotsTool implements Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final DateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z", Locale.getDefault());
private static final String CREATE = "create";
private static final String DELETE = "delete";
private static final String LIST = "list";
private static final String DESCRIBE = "describe";
private static final String PREPARE_FOR_EXPORT = "prepare-snapshot-export";
private static final String EXPORT_SNAPSHOT = "export";
private static final String HELP = "help";
private static final String COLLECTION = "c";
private static final String TEMP_DIR = "t";
private static final String DEST_DIR = "d";
private static final String SOLR_ZK_ENSEMBLE = "z";
private static final String HDFS_PATH_PREFIX = "p";
private static final String BACKUP_REPO_NAME = "r";
private static final String ASYNC_REQ_ID = "i";
private static final List<String> OPTION_HELP_ORDER = Arrays.asList(CREATE, DELETE, LIST, DESCRIBE,
PREPARE_FOR_EXPORT, EXPORT_SNAPSHOT, HELP, SOLR_ZK_ENSEMBLE, COLLECTION, DEST_DIR, BACKUP_REPO_NAME,
ASYNC_REQ_ID, TEMP_DIR, HDFS_PATH_PREFIX);
private final CloudSolrClient solrClient;
public SolrSnapshotsTool(String solrZkEnsemble) {
solrClient = (new CloudSolrClient.Builder()).withZkHost(solrZkEnsemble).build();
}
@Override
public void close() throws IOException {
if (solrClient != null) {
solrClient.close();
}
}
public void createSnapshot(String collectionName, String snapshotName) {
CollectionAdminRequest.CreateSnapshot createSnap = new CollectionAdminRequest.CreateSnapshot(collectionName, snapshotName);
CollectionAdminResponse resp;
try {
resp = createSnap.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The CREATESNAPSHOT request failed. The status code is " + resp.getStatus());
System.out.println("Successfully created snapshot with name " + snapshotName + " for collection " + collectionName);
} catch (Exception e) {
log.error("Failed to create a snapshot with name " + snapshotName + " for collection " + collectionName, e);
System.out.println("Failed to create a snapshot with name " + snapshotName + " for collection " + collectionName
+" due to following error : "+e.getLocalizedMessage());
}
}
public void deleteSnapshot(String collectionName, String snapshotName) {
CollectionAdminRequest.DeleteSnapshot deleteSnap = new CollectionAdminRequest.DeleteSnapshot(collectionName, snapshotName);
CollectionAdminResponse resp;
try {
resp = deleteSnap.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The DELETESNAPSHOT request failed. The status code is " + resp.getStatus());
System.out.println("Successfully deleted snapshot with name " + snapshotName + " for collection " + collectionName);
} catch (Exception e) {
log.error("Failed to delete a snapshot with name " + snapshotName + " for collection " + collectionName, e);
System.out.println("Failed to delete a snapshot with name " + snapshotName + " for collection " + collectionName
+" due to following error : "+e.getLocalizedMessage());
}
}
@SuppressWarnings("rawtypes")
public void listSnapshots(String collectionName) {
CollectionAdminRequest.ListSnapshots listSnaps = new CollectionAdminRequest.ListSnapshots(collectionName);
CollectionAdminResponse resp;
try {
resp = listSnaps.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The LISTSNAPSHOTS request failed. The status code is " + resp.getStatus());
NamedList apiResult = (NamedList) resp.getResponse().get(SolrSnapshotManager.SNAPSHOTS_INFO);
for (int i = 0; i < apiResult.size(); i++) {
System.out.println(apiResult.getName(i));
}
} catch (Exception e) {
log.error("Failed to list snapshots for collection " + collectionName, e);
System.out.println("Failed to list snapshots for collection " + collectionName
+" due to following error : "+e.getLocalizedMessage());
}
}
public void describeSnapshot(String collectionName, String snapshotName) {
try {
Collection<CollectionSnapshotMetaData> snaps = listCollectionSnapshots(collectionName);
for (CollectionSnapshotMetaData m : snaps) {
if (snapshotName.equals(m.getName())) {
System.out.println("Name: " + m.getName());
System.out.println("Status: " + m.getStatus());
System.out.println("Time of creation: " + dateFormat.format(m.getCreationDate()));
System.out.println("Total number of cores with snapshot: " + m.getReplicaSnapshots().size());
System.out.println("-----------------------------------");
for (CoreSnapshotMetaData n : m.getReplicaSnapshots()) {
StringBuilder builder = new StringBuilder();
builder.append("Core [name=");
builder.append(n.getCoreName());
builder.append(", leader=");
builder.append(n.isLeader());
builder.append(", generation=");
builder.append(n.getGenerationNumber());
builder.append(", indexDirPath=");
builder.append(n.getIndexDirPath());
builder.append("]\n");
System.out.println(builder.toString());
}
}
}
} catch (Exception e) {
log.error("Failed to fetch snapshot details", e);
System.out.println("Failed to fetch snapshot details due to following error : " + e.getLocalizedMessage());
}
}
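/**
 * Returns, for each shard of the collection, the index file paths recorded in the named snapshot.
 * A replica that was the leader at snapshot creation time is preferred, and if a path prefix is
 * supplied the index directory is re-rooted under that prefix.
 */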
public Map<String, List<String>> getIndexFilesPathForSnapshot(String collectionName, String snapshotName, Optional<String> pathPrefix)
throws SolrServerException, IOException {
Map<String, List<String>> result = new HashMap<>();
Collection<CollectionSnapshotMetaData> snaps = listCollectionSnapshots(collectionName);
Optional<CollectionSnapshotMetaData> meta = Optional.empty();
for (CollectionSnapshotMetaData m : snaps) {
if (snapshotName.equals(m.getName())) {
meta = Optional.of(m);
}
}
if (!meta.isPresent()) {
throw new IllegalArgumentException("The snapshot named " + snapshotName
+ " is not found for collection " + collectionName);
}
DocCollection collectionState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
for (Slice s : collectionState.getSlices()) {
List<CoreSnapshotMetaData> replicaSnaps = meta.get().getReplicaSnapshotsForShard(s.getName());
// Prepare a list of *existing* replicas (since one or more replicas could have been deleted after the snapshot creation).
List<CoreSnapshotMetaData> availableReplicas = new ArrayList<>();
for (CoreSnapshotMetaData m : replicaSnaps) {
if (isReplicaAvailable(s, m.getCoreName())) {
availableReplicas.add(m);
}
}
if (availableReplicas.isEmpty()) {
throw new IllegalArgumentException(
"The snapshot named " + snapshotName + " not found for shard "
+ s.getName() + " of collection " + collectionName);
}
// Prefer a leader replica (at the time when the snapshot was created).
CoreSnapshotMetaData coreSnap = availableReplicas.get(0);
for (CoreSnapshotMetaData m : availableReplicas) {
if (m.isLeader()) {
coreSnap = m;
}
}
String indexDirPath = coreSnap.getIndexDirPath();
if (pathPrefix.isPresent()) {
// If the path prefix is specified, rebuild the path to the index directory.
Path t = new Path(coreSnap.getIndexDirPath());
indexDirPath = (new Path(pathPrefix.get(), t.toUri().getPath())).toString();
}
List<String> paths = new ArrayList<>();
for (String fileName : coreSnap.getFiles()) {
Path p = new Path(indexDirPath, fileName);
paths.add(p.toString());
}
result.put(s.getName(), paths);
}
return result;
}
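/**
 * Writes one copy-listing file per shard under localFsPath (the file name is the shard name), each
 * containing the newline-separated index file paths recorded in the named snapshot for that shard.
 */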
public void buildCopyListings(String collectionName, String snapshotName, String localFsPath, Optional<String> pathPrefix)
throws SolrServerException, IOException {
Map<String, List<String>> paths = getIndexFilesPathForSnapshot(collectionName, snapshotName, pathPrefix);
for (Map.Entry<String,List<String>> entry : paths.entrySet()) {
StringBuilder filesBuilder = new StringBuilder();
for (String filePath : entry.getValue()) {
filesBuilder.append(filePath);
filesBuilder.append("\n");
}
String files = filesBuilder.toString().trim();
try (Writer w = new OutputStreamWriter(new FileOutputStream(new File(localFsPath, entry.getKey())), StandardCharsets.UTF_8)) {
w.write(files);
}
}
}
public void backupCollectionMetaData(String collectionName, String snapshotName, String backupLoc) throws SolrServerException, IOException {
// Backup the collection meta-data
CollectionAdminRequest.Backup backup = new CollectionAdminRequest.Backup(collectionName, snapshotName);
backup.setIndexBackupStrategy(CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY);
backup.setLocation(backupLoc);
CollectionAdminResponse resp = backup.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The request failed. The status code is " + resp.getStatus());
}
public void prepareForExport(String collectionName, String snapshotName, String localFsPath, Optional<String> pathPrefix, String destPath) {
try {
buildCopyListings(collectionName, snapshotName, localFsPath, pathPrefix);
System.out.println("Successfully prepared copylisting for the snapshot export.");
} catch (Exception e) {
log.error("Failed to prepare a copylisting for snapshot with name " + snapshotName + " for collection "
+ collectionName, e);
System.out.println("Failed to prepare a copylisting for snapshot with name " + snapshotName + " for collection "
+ collectionName + " due to following error : " + e.getLocalizedMessage());
System.exit(1);
}
try {
backupCollectionMetaData(collectionName, snapshotName, destPath);
System.out.println("Successfully backed up collection meta-data");
} catch (Exception e) {
log.error("Failed to backup collection meta-data for collection " + collectionName, e);
System.out.println("Failed to backup collection meta-data for collection " + collectionName
+ " due to following error : " + e.getLocalizedMessage());
System.exit(1);
}
}
public void exportSnapshot(String collectionName, String snapshotName, String destPath, Optional<String> backupRepo,
Optional<String> asyncReqId) {
try {
CollectionAdminRequest.Backup backup = new CollectionAdminRequest.Backup(collectionName, snapshotName);
backup.setIndexBackupStrategy(CollectionAdminParams.COPY_FILES_STRATEGY);
backup.setLocation(destPath);
if (backupRepo.isPresent()) {
backup.setRepositoryName(backupRepo.get());
}
if (asyncReqId.isPresent()) {
backup.setAsyncId(asyncReqId.get());
}
CollectionAdminResponse resp = backup.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0, "The request failed. The status code is " + resp.getStatus());
} catch (Exception e) {
log.error("Failed to backup collection meta-data for collection " + collectionName, e);
System.out.println("Failed to backup collection meta-data for collection " + collectionName
+ " due to following error : " + e.getLocalizedMessage());
System.exit(1);
}
}
public static void main(String[] args) throws IOException {
CommandLineParser parser = new PosixParser();
Options options = new Options();
options.addOption(null, CREATE, true, "This command will create a snapshot with the specified name");
options.addOption(null, DELETE, true, "This command will delete a snapshot with the specified name");
options.addOption(null, LIST, false, "This command will list all the named snapshots for the specified collection.");
options.addOption(null, DESCRIBE, true, "This command will print details for a named snapshot for the specified collection.");
options.addOption(null, PREPARE_FOR_EXPORT, true, "This command will prepare copylistings for the specified snapshot."
+ " This command should only be used only if Solr is deployed with Hadoop and collection index files are stored on a shared"
+ " file-system e.g. HDFS");
options.addOption(null, EXPORT_SNAPSHOT, true, "This command will create a backup for the specified snapshot.");
options.addOption(null, HELP, false, "This command will print the help message for the snapshots related commands.");
options.addOption(TEMP_DIR, true, "This parameter specifies the path of a temporary directory on local filesystem"
+ " during prepare-snapshot-export command.");
options.addOption(DEST_DIR, true, "This parameter specifies the path on shared file-system (e.g. HDFS) where the snapshot related"
+ " information should be stored.");
options.addOption(COLLECTION, true, "This parameter specifies the name of the collection to be used during snapshot operation");
options.addOption(SOLR_ZK_ENSEMBLE, true, "This parameter specifies the Solr Zookeeper ensemble address");
options.addOption(HDFS_PATH_PREFIX, true, "This parameter specifies the HDFS URI prefix to be used"
+ " during snapshot export preparation. This is applicable only if the Solr collection index files are stored on HDFS.");
options.addOption(BACKUP_REPO_NAME, true, "This parameter specifies the name of the backup repository to be used"
+ " during snapshot export preparation");
options.addOption(ASYNC_REQ_ID, true, "This parameter specifies the async request identifier to be used"
+ " during snapshot export preparation");
CommandLine cmd = null;
try {
cmd = parser.parse(options, args);
} catch (ParseException e) {
System.out.println(e.getLocalizedMessage());
printHelp(options);
System.exit(1);
}
if (cmd.hasOption(CREATE) || cmd.hasOption(DELETE) || cmd.hasOption(LIST) || cmd.hasOption(DESCRIBE)
|| cmd.hasOption(PREPARE_FOR_EXPORT) || cmd.hasOption(EXPORT_SNAPSHOT)) {
try (SolrSnapshotsTool tool = new SolrSnapshotsTool(cmd.getOptionValue(SOLR_ZK_ENSEMBLE))) {
if (cmd.hasOption(CREATE)) {
String snapshotName = cmd.getOptionValue(CREATE);
String collectionName = cmd.getOptionValue(COLLECTION);
tool.createSnapshot(collectionName, snapshotName);
} else if (cmd.hasOption(DELETE)) {
String snapshotName = cmd.getOptionValue(DELETE);
String collectionName = cmd.getOptionValue(COLLECTION);
tool.deleteSnapshot(collectionName, snapshotName);
} else if (cmd.hasOption(LIST)) {
String collectionName = cmd.getOptionValue(COLLECTION);
tool.listSnapshots(collectionName);
} else if (cmd.hasOption(DESCRIBE)) {
String snapshotName = cmd.getOptionValue(DESCRIBE);
String collectionName = cmd.getOptionValue(COLLECTION);
tool.describeSnapshot(collectionName, snapshotName);
} else if (cmd.hasOption(PREPARE_FOR_EXPORT)) {
String snapshotName = cmd.getOptionValue(PREPARE_FOR_EXPORT);
String collectionName = cmd.getOptionValue(COLLECTION);
String localFsDir = requiredArg(options, cmd, TEMP_DIR);
String hdfsOpDir = requiredArg(options, cmd, DEST_DIR);
Optional<String> pathPrefix = Optional.ofNullable(cmd.getOptionValue(HDFS_PATH_PREFIX));
if (pathPrefix.isPresent()) {
try {
new URI(pathPrefix.get());
} catch (URISyntaxException e) {
System.out.println(
"The specified File system path prefix " + pathPrefix.get()
+ " is invalid. The error is " + e.getLocalizedMessage());
System.exit(1);
}
}
tool.prepareForExport(collectionName, snapshotName, localFsDir, pathPrefix, hdfsOpDir);
} else if (cmd.hasOption(EXPORT_SNAPSHOT)) {
String snapshotName = cmd.getOptionValue(EXPORT_SNAPSHOT);
String collectionName = cmd.getOptionValue(COLLECTION);
String destDir = requiredArg(options, cmd, DEST_DIR);
Optional<String> backupRepo = Optional.ofNullable(cmd.getOptionValue(BACKUP_REPO_NAME));
Optional<String> asyncReqId = Optional.ofNullable(cmd.getOptionValue(ASYNC_REQ_ID));
tool.exportSnapshot(collectionName, snapshotName, destDir, backupRepo, asyncReqId);
}
}
} else if (cmd.hasOption(HELP)) {
printHelp(options);
} else {
System.out.println("Unknown command specified.");
printHelp(options);
}
}
private static String requiredArg(Options options, CommandLine cmd, String optVal) {
if (!cmd.hasOption(optVal)) {
System.out.println("Please specify the value for option " + optVal);
printHelp(options);
System.exit(1);
}
return cmd.getOptionValue(optVal);
}
private static boolean isReplicaAvailable (Slice s, String coreName) {
for (Replica r: s.getReplicas()) {
if (coreName.equals(r.getCoreName())) {
return true;
}
}
return false;
}
private Collection<CollectionSnapshotMetaData> listCollectionSnapshots(String collectionName)
throws SolrServerException, IOException {
CollectionAdminRequest.ListSnapshots listSnapshots = new CollectionAdminRequest.ListSnapshots(collectionName);
CollectionAdminResponse resp = listSnapshots.process(solrClient);
Preconditions.checkState(resp.getStatus() == 0);
NamedList apiResult = (NamedList) resp.getResponse().get(SolrSnapshotManager.SNAPSHOTS_INFO);
Collection<CollectionSnapshotMetaData> result = new ArrayList<>();
for (int i = 0; i < apiResult.size(); i++) {
result.add(new CollectionSnapshotMetaData((NamedList<Object>)apiResult.getVal(i)));
}
return result;
}
private static void printHelp(Options options) {
StringBuilder helpFooter = new StringBuilder();
helpFooter.append("Examples: \n");
helpFooter.append("snapshotscli.sh --create snapshot-1 -c books -z localhost:2181 \n");
helpFooter.append("snapshotscli.sh --list -c books -z localhost:2181 \n");
helpFooter.append("snapshotscli.sh --describe snapshot-1 -c books -z localhost:2181 \n");
helpFooter.append("snapshotscli.sh --export snapshot-1 -c books -z localhost:2181 -b repo -l backupPath -i req_0 \n");
helpFooter.append("snapshotscli.sh --delete snapshot-1 -c books -z localhost:2181 \n");
HelpFormatter formatter = new HelpFormatter();
formatter.setOptionComparator(new OptionComarator<>());
formatter.printHelp("SolrSnapshotsTool", null, options, helpFooter.toString(), false);
}
private static class OptionComarator<T extends Option> implements Comparator<T> {
public int compare(T o1, T o2) {
String s1 = o1.hasLongOpt() ? o1.getLongOpt() : o1.getOpt();
String s2 = o2.hasLongOpt() ? o2.getLongOpt() : o2.getOpt();
return OPTION_HELP_ORDER.indexOf(s1) - OPTION_HELP_ORDER.indexOf(s2);
}
}
}
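For context, a minimal sketch of driving the tool's public methods shown above from Java rather than from the command line; the ZooKeeper address and the collection/snapshot names are illustrative and not part of the patch:

// Illustrative only: assumes a reachable ZooKeeper ensemble and an existing collection named "books".
try (SolrSnapshotsTool tool = new SolrSnapshotsTool("localhost:2181")) {
  tool.createSnapshot("books", "snapshot-1");   // issues the CREATESNAPSHOT collection API call
  tool.listSnapshots("books");                  // prints the snapshot names for the collection
  tool.describeSnapshot("books", "snapshot-1"); // prints per-core snapshot metadata
  tool.deleteSnapshot("books", "snapshot-1");   // issues the DELETESNAPSHOT collection API call
} catch (IOException e) {
  // close() may throw IOException; handle or propagate as appropriate
}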

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import static org.apache.solr.common.params.CommonParams.JSON;
public class ExportHandler extends SearchHandler {
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
try {
super.handleRequestBody(req, rsp);
} catch (Exception e) {
rsp.setException(e);
}
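// Remember the format the client asked for (the legacy "xsort" writer type maps to JSON), then force the
// effective wt to ReplicationHandler.FILE_STREAM so the ExportWriter added below streams the response directly.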
String wt = req.getParams().get(CommonParams.WT, JSON);
if("xsort".equals(wt)) wt = JSON;
Map<String, String> map = new HashMap<>(1);
map.put(CommonParams.WT, ReplicationHandler.FILE_STREAM);
req.setParams(SolrParams.wrapDefaults(new MapSolrParams(map),req.getParams()));
rsp.add(ReplicationHandler.FILE_STREAM, new ExportWriter(req, rsp, wt));
}
}

View File

@ -14,17 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response;
package org.apache.solr.handler;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.List;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
@ -40,11 +44,18 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.JSONResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
@ -61,24 +72,65 @@ import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.apache.solr.common.util.Utils.makeMap;
public class SortingResponseWriter implements QueryResponseWriter {
public class ExportWriter implements SolrCore.RawWriter, Closeable {
private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private OutputStreamWriter respWriter;
final SolrQueryRequest req;
final SolrQueryResponse res;
FieldWriter[] fieldWriters;
int totalHits = 0;
FixedBitSet[] sets = null;
PushWriter writer;
private String wt;
ExportWriter(SolrQueryRequest req, SolrQueryResponse res, String wt) {
this.req = req;
this.res = res;
this.wt = wt;
public void init(NamedList args) {
/* NOOP */
}
public String getContentType(SolrQueryRequest req, SolrQueryResponse res) {
return "application/json";
@Override
public String getContentType() {
if ("javabin".equals(wt)) {
return BinaryResponseParser.BINARY_CONTENT_TYPE;
} else return "json";
}
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse res) throws IOException {
Exception e1 = res.getException();
if(e1 != null) {
if(!(e1 instanceof IgnoreException)) {
writeException(e1, writer, false);
@Override
public void close() throws IOException {
if (writer != null) writer.close();
if (respWriter != null) {
respWriter.flush();
respWriter.close();
}
}
protected void writeException(Exception e, PushWriter w, boolean log) throws IOException {
w.writeMap(mw -> {
mw.put("responseHeader", singletonMap("status", 400))
.put("response", makeMap(
"numFound", 0,
"docs", singletonList(singletonMap("EXCEPTION", e.getMessage()))));
});
if (log) {
SolrException.log(logger, e);
}
}
public void write(OutputStream os) throws IOException {
respWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8);
writer = JSONResponseWriter.getPushWriter(respWriter, req, res);
Exception exception = res.getException();
if (exception != null) {
if (!(exception instanceof IgnoreException)) {
writeException(exception, writer, false);
}
return;
}
@ -113,8 +165,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
// You'll have to uncomment the if below to hit the null pointer exception.
// This is such an unusual case (i.e. an empty index) that catching this condition here is probably OK.
// This came to light in the very artificial case of indexing a single doc to Cloud.
int totalHits = 0;
FixedBitSet[] sets = null;
if (req.getContext().get("totalHits") != null) {
totalHits = ((Integer)req.getContext().get("totalHits")).intValue();
sets = (FixedBitSet[]) req.getContext().get("export");
@ -145,8 +195,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
}
}
FieldWriter[] fieldWriters = null;
try {
fieldWriters = getFieldWriters(fields, req.getSearcher());
} catch (Exception e) {
@ -154,9 +202,17 @@ public class SortingResponseWriter implements QueryResponseWriter {
return;
}
writer.write("{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":"+totalHits+", \"docs\":[");
writer.writeMap(m -> {
m.put("responseHeader", singletonMap("status", 0));
m.put("response", (MapWriter) mw -> {
mw.put("numFound", totalHits);
mw.put("docs", (IteratorWriter) iw -> writeDocs(req, iw, sort));
});
});
}
protected void writeDocs(SolrQueryRequest req, IteratorWriter.ItemWriter writer, Sort sort) throws IOException {
//Write the data.
List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
SortDoc sortDoc = getSortDoc(req.getSearcher(), sort.getSort());
@ -165,7 +221,6 @@ public class SortingResponseWriter implements QueryResponseWriter {
SortQueue queue = new SortQueue(queueSize, sortDoc);
SortDoc[] outDocs = new SortDoc[queueSize];
boolean commaNeeded = false;
while(count < totalHits) {
//long begin = System.nanoTime();
queue.reset();
@ -199,12 +254,10 @@ public class SortingResponseWriter implements QueryResponseWriter {
try {
for(int i=outDocsIndex; i>=0; --i) {
SortDoc s = outDocs[i];
if(commaNeeded){writer.write(',');}
writer.write('{');
writeDoc(s, leaves, fieldWriters, sets, writer);
writer.write('}');
commaNeeded = true;
writer.add((MapWriter) ew -> {
writeDoc(s, leaves, ew);
s.reset();
});
}
} catch(Throwable e) {
Throwable ex = e;
@ -224,54 +277,24 @@ public class SortingResponseWriter implements QueryResponseWriter {
}
}
}
//System.out.println("Sort Time 2:"+Long.toString(total/1000000));
writer.write("]}}");
writer.flush();
}
public static class IgnoreException extends IOException {
public void printStackTrace(PrintWriter pw) {
pw.print("Early Client Disconnect");
}
public String getMessage() {
return "Early Client Disconnect";
}
}
protected void writeDoc(SortDoc sortDoc,
List<LeafReaderContext> leaves,
FieldWriter[] fieldWriters,
FixedBitSet[] sets,
Writer out) throws IOException{
EntryWriter ew) throws IOException {
int ord = sortDoc.ord;
FixedBitSet set = sets[ord];
set.clear(sortDoc.docId);
LeafReaderContext context = leaves.get(ord);
int fieldIndex = 0;
for(FieldWriter fieldWriter : fieldWriters) {
if(fieldWriter.write(sortDoc.docId, context.reader(), out, fieldIndex)){
for (FieldWriter fieldWriter : fieldWriters) {
if (fieldWriter.write(sortDoc.docId, context.reader(), ew, fieldIndex)) {
++fieldIndex;
}
}
}
protected void writeException(Exception e, Writer out, boolean log) throws IOException{
out.write("{\"responseHeader\": {\"status\": 400}, \"response\":{\"numFound\":0, \"docs\":[");
out.write("{\"EXCEPTION\":\"");
writeStr(e.getMessage(), out);
out.write("\"}");
out.write("]}}");
out.flush();
if(log) {
SolrException.log(logger, e);
}
}
protected FieldWriter[] getFieldWriters(String[] fields, SolrIndexSearcher searcher) throws IOException {
IndexSchema schema = searcher.getSchema();
FieldWriter[] writers = new FieldWriter[fields.length];
@ -291,50 +314,49 @@ public class SortingResponseWriter implements QueryResponseWriter {
boolean multiValued = schemaField.multiValued();
FieldType fieldType = schemaField.getType();
if(fieldType instanceof TrieIntField) {
if(multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true);
if (fieldType instanceof TrieIntField) {
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else {
writers[i] = new IntFieldWriter(field);
}
} else if (fieldType instanceof TrieLongField) {
if(multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true);
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else {
writers[i] = new LongFieldWriter(field);
}
} else if (fieldType instanceof TrieFloatField) {
if(multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true);
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else {
writers[i] = new FloatFieldWriter(field);
}
} else if(fieldType instanceof TrieDoubleField) {
if(multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true);
} else if (fieldType instanceof TrieDoubleField) {
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else {
writers[i] = new DoubleFieldWriter(field);
}
} else if(fieldType instanceof StrField) {
if(multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, false);
} else if (fieldType instanceof StrField) {
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
} else {
writers[i] = new StringFieldWriter(field, fieldType);
}
} else if (fieldType instanceof TrieDateField) {
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, false);
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
} else {
writers[i] = new DateFieldWriter(field);
}
} else if(fieldType instanceof BoolField) {
if(multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, true);
} else if (fieldType instanceof BoolField) {
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true);
} else {
writers[i] = new BoolFieldWriter(field, fieldType);
}
}
else {
} else {
throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean");
}
}
@ -399,7 +421,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
// we can just use the existing StringValue here.
LeafReader reader = searcher.getSlowAtomicReader();
SortedDocValues vals = reader.getSortedDocValues(field);
if(reverse) {
if (reverse) {
sortValues[i] = new StringValue(vals, field, new IntDesc());
} else {
sortValues[i] = new StringValue(vals, field, new IntAsc());
@ -439,7 +461,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
private void populate() {
Object[] heap = getHeapArray();
cache = new SortDoc[heap.length];
for(int i=1; i<heap.length; i++) {
for (int i = 1; i < heap.length; i++) {
cache[i] = heap[i] = proto.copy();
}
size = maxSize;
@ -470,7 +492,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
for(SortValue value : sortValues) {
for (SortValue value : sortValues) {
value.setNextReader(context);
}
}
@ -1295,7 +1317,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
}
protected abstract class FieldWriter {
public abstract boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException;
public abstract boolean write(int docId, LeafReader reader, EntryWriter out, int fieldIndex) throws IOException;
}
class IntFieldWriter extends FieldWriter {
@ -1305,7 +1327,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
int val;
if (vals.advance(docId) == docId) {
@ -1313,14 +1335,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else {
val = 0;
}
if(fieldIndex>0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Integer.toString(val));
ew.put(this.field, val);
return true;
}
}
@ -1328,57 +1343,31 @@ public class SortingResponseWriter implements QueryResponseWriter {
class MultiFieldWriter extends FieldWriter {
private String field;
private FieldType fieldType;
private SchemaField schemaField;
private boolean numeric;
private CharsRefBuilder cref = new CharsRefBuilder();
public MultiFieldWriter(String field, FieldType fieldType, boolean numeric) {
public MultiFieldWriter(String field, FieldType fieldType, SchemaField schemaField, boolean numeric) {
this.field = field;
this.fieldType = fieldType;
this.schemaField = schemaField;
this.numeric = numeric;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter out, int fieldIndex) throws IOException {
SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field);
List<Long> ords;
if (vals.advance(docId) == docId) {
ords = new ArrayList();
long o = -1;
if (vals.advance(docId) != docId) return false;
out.put(this.field,
(IteratorWriter) w -> {
long o;
while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
ords.add(o);
}
assert ords.size() > 0;
} else {
return false;
}
if(fieldIndex>0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write('[');
int v = 0;
for(long ord : ords) {
BytesRef ref = vals.lookupOrd(ord);
BytesRef ref = vals.lookupOrd(o);
fieldType.indexedToReadable(ref, cref);
if(v > 0) {
out.write(',');
IndexableField f = fieldType.createField(schemaField, cref.toString(), 1.0f);
if (f == null) w.add(cref.toString());
else w.add(fieldType.toObject(f));
}
if(!numeric) {
out.write('"');
}
writeStr(cref.toString(), out);
if(!numeric) {
out.write('"');
}
++v;
}
out.write("]");
});
return true;
}
}
@ -1390,7 +1379,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val;
if (vals.advance(docId) == docId) {
@ -1398,14 +1387,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else {
val = 0;
}
if(fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Long.toString(val));
ew.put(field, val);
return true;
}
}
@ -1417,7 +1399,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val;
if (vals.advance(docId) == docId) {
@ -1425,17 +1407,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else {
val = 0;
}
if (fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write('"');
writeStr(new Date(val).toInstant().toString(), out);
out.write('"');
ew.put(this.field, new Date(val));
return true;
}
}
@ -1450,7 +1422,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.fieldType = fieldType;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
SortedDocValues vals = DocValues.getSorted(reader, this.field);
if (vals.advance(docId) != docId) {
return false;
@ -1459,17 +1431,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
BytesRef ref = vals.lookupOrd(ord);
fieldType.indexedToReadable(ref, cref);
if (fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
//out.write('"');
writeStr(cref.toString(), out);
//out.write('"');
ew.put(this.field, "true".equals(cref.toString()));
return true;
}
}
@ -1481,7 +1443,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
int val;
if (vals.advance(docId) == docId) {
@ -1489,14 +1451,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else {
val = 0;
}
if(fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Float.toString(Float.intBitsToFloat(val)));
ew.put(this.field, Float.intBitsToFloat(val));
return true;
}
}
@ -1508,7 +1463,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val;
if (vals.advance(docId) == docId) {
@ -1516,14 +1471,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
} else {
val = 0;
}
if(fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(':');
out.write(Double.toString(Double.longBitsToDouble(val)));
ew.put(this.field, Double.longBitsToDouble(val));
return true;
}
}
@ -1538,7 +1486,7 @@ public class SortingResponseWriter implements QueryResponseWriter {
this.fieldType = fieldType;
}
public boolean write(int docId, LeafReader reader, Writer out, int fieldIndex) throws IOException {
public boolean write(int docId, LeafReader reader, EntryWriter ew, int fieldIndex) throws IOException {
SortedDocValues vals = DocValues.getSorted(reader, this.field);
if (vals.advance(docId) != docId) {
return false;
@ -1547,64 +1495,11 @@ public class SortingResponseWriter implements QueryResponseWriter {
BytesRef ref = vals.lookupOrd(ord);
fieldType.indexedToReadable(ref, cref);
if(fieldIndex > 0) {
out.write(',');
}
out.write('"');
out.write(this.field);
out.write('"');
out.write(":");
out.write('"');
writeStr(cref.toString(), out);
out.write('"');
ew.put(this.field, cref.toString());
return true;
}
}
private void writeStr(String val, Writer writer) throws IOException {
for (int i=0; i<val.length(); i++) {
char ch = val.charAt(i);
if ((ch > '#' && ch != '\\' && ch < '\u2028') || ch == ' ') { // fast path
writer.write(ch);
continue;
}
switch(ch) {
case '"':
case '\\':
writer.write('\\');
writer.write(ch);
break;
case '\r': writer.write('\\'); writer.write('r'); break;
case '\n': writer.write('\\'); writer.write('n'); break;
case '\t': writer.write('\\'); writer.write('t'); break;
case '\b': writer.write('\\'); writer.write('b'); break;
case '\f': writer.write('\\'); writer.write('f'); break;
case '\u2028': // fallthrough
case '\u2029':
unicodeEscape(writer,ch);
break;
// case '/':
default: {
if (ch <= 0x1F) {
unicodeEscape(writer,ch);
} else {
writer.write(ch);
}
}
}
}
}
private static char[] hexdigits = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
protected static void unicodeEscape(Appendable out, int ch) throws IOException {
out.append('\\');
out.append('u');
out.append(hexdigits[(ch>>>12) ]);
out.append(hexdigits[(ch>>>8) & 0xf]);
out.append(hexdigits[(ch>>>4) & 0xf]);
out.append(hexdigits[(ch) & 0xf]);
}
public abstract class PriorityQueue<T> {
protected int size = 0;
protected final int maxSize;
@ -1802,4 +1697,15 @@ public class SortingResponseWriter implements QueryResponseWriter {
return (Object[]) heap;
}
}
public class IgnoreException extends IOException {
public void printStackTrace(PrintWriter pw) {
pw.print("Early Client Disconnect");
}
public String getMessage() {
return "Early Client Disconnect";
}
}
}

View File

@ -734,8 +734,14 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existance of " + uri + ". Is it valid?", ex);
}
String strategy = req.getParams().get(CollectionAdminParams.INDEX_BACKUP_STRATEGY, CollectionAdminParams.COPY_FILES_STRATEGY);
if (!CollectionAdminParams.INDEX_BACKUP_STRATEGIES.contains(strategy)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown index backup strategy " + strategy);
}
Map<String, Object> params = req.getParams().getAll(null, NAME, COLLECTION_PROP, CoreAdminParams.COMMIT_NAME);
params.put(CoreAdminParams.BACKUP_LOCATION, location);
params.put(CollectionAdminParams.INDEX_BACKUP_STRATEGY, strategy);
return params;
}),
RESTORE_OP(RESTORE, (req, rsp, h) -> {

View File

@ -37,6 +37,7 @@ import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestInjection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -46,6 +47,8 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
@Override
public void execute(CallInfo it) throws Exception {
assert TestInjection.injectPrepRecoveryOpPauseForever();
final SolrParams params = it.req.getParams();
String cname = params.get(CoreAdminParams.CORE);

View File

@ -17,6 +17,7 @@
package org.apache.solr.handler.component;
import java.lang.invoke.MethodHandles;
import java.net.ConnectException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
@ -116,7 +117,7 @@ public class HttpShardHandler extends ShardHandler {
private List<String> getURLs(String shard, String preferredHostAddress) {
List<String> urls = shardToURLs.get(shard);
if (urls == null) {
urls = httpShardHandlerFactory.makeURLList(shard);
urls = httpShardHandlerFactory.buildURLList(shard);
if (preferredHostAddress != null && urls.size() > 1) {
preferCurrentHostForDistributedReq(preferredHostAddress, urls);
}
@ -320,6 +321,8 @@ public class HttpShardHandler extends ShardHandler {
}
}
final ReplicaListTransformer replicaListTransformer = httpShardHandlerFactory.getReplicaListTransformer(req);
if (shards != null) {
List<String> lst = StrUtils.splitSmart(shards, ",", true);
rb.shards = lst.toArray(new String[lst.size()]);
@ -404,7 +407,11 @@ public class HttpShardHandler extends ShardHandler {
for (int i=0; i<rb.shards.length; i++) {
if (rb.shards[i] == null) {
final List<String> shardUrls;
if (rb.shards[i] != null) {
shardUrls = StrUtils.splitSmart(rb.shards[i], "|", true);
replicaListTransformer.transform(shardUrls);
} else {
if (clusterState == null) {
clusterState = zkController.getClusterState();
slices = clusterState.getSlicesMap(cloudDescriptor.getCollectionName());
@ -421,26 +428,25 @@ public class HttpShardHandler extends ShardHandler {
// throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no such shard: " + sliceName);
}
Map<String, Replica> sliceShards = slice.getReplicasMap();
// For now, recreate the | delimited list of equivalent servers
StringBuilder sliceShardsStr = new StringBuilder();
boolean first = true;
for (Replica replica : sliceShards.values()) {
final Collection<Replica> allSliceReplicas = slice.getReplicasMap().values();
final List<Replica> eligibleSliceReplicas = new ArrayList<>(allSliceReplicas.size());
for (Replica replica : allSliceReplicas) {
if (!clusterState.liveNodesContain(replica.getNodeName())
|| replica.getState() != Replica.State.ACTIVE) {
continue;
}
if (first) {
first = false;
} else {
sliceShardsStr.append('|');
}
String url = ZkCoreNodeProps.getCoreUrl(replica);
sliceShardsStr.append(url);
eligibleSliceReplicas.add(replica);
}
if (sliceShardsStr.length() == 0) {
replicaListTransformer.transform(eligibleSliceReplicas);
shardUrls = new ArrayList<>(eligibleSliceReplicas.size());
for (Replica replica : eligibleSliceReplicas) {
String url = ZkCoreNodeProps.getCoreUrl(replica);
shardUrls.add(url);
}
if (shardUrls.isEmpty()) {
boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false);
if (!tolerant) {
// stop the check when there are no replicas available for a shard
@ -448,9 +454,19 @@ public class HttpShardHandler extends ShardHandler {
"no servers hosting shard: " + rb.slices[i]);
}
}
rb.shards[i] = sliceShardsStr.toString();
}
// And now recreate the | delimited list of equivalent servers
final StringBuilder sliceShardsStr = new StringBuilder();
boolean first = true;
for (String shardUrl : shardUrls) {
if (first) {
first = false;
} else {
sliceShardsStr.append('|');
}
sliceShardsStr.append(shardUrl);
}
rb.shards[i] = sliceShardsStr.toString();
}
}
String shards_rows = params.get(ShardParams.SHARDS_ROWS);

View File

@ -31,13 +31,13 @@ import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.URLUtil;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.update.UpdateShardHandlerConfig;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ArrayBlockingQueue;
@ -84,6 +84,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
private final Random r = new Random();
private final ReplicaListTransformer shufflingReplicaListTransformer = new ShufflingReplicaListTransformer(r);
// URL scheme to be used in distributed search.
static final String INIT_URL_SCHEME = "urlScheme";
@ -227,12 +229,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
}
/**
* Creates a randomized list of urls for the given shard.
* Creates a list of urls for the given shard.
*
* @param shard the urls for the shard, separated by '|'
* @return A list of valid urls (including protocol) that are replicas for the shard
*/
public List<String> makeURLList(String shard) {
public List<String> buildURLList(String shard) {
List<String> urls = StrUtils.splitSmart(shard, "|", true);
// convert shard to URL
@ -240,17 +242,14 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
urls.set(i, buildUrl(urls.get(i)));
}
//
// Shuffle the list instead of using round-robin by default.
// This prevents accidental synchronization where multiple shards could get in sync
// and query the same replica at the same time.
//
if (urls.size() > 1)
Collections.shuffle(urls, r);
return urls;
}
ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req)
{
return shufflingReplicaListTransformer;
}
/**
* Creates a new completion service for use by a single set of distributed requests.
*/

View File

@ -973,8 +973,7 @@ public class QueryComponent extends SearchComponent
// Merge the docs via a priority queue so we don't have to sort *all* of the
// documents... we only need to order the top (rows+start)
ShardFieldSortedHitQueue queue;
queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
final ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
NamedList<Object> shardInfo = null;
if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.List;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.ShardParams;
interface ReplicaListTransformer {
/**
* Transforms the passed in list of choices. Transformations can include (but are not limited to)
* reordering of elements (e.g. via shuffling) and removal of elements (i.e. filtering).
*
* @param choices - a list of choices to transform, typically the choices are {@link Replica} objects but choices
* can also be {@link String} objects such as URLs passed in via the {@link ShardParams#SHARDS} parameter.
*/
public void transform(List<?> choices);
}
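As a hedged illustration of this contract, a transformer that filters rather than shuffles could look like the hypothetical sketch below (not part of this change); ShufflingReplicaListTransformer in the next file is the implementation actually wired in by HttpShardHandlerFactory.

// Hypothetical sketch: keep only choices whose string form contains a preferred fragment, but leave
// the list untouched when nothing matches so a shard never ends up with zero eligible URLs.
class PreferredFragmentReplicaListTransformer implements ReplicaListTransformer {
  private final String preferredFragment;
  PreferredFragmentReplicaListTransformer(String preferredFragment) {
    this.preferredFragment = preferredFragment;
  }
  public void transform(List<?> choices) {
    if (choices.size() > 1 && choices.stream().anyMatch(c -> c.toString().contains(preferredFragment))) {
      choices.removeIf(c -> !c.toString().contains(preferredFragment));
    }
  }
}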

View File

@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.Collections;
import java.util.List;
import java.util.Random;
class ShufflingReplicaListTransformer implements ReplicaListTransformer {
private final Random r;
public ShufflingReplicaListTransformer(Random r)
{
this.r = r;
}
public void transform(List<?> choices)
{
if (choices.size() > 1) {
Collections.shuffle(choices, r);
}
}
}

View File

@ -173,16 +173,17 @@ public class DocValuesFacets {
int min=mincount-1; // the smallest value in the top 'N' values
for (int i=(startTermIndex==-1)?1:0; i<nTerms; i++) {
int c = counts[i];
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
if (contains != null) {
final BytesRef term = si.lookupOrd(startTermIndex+i);
if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
continue;
}
}
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// smaller term numbers sort higher, so subtract the term number instead
long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);

View File

@ -71,6 +71,8 @@ public class MacroExpander {
newValues.add(vv);
}
}
}
if (newValues != null) {
newValues.add(newV);
}
}

View File

@ -24,7 +24,11 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
@ -74,6 +78,11 @@ public class JSONResponseWriter implements QueryResponseWriter {
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
return contentType;
}
public static PushWriter getPushWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) {
return new JSONWriter(writer, req, rsp);
}
}
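// For context, a minimal sketch (not part of the patch) of driving the push-style API that getPushWriter
// exposes and that the writeMap/writeIterator overrides of JSONWriter below implement. It assumes a Writer,
// a SolrQueryRequest and a SolrQueryResponse in scope, a caller that may throw IOException, and uses
// illustrative key names; it emits {"responseHeader":{"status":0},"ids":["1","2"]}.
PushWriter pw = JSONResponseWriter.getPushWriter(writer, req, rsp);
pw.writeMap(ew -> {
  ew.put("responseHeader", java.util.Collections.singletonMap("status", 0));
  ew.put("ids", (IteratorWriter) iw -> iw.add("1").add("2"));
});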
class JSONWriter extends TextResponseWriter {
@ -507,6 +516,53 @@ class JSONWriter extends TextResponseWriter {
}
}
@Override
public void writeIterator(IteratorWriter val) throws IOException {
writeArrayOpener(-1);
incLevel();
val.writeIter(new IteratorWriter.ItemWriter() {
boolean first = true;
@Override
public IteratorWriter.ItemWriter add(Object o) throws IOException {
if (!first) {
JSONWriter.this.indent();
JSONWriter.this.writeArraySeparator();
}
JSONWriter.this.writeVal(null, o);
first = false;
return this;
}
});
decLevel();
writeArrayCloser();
}
@Override
public void writeMap(MapWriter val)
throws IOException {
writeMapOpener(-1);
incLevel();
val.writeMap(new EntryWriter() {
boolean isFirst = true;
@Override
public EntryWriter put(String k, Object v) throws IOException {
if (isFirst) {
isFirst = false;
} else {
JSONWriter.this.writeMapSeparator();
}
if (doIndent) JSONWriter.this.indent();
JSONWriter.this.writeKey(k, true);
JSONWriter.this.writeVal(k, v);
return this;
}
});
decLevel();
writeMapCloser();
}
@Override
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
@ -544,12 +600,14 @@ class JSONWriter extends TextResponseWriter {
public void writeArray(String name, List l) throws IOException {
writeArrayOpener(l.size());
writeJsonIter(l.iterator());
writeArrayCloser();
}
@Override
public void writeArray(String name, Iterator val) throws IOException {
writeArrayOpener(-1); // no trivial way to determine array size
writeJsonIter(val);
writeArrayCloser();
}
private void writeJsonIter(Iterator val) throws IOException {
@ -564,7 +622,6 @@ class JSONWriter extends TextResponseWriter {
first=false;
}
decLevel();
writeArrayCloser();
}
//
@ -634,11 +691,6 @@ class ArrayOfNamedValuePairJSONWriter extends JSONWriter {
}
}
@Override
public void writeArray(String name, List l) throws IOException {
writeArray(name, l.iterator());
}
@Override
public void writeNamedList(String name, NamedList val) throws IOException {

View File

@ -31,9 +31,12 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapSerializable;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList;
@ -48,7 +51,7 @@ import org.apache.solr.util.FastWriter;
*
*
*/
public abstract class TextResponseWriter {
public abstract class TextResponseWriter implements PushWriter {
// indent up to 40 spaces
static final char[] indentChars = new char[81];
@ -138,19 +141,19 @@ public abstract class TextResponseWriter {
writeStr(name, f.stringValue(), true);
}
} else if (val instanceof Number) {
writeNumber(name, (Number)val);
writeNumber(name, (Number) val);
} else if (val instanceof Boolean) {
writeBool(name, (Boolean)val);
writeBool(name, (Boolean) val);
} else if (val instanceof Date) {
writeDate(name,(Date)val);
writeDate(name, (Date) val);
} else if (val instanceof Document) {
SolrDocument doc = DocsStreamer.getDoc((Document) val, schema);
writeSolrDocument(name, doc,returnFields, 0 );
writeSolrDocument(name, doc, returnFields, 0);
} else if (val instanceof SolrDocument) {
writeSolrDocument(name, (SolrDocument)val,returnFields, 0);
writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
} else if (val instanceof ResultContext) {
// requires access to IndexReader
writeDocuments(name, (ResultContext)val);
writeDocuments(name, (ResultContext) val);
} else if (val instanceof DocList) {
// Should not happen normally
ResultContext ctx = new BasicResultContext((DocList)val, returnFields, null, null, req);
@ -168,6 +171,8 @@ public abstract class TextResponseWriter {
writeNamedList(name, (NamedList)val);
} else if (val instanceof Path) {
writeStr(name, ((Path) val).toAbsolutePath().toString(), true);
} else if (val instanceof IteratorWriter) {
writeIterator((IteratorWriter) val);
} else if (val instanceof Iterable) {
writeArray(name,((Iterable)val).iterator());
} else if (val instanceof Object[]) {
@ -184,6 +189,8 @@ public abstract class TextResponseWriter {
writeStr(name, val.toString(), true);
} else if (val instanceof WriteableValue) {
((WriteableValue)val).write(name, this);
} else if (val instanceof MapWriter) {
writeMap((MapWriter) val);
} else if (val instanceof MapSerializable) {
//todo find a better way to reuse the map more efficiently
writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
@ -192,6 +199,15 @@ public abstract class TextResponseWriter {
writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
}
}
@Override
public void writeMap(MapWriter mw) throws IOException {
//todo
}
@Override
public void writeIterator(IteratorWriter iw) throws IOException {
/*todo*/
}
protected void writeBool(String name , Boolean val) throws IOException {
writeBool(name, val.toString());

View File

@ -94,20 +94,58 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
List<Query> qlist = new ArrayList<>(freq.domain.filters.size());
// TODO: prevent parsing filters each time!
for (Object rawFilter : freq.domain.filters) {
Query symbolicFilter;
if (rawFilter instanceof String) {
QParser parser = null;
try {
parser = QParser.getParser((String)rawFilter, fcontext.req);
symbolicFilter = parser.getQuery();
Query symbolicFilter = parser.getQuery();
qlist.add(symbolicFilter);
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
} else if (rawFilter instanceof Map) {
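// Map form: a single-entry map of the shape {"param" : <tag>}; every value of the named request
// parameter is parsed as a query string and added to the filter list.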
Map<String,Object> m = (Map<String, Object>) rawFilter;
String type;
Object args;
if (m.size() == 1) {
Map.Entry<String, Object> entry = m.entrySet().iterator().next();
type = entry.getKey();
args = entry.getValue();
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't convert map to query:" + rawFilter);
}
if (!"param".equals(type)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown type. Can't convert map to query:" + rawFilter);
}
String tag;
if (!(args instanceof String)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't retrieve non-string param:" + args);
}
tag = (String)args;
String[] qstrings = fcontext.req.getParams().getParams(tag);
if (qstrings != null) {
for (String qstring : qstrings) {
QParser parser = null;
try {
parser = QParser.getParser(qstring, fcontext.req);
Query symbolicFilter = parser.getQuery();
qlist.add(symbolicFilter);
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
}
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad query (expected a string):" + rawFilter);
}
qlist.add(symbolicFilter);
}
this.filter = fcontext.searcher.getDocSet(qlist);
@ -363,24 +401,29 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
// TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
// should we check for domain-altering exclusions, or even ask the sub-facet for
// its domain and then only skip it if it's 0?
if (domain == null || domain.size() == 0 && !freq.processEmpty) {
return;
}
boolean emptyDomain = domain == null || domain.size() == 0;
for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
FacetRequest subRequest = sub.getValue();
// This includes a static check of whether a sub-facet can possibly produce something from
// an empty domain. Should this be changed to a dynamic check as well? That would
// probably require actually executing the facet anyway, and dropping it at the
// end if it was unproductive.
if (emptyDomain && !freq.processEmpty && !subRequest.canProduceFromEmpty()) {
continue;
}
// make a new context for each sub-facet since they can change the domain
FacetContext subContext = fcontext.sub(filter, domain);
FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
FacetProcessor subProcessor = subRequest.createFacetProcessor(subContext);
if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true
FacetDebugInfo fdebug = new FacetDebugInfo();
subContext.setDebugInfo(fdebug);
fcontext.getDebugInfo().addChild(fdebug);
fdebug.setReqDescription(sub.getValue().getFacetDescription());
fdebug.setReqDescription(subRequest.getFacetDescription());
fdebug.setProcessor(subProcessor.getClass().getSimpleName());
if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());

View File

@ -88,6 +88,16 @@ public abstract class FacetRequest {
public boolean toChildren;
public String parents; // identifies the parent filter... the full set of parent documents for any block join operation
public List<Object> filters; // list of symbolic filters (JSON query format)
// True if a starting set of documents can be mapped onto a different set of documents not originally in the starting set.
public boolean canTransformDomain() {
return toParent || toChildren || excludeTags != null;
}
// Can this domain become non-empty if the input domain is empty? This does not check any sub-facets (see canProduceFromEmpty for that)
public boolean canBecomeNonEmpty() {
return excludeTags != null;
}
}
public FacetRequest() {
@ -119,6 +129,15 @@ public abstract class FacetRequest {
return false;
}
/** Returns true if this facet, or any sub-facets can produce results from an empty domain. */
public boolean canProduceFromEmpty() {
if (domain != null && domain.canBecomeNonEmpty()) return true;
for (FacetRequest freq : subFacets.values()) {
if (freq.canProduceFromEmpty()) return true;
}
return false;
}
public void addStat(String key, AggValueSource stat) {
facetStats.put(key, stat);
}

View File

@ -24,6 +24,7 @@ import java.util.Random;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -110,6 +111,8 @@ public class TestInjection {
public static String updateRandomPause = null;
public static String prepRecoveryOpPauseForever = null;
public static String randomDelayInCoreCreation = null;
public static int randomDelayMaxInCoreCreationInSec = 10;
@ -118,6 +121,8 @@ public class TestInjection {
private static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>());
private static AtomicInteger countPrepRecoveryOpPauseForever = new AtomicInteger(0);
public static void reset() {
nonGracefullClose = null;
failReplicaRequests = null;
@ -127,6 +132,8 @@ public class TestInjection {
updateRandomPause = null;
randomDelayInCoreCreation = null;
splitFailureBeforeReplicaCreation = null;
prepRecoveryOpPauseForever = null;
countPrepRecoveryOpPauseForever = new AtomicInteger(0);
for (Timer timer : timers) {
timer.cancel();
@ -289,6 +296,31 @@ public class TestInjection {
return true;
}
public static boolean injectPrepRecoveryOpPauseForever() {
if (prepRecoveryOpPauseForever != null) {
Random rand = random();
if (null == rand) return true;
Pair<Boolean,Integer> pair = parseValue(prepRecoveryOpPauseForever);
boolean enabled = pair.first();
int chanceIn100 = pair.second();
// Prevent the pause-forever injection from firing continuously
if (enabled && rand.nextInt(100) >= (100 - chanceIn100) && countPrepRecoveryOpPauseForever.get() < 2) {
countPrepRecoveryOpPauseForever.incrementAndGet();
log.info("inject pause forever for prep recovery op");
try {
Thread.sleep(Integer.MAX_VALUE);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
} else {
countPrepRecoveryOpPauseForever.set(0);
}
}
return true;
}
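
The injected value follows TestInjection's usual "enabled:chanceIn100" encoding ("true:30" is what TestCloudRecovery sets below). A rough standalone sketch of the decision; the real parseValue(...) may validate and report errors differently:

import java.util.Random;

class PauseInjectionSketch {
  // Decodes "true:30" into (enabled=true, chanceIn100=30) and rolls the dice once.
  static boolean shouldPause(String value, Random rand) {
    String[] parts = value.split(":");
    boolean enabled = Boolean.parseBoolean(parts[0]);
    int chanceIn100 = Integer.parseInt(parts[1]);
    return enabled && rand.nextInt(100) >= (100 - chanceIn100);
  }
}

The AtomicInteger above additionally caps successive pause-forever injections at two, so a recovery test cannot stall indefinitely.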
public static boolean injectSplitFailureBeforeReplicaCreation() {
if (splitFailureBeforeReplicaCreation != null) {
Random rand = random();

View File

@ -92,14 +92,16 @@
"useParams":"_ADMIN_FILE"
},
"/export": {
"class": "solr.SearchHandler",
"class": "solr.ExportHandler",
"useParams":"_EXPORT",
"components": [
"query"
],
"defaults": {
"wt": "json"
},
"invariants": {
"rq": "{!xport}",
"wt": "xsort",
"distrib": false
}
},
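
Since the invariants above pin the export rank query and response writer, a client only supplies q, sort and fl. A hypothetical raw-HTTP sketch; host, port, collection and field names are placeholders, and exported fields must have docValues:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

class ExportSketch {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:8983/solr/techproducts/export?q=*:*&sort=id+asc&fl=id");
    try (BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
      String line;
      while ((line = in.readLine()) != null) {
        System.out.println(line);   // the handler streams the full sorted result set
      }
    }
  }
}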

View File

@ -38,6 +38,7 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.junit.BeforeClass;
import org.junit.Test;
@ -124,9 +125,24 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
}
testBackupAndRestore(getCollectionName());
testConfigBackupOnly("conf1", getCollectionName());
testInvalidPath(getCollectionName());
}
/**
* This test validates the backup of collection configuration using
* {@linkplain CollectionAdminParams#NO_INDEX_BACKUP_STRATEGY}.
*
* @param configName The config name for the collection to be backed up.
* @param collectionName The name of the collection to be backed up.
* @throws Exception in case of errors.
*/
protected void testConfigBackupOnly(String configName, String collectionName) throws Exception {
// This is deliberately a no-op since we want to run this test against only one of the backup repository
// implementations (mainly to avoid redundant test execution). Currently the HDFS backup repository test
// implements this.
}
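
The HDFS subclass later in this commit overrides this hook; in SolrJ terms the config-only backup it issues looks like this (the backup name and repository are whatever the test configures):

// Mirrors the request built in TestHdfsCloudBackupRestore below.
CollectionAdminRequest.Backup backup =
    CollectionAdminRequest.backupCollection(collectionName, "configonlybackup")
        .setRepositoryName(getBackupRepoName())
        .setIndexBackupStrategy(CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY);
backup.process(cluster.getSolrClient());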
// This test verifies the system behavior when the backup location cluster property is configured with an invalid
// value for the specified repository (and the default backup location is not configured in solr.xml).
private void testInvalidPath(String collectionName) throws Exception {

View File

@ -37,6 +37,8 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.util.TestInjection;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@ -47,6 +49,7 @@ public class TestCloudRecovery extends SolrCloudTestCase {
@BeforeClass
public static void setupCluster() throws Exception {
TestInjection.prepRecoveryOpPauseForever = "true:30";
System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
System.setProperty("solr.ulog.numRecordsToKeep", "1000");
@ -62,6 +65,11 @@ public class TestCloudRecovery extends SolrCloudTestCase {
false, true, 30);
}
@AfterClass
public static void afterClass() {
TestInjection.reset();
}
@Before
public void resetCollection() throws IOException, SolrServerException {
cluster.getSolrClient().deleteByQuery(COLLECTION, "*:*");

View File

@ -16,10 +16,18 @@
*/
package org.apache.solr.cloud;
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.COLL_CONF;
import static org.apache.solr.core.backup.BackupManager.*;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.apache.commons.io.IOUtils;
@ -28,7 +36,14 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.hdfs.HdfsTestUtil;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.backup.BackupManager;
import org.apache.solr.core.backup.repository.HdfsBackupRepository;
import org.apache.solr.util.BadHdfsThreadsFilter;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -144,4 +159,45 @@ public class TestHdfsCloudBackupRestore extends AbstractCloudBackupRestoreTestCa
public String getBackupLocation() {
return null;
}
protected void testConfigBackupOnly(String configName, String collectionName) throws Exception {
String backupName = "configonlybackup";
CloudSolrClient solrClient = cluster.getSolrClient();
CollectionAdminRequest.Backup backup = CollectionAdminRequest.backupCollection(collectionName, backupName)
.setRepositoryName(getBackupRepoName())
.setIndexBackupStrategy(CollectionAdminParams.NO_INDEX_BACKUP_STRATEGY);
backup.process(solrClient);
Map<String,String> params = new HashMap<>();
params.put("location", "/backup");
params.put("solr.hdfs.home", hdfsUri + "/solr");
HdfsBackupRepository repo = new HdfsBackupRepository();
repo.init(new NamedList<>(params));
BackupManager mgr = new BackupManager(repo, solrClient.getZkStateReader());
URI baseLoc = repo.createURI("/backup");
Properties props = mgr.readBackupProperties(baseLoc, backupName);
assertNotNull(props);
assertEquals(collectionName, props.getProperty(COLLECTION_NAME_PROP));
assertEquals(backupName, props.getProperty(BACKUP_NAME_PROP));
assertEquals(configName, props.getProperty(COLL_CONF));
DocCollection collectionState = mgr.readCollectionState(baseLoc, backupName, collectionName);
assertNotNull(collectionState);
assertEquals(collectionName, collectionState.getName());
URI configDirLoc = repo.resolve(baseLoc, backupName, ZK_STATE_DIR, CONFIG_STATE_DIR, configName);
assertTrue(repo.exists(configDirLoc));
Collection<String> expected = Arrays.asList(BACKUP_PROPS_FILE, ZK_STATE_DIR);
URI backupLoc = repo.resolve(baseLoc, backupName);
String[] dirs = repo.listAll(backupLoc);
for (String d : dirs) {
assertTrue(expected.contains(d));
}
}
}

View File

@ -97,7 +97,7 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
++ihCount; assertEquals(pathToClassMap.get("/admin/system"), "solr.SystemInfoHandler");
++ihCount; assertEquals(pathToClassMap.get("/admin/threads"), "solr.ThreadDumpHandler");
++ihCount; assertEquals(pathToClassMap.get("/config"), "solr.SolrConfigHandler");
++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.SearchHandler");
++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.ExportHandler");
++ihCount; assertEquals(pathToClassMap.get("/terms"), "solr.SearchHandler");
++ihCount; assertEquals(pathToClassMap.get("/get"), "solr.RealTimeGetHandler");
++ihCount; assertEquals(pathToClassMap.get(ReplicationHandler.PATH), "solr.ReplicationHandler");

View File

@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.Test;
public class ReplicaListTransformerTest extends LuceneTestCase {
// A transformer that keeps only matching choices
private static class ToyMatchingReplicaListTransformer implements ReplicaListTransformer {
private final String regex;
public ToyMatchingReplicaListTransformer(String regex)
{
this.regex = regex;
}
public void transform(List<?> choices)
{
Iterator<?> it = choices.iterator();
while (it.hasNext()) {
Object choice = it.next();
final String url;
if (choice instanceof String) {
url = (String)choice;
}
else if (choice instanceof Replica) {
url = ((Replica)choice).getCoreUrl();
} else {
url = null;
}
if (url == null || !url.matches(regex)) {
it.remove();
}
}
}
}
// A transformer that makes no transformation
private static class ToyNoOpReplicaListTransformer implements ReplicaListTransformer {
public ToyNoOpReplicaListTransformer()
{
}
public void transform(List<?> choices)
{
// no-op
}
}
@Test
public void testTransform() throws Exception {
final String regex = ".*" + random().nextInt(10) + ".*";
final ReplicaListTransformer transformer;
if (random().nextBoolean()) {
transformer = new ToyMatchingReplicaListTransformer(regex);
} else {
transformer = new HttpShardHandlerFactory() {
@Override
ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req)
{
final SolrParams params = req.getParams();
if (params.getBool("toyNoTransform", false)) {
return new ToyNoOpReplicaListTransformer();
}
final String regex = params.get("toyRegEx");
if (regex != null) {
return new ToyMatchingReplicaListTransformer(regex);
}
return super.getReplicaListTransformer(req);
}
}.getReplicaListTransformer(
new LocalSolrQueryRequest(null,
new ModifiableSolrParams().add("toyRegEx", regex)));
}
final List<Replica> inputs = new ArrayList<>();
final List<Replica> expectedTransformed = new ArrayList<>();
final List<String> urls = createRandomUrls();
for (int ii=0; ii<urls.size(); ++ii) {
final String name = "replica"+(ii+1);
final String url = urls.get(ii);
final Map<String,Object> propMap = new HashMap<String,Object>();
propMap.put("base_url", url);
// a skeleton replica, good enough for this test's purposes
final Replica replica = new Replica(name, propMap);
inputs.add(replica);
if (url.matches(regex)) {
expectedTransformed.add(replica);
}
}
final List<Replica> actualTransformed = new ArrayList<>(inputs);
transformer.transform(actualTransformed);
assertEquals(expectedTransformed.size(), actualTransformed.size());
for (int ii=0; ii<expectedTransformed.size(); ++ii) {
assertEquals("mismatch for ii="+ii, expectedTransformed.get(ii), actualTransformed.get(ii));
}
}
private final List<String> createRandomUrls() throws Exception {
final List<String> urls = new ArrayList<>();
maybeAddUrl(urls, "a"+random().nextDouble());
maybeAddUrl(urls, "bb"+random().nextFloat());
maybeAddUrl(urls, "ccc"+random().nextGaussian());
maybeAddUrl(urls, "dddd"+random().nextInt());
maybeAddUrl(urls, "eeeee"+random().nextLong());
Collections.shuffle(urls, random());
return urls;
}
private final void maybeAddUrl(final List<String> urls, final String url) {
if (random().nextBoolean()) {
urls.add(url);
}
}
}

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.cloud.Replica;
import org.junit.Test;
public class ShufflingReplicaListTransformerTest extends LuceneTestCase {
private final ShufflingReplicaListTransformer transformer = new ShufflingReplicaListTransformer(random());
@Test
public void testTransformReplicas() throws Exception {
final List<Replica> replicas = new ArrayList<>();
for (final String url : createRandomUrls()) {
replicas.add(new Replica(url, new HashMap<String,Object>()));
}
implTestTransform(replicas);
}
@Test
public void testTransformUrls() throws Exception {
final List<String> urls = createRandomUrls();
implTestTransform(urls);
}
private <TYPE> void implTestTransform(List<TYPE> inputs) throws Exception {
final List<TYPE> transformedInputs = new ArrayList<>(inputs);
transformer.transform(transformedInputs);
final Set<TYPE> inputSet = new HashSet<>(inputs);
final Set<TYPE> transformedSet = new HashSet<>(transformedInputs);
assertTrue(inputSet.equals(transformedSet));
}
private final List<String> createRandomUrls() throws Exception {
final List<String> urls = new ArrayList<>();
maybeAddUrl(urls, "a"+random().nextDouble());
maybeAddUrl(urls, "bb"+random().nextFloat());
maybeAddUrl(urls, "ccc"+random().nextGaussian());
maybeAddUrl(urls, "dddd"+random().nextInt());
maybeAddUrl(urls, "eeeee"+random().nextLong());
Collections.shuffle(urls, random());
return urls;
}
private final void maybeAddUrl(final List<String> urls, final String url) {
if (random().nextBoolean()) {
urls.add(url);
}
}
}

View File

@ -113,4 +113,17 @@ public class TestMacroExpander extends LuceneTestCase {
}
}
@Test
public void testMap() { // see SOLR-9740, the second fq param was being dropped.
final Map<String,String[]> request = new HashMap<>();
request.put("fq", new String[] {"zero", "${one_ref}", "two", "${three_ref}"});
request.put("one_ref",new String[] {"one"});
request.put("three_ref",new String[] {"three"});
Map expanded = MacroExpander.expand(request);
assertEquals("zero", ((String[])expanded.get("fq"))[0]);
assertEquals("one", ((String[])expanded.get("fq"))[1]);
assertEquals("two", ((String[]) expanded.get("fq"))[2]);
assertEquals("three", ((String[]) expanded.get("fq"))[3]);
}
}

View File

@ -181,15 +181,19 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
methodsExpectedNotOverriden.add("writeMapOpener");
methodsExpectedNotOverriden.add("writeMapSeparator");
methodsExpectedNotOverriden.add("writeMapCloser");
methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeArray(java.lang.String,java.util.List) throws java.io.IOException");
methodsExpectedNotOverriden.add("writeArrayOpener");
methodsExpectedNotOverriden.add("writeArraySeparator");
methodsExpectedNotOverriden.add("writeArrayCloser");
methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeMap(org.apache.solr.common.MapWriter) throws java.io.IOException");
methodsExpectedNotOverriden.add("public void org.apache.solr.response.JSONWriter.writeIterator(org.apache.solr.common.IteratorWriter) throws java.io.IOException");
final Class<?> subClass = ArrayOfNamedValuePairJSONWriter.class;
final Class<?> superClass = subClass.getSuperclass();
for (final Method superClassMethod : superClass.getDeclaredMethods()) {
final String methodName = superClassMethod.getName();
final String methodFullName = superClassMethod.toString();
if (!methodName.startsWith("write")) continue;
final int modifiers = superClassMethod.getModifiers();
@ -197,7 +201,8 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
if (Modifier.isStatic(modifiers)) continue;
if (Modifier.isPrivate(modifiers)) continue;
final boolean expectOverriden = !methodsExpectedNotOverriden.contains(methodName);
final boolean expectOverriden = !methodsExpectedNotOverriden.contains(methodName)
&& !methodsExpectedNotOverriden.contains(methodFullName);
try {
final Method subClassMethod = subClass.getDeclaredMethod(
@ -215,7 +220,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
if (expectOverriden) {
fail(subClass + " needs to override '" + superClassMethod + "'");
} else {
assertTrue(methodName+" not found in remaining "+methodsExpectedNotOverriden, methodsExpectedNotOverriden.remove(methodName));
assertTrue(methodName+" not found in remaining "+methodsExpectedNotOverriden, methodsExpectedNotOverriden.remove(methodName)|| methodsExpectedNotOverriden.remove(methodFullName));
}
}
}
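
Full signatures are needed here presumably because the writer has overloaded write methods: Method.getName() cannot distinguish overloads, while Method.toString() can. A small standalone illustration (arbitrary class, unrelated to JSONWriter):

import java.lang.reflect.Method;
import java.util.List;

class OverloadDemo {
  void writeArray(String name, List<?> values) {}
  void writeArray(String name, Object[] values) {}

  public static void main(String[] args) {
    for (Method m : OverloadDemo.class.getDeclaredMethods()) {
      if (!m.getName().equals("writeArray")) continue;
      System.out.println(m.getName());   // "writeArray" for both overloads
      System.out.println(m);             // full signature tells them apart
    }
  }
}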

Some files were not shown because too many files have changed in this diff