Merge remote-tracking branch 'origin/master'

Conflicts:
	solr/CHANGES.txt
This commit is contained in:
Noble Paul 2016-08-11 12:13:38 +05:30
commit 92b5a76b54
216 changed files with 11609 additions and 1492 deletions

View File

@ -65,6 +65,9 @@ New Features
Polygon instances from a standard GeoJSON string (Robert Muir, Mike
McCandless)
* SOLR-9279: Queries module: new ComparisonBoolFunction base class
(Doug Turnbull via David Smiley)
Bug Fixes
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
@ -135,10 +138,9 @@ Improvements
* LUCENE-7385: Improve/fix assert messages in SpanScorer. (David Smiley)
* LUCENE-7390: Improve performance of indexing points by allowing the
codec to use transient heap in proportion to IndexWriter's RAM
buffer, instead of a fixed 16.0 MB. A custom codec can still
override the buffer size itself. (Mike McCandless)
* LUCENE-7393: Add ICUTokenizer option to parse Myanmar text as syllables instead of words,
because the ICU word-breaking algorithm has some issues. This allows for the previous
tokenization used before Lucene 5. (AM, Robert Muir)
Optimizations
@ -154,6 +156,12 @@ Optimizations
* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
(Adrien Grand)
* LUCENE-7396, LUCENE-7399: Faster flush of points.
(Adrien Grand, Mike McCandless)
* LUCENE-7406: Automaton and PrefixQuery tweaks (fewer object (re)allocations).
(Christine Poerschke)
Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien

View File

@ -402,6 +402,7 @@ public class MinHashFilter extends TokenFilter {
}
/** Returns the MurmurHash3_x64_128 hash, placing the result in "out". */
@SuppressWarnings("fallthrough") // the huge switch is designed to use fall through into cases!
static void murmurhash3_x64_128(byte[] key, int offset, int len, int seed, LongPair out) {
// The original algorithm does have a 32 bit unsigned seed.
// We have to mask to match the behavior of the unsigned types and prevent sign extension.

View File

@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Parses Myanmar text, with syllable as token.
#
$Cons = [[:Other_Letter:]&[:Myanmar:]];
$Virama = [\u1039];
$Asat = [\u103A];
$WordJoin = [:Line_Break=Word_Joiner:];
#
# default numerical definitions
#
$Extend = [\p{Word_Break = Extend}];
$Format = [\p{Word_Break = Format}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
$MidNumEx = $MidNum ($Extend | $Format)*;
$NumericEx = $Numeric ($Extend | $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
$ConsEx = $Cons ($Extend | $Format)*;
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;
!!forward;
$MyanmarJoinedSyllableEx {200};
# default numeric rules
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};

View File

@ -63,9 +63,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
// the same as ROOT, except no dictionary segmentation for cjk
private static final BreakIterator defaultBreakIterator =
readBreakIterator("Default.brk");
private static final BreakIterator myanmarSyllableIterator =
readBreakIterator("MyanmarSyllable.brk");
// TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
private final boolean cjkAsWords;
private final boolean myanmarAsWords;
/**
* Creates a new config. This object is lightweight, but the first
@ -74,9 +77,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
* otherwise text will be segmented according to UAX#29 defaults.
* If this is true, all Han+Hiragana+Katakana words will be tagged as
* IDEOGRAPHIC.
* @param myanmarAsWords true if Myanmar text should undergo dictionary-based segmentation,
* otherwise it will be tokenized as syllables.
*/
public DefaultICUTokenizerConfig(boolean cjkAsWords) {
public DefaultICUTokenizerConfig(boolean cjkAsWords, boolean myanmarAsWords) {
this.cjkAsWords = cjkAsWords;
this.myanmarAsWords = myanmarAsWords;
}
@Override
@ -88,6 +94,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
public BreakIterator getBreakIterator(int script) {
switch(script) {
case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone();
case UScript.MYANMAR:
if (myanmarAsWords) {
return (BreakIterator)defaultBreakIterator.clone();
} else {
return (BreakIterator)myanmarSyllableIterator.clone();
}
default: return (BreakIterator)defaultBreakIterator.clone();
}
}

View File

@ -68,7 +68,7 @@ public final class ICUTokenizer extends Tokenizer {
* @see DefaultICUTokenizerConfig
*/
public ICUTokenizer() {
this(new DefaultICUTokenizerConfig(true));
this(new DefaultICUTokenizerConfig(true, true));
}
/**

View File

@ -79,6 +79,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
private final Map<Integer,String> tailored;
private ICUTokenizerConfig config;
private final boolean cjkAsWords;
private final boolean myanmarAsWords;
/** Creates a new ICUTokenizerFactory */
public ICUTokenizerFactory(Map<String,String> args) {
@ -95,6 +96,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
}
}
cjkAsWords = getBoolean(args, "cjkAsWords", true);
myanmarAsWords = getBoolean(args, "myanmarAsWords", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -104,7 +106,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
public void inform(ResourceLoader loader) throws IOException {
assert tailored != null : "init must be called first!";
if (tailored.isEmpty()) {
config = new DefaultICUTokenizerConfig(cjkAsWords);
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
} else {
final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
@ -112,7 +114,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
String resourcePath = entry.getValue();
breakers[code] = parseRules(resourcePath, loader);
}
config = new DefaultICUTokenizerConfig(cjkAsWords) {
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {
@Override
public BreakIterator getBreakIterator(int script) {

View File

@ -42,7 +42,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
tokenizer.setReader(new StringReader(input));
assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
@ -53,7 +53,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
sb.append('a');
}
String input = sb.toString();
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
tokenizer.setReader(new StringReader(input));
char token[] = new char[4096];
Arrays.fill(token, 'a');
@ -75,7 +75,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
TokenFilter filter = new ICUNormalizer2Filter(tokenizer);
return new TokenStreamComponents(tokenizer, filter);
}

View File

@ -34,7 +34,7 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true)));
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true, true)));
}
};
}

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.icu.segmentation;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
/** Test tokenizing Myanmar text into syllables */
public class TestMyanmarSyllable extends BaseTokenStreamTestCase {
Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, false));
return new TokenStreamComponents(tokenizer);
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
/** as opposed to dictionary break of သက်ဝင်|လှုပ်ရှား|စေ|ပြီး */
public void testBasics() throws Exception {
assertAnalyzesTo(a, "သက်ဝင်လှုပ်ရှားစေပြီး", new String[] { "သက်", "ဝင်", "လှုပ်", "ရှား", "စေ", "ပြီး" });
}
// simple tests from "A Rule-based Syllable Segmentation of Myanmar Text"
// * http://www.aclweb.org/anthology/I08-3010
// (see also the presentation: http://gii2.nagaokaut.ac.jp/gii/media/share/20080901-ZMM%20Presentation.pdf)
// The words are fake, we just test the categories.
// note that currently our algorithm is not sophisticated enough to handle some of the special cases!
/** constant */
public void testC() throws Exception {
assertAnalyzesTo(a, "ကက", new String[] { "က", "က" });
}
/** consonant + sign */
public void testCF() throws Exception {
assertAnalyzesTo(a, "ကံကံ", new String[] { "ကံ", "ကံ" });
}
/** consonant + consonant + asat */
public void testCCA() throws Exception {
assertAnalyzesTo(a, "ကင်ကင်", new String[] { "ကင်", "ကင်" });
}
/** consonant + consonant + asat + sign */
public void testCCAF() throws Exception {
assertAnalyzesTo(a, "ကင်းကင်း", new String[] { "ကင်း", "ကင်း" });
}
/** consonant + vowel */
public void testCV() throws Exception {
assertAnalyzesTo(a, "ကာကာ", new String[] { "ကာ", "ကာ" });
}
/** consonant + vowel + sign */
public void testCVF() throws Exception {
assertAnalyzesTo(a, "ကားကား", new String[] { "ကား", "ကား" });
}
/** consonant + vowel + vowel + asat */
public void testCVVA() throws Exception {
assertAnalyzesTo(a, "ကော်ကော်", new String[] { "ကော်", "ကော်" });
}
/** consonant + vowel + vowel + consonant + asat */
public void testCVVCA() throws Exception {
assertAnalyzesTo(a, "ကောင်ကောင်", new String[] { "ကောင်", "ကောင်" });
}
/** consonant + vowel + vowel + consonant + asat + sign */
public void testCVVCAF() throws Exception {
assertAnalyzesTo(a, "ကောင်းကောင်း", new String[] { "ကောင်း", "ကောင်း" });
}
/** consonant + medial */
public void testCM() throws Exception {
assertAnalyzesTo(a, "ကျကျ", new String[] { "ကျ", "ကျ" });
}
/** consonant + medial + sign */
public void testCMF() throws Exception {
assertAnalyzesTo(a, "ကျံကျံ", new String[] { "ကျံ", "ကျံ" });
}
/** consonant + medial + consonant + asat */
public void testCMCA() throws Exception {
assertAnalyzesTo(a, "ကျင်ကျင်", new String[] { "ကျင်", "ကျင်" });
}
/** consonant + medial + consonant + asat + sign */
public void testCMCAF() throws Exception {
assertAnalyzesTo(a, "ကျင်းကျင်း", new String[] { "ကျင်း", "ကျင်း" });
}
/** consonant + medial + vowel */
public void testCMV() throws Exception {
assertAnalyzesTo(a, "ကျာကျာ", new String[] { "ကျာ", "ကျာ" });
}
/** consonant + medial + vowel + sign */
public void testCMVF() throws Exception {
assertAnalyzesTo(a, "ကျားကျား", new String[] { "ကျား", "ကျား" });
}
/** consonant + medial + vowel + vowel + asat */
public void testCMVVA() throws Exception {
assertAnalyzesTo(a, "ကျော်ကျော်", new String[] { "ကျော်", "ကျော်" });
}
/** consonant + medial + vowel + vowel + consonant + asat */
public void testCMVVCA() throws Exception {
assertAnalyzesTo(a, "ကြောင်ကြောင်", new String[] { "ကြောင်", "ကြောင်"});
}
/** consonant + medial + vowel + vowel + consonant + asat + sign */
public void testCMVVCAF() throws Exception {
assertAnalyzesTo(a, "ကြောင်းကြောင်း", new String[] { "ကြောင်း", "ကြောင်း"});
}
/** independent vowel */
public void testI() throws Exception {
assertAnalyzesTo(a, "ဪဪ", new String[] { "", "" });
}
/** independent vowel */
public void testE() throws Exception {
assertAnalyzesTo(a, "ဣဣ", new String[] { "", "" });
}
}

View File

@ -46,7 +46,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
TokenStream result = new CJKBigramFilter(source);
return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET));
}
@ -60,7 +60,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
analyzer2 = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
// we put this before the CJKBigramFilter, because the normalization might combine
// some halfwidth katakana forms, which will affect the bigramming.
TokenStream result = new ICUNormalizer2Filter(source);

View File

@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
@ -82,11 +83,20 @@ public class DatasetSplitter {
// get the exact no. of existing classes
int noOfClasses = 0;
for (LeafReaderContext leave : originalIndex.leaves()) {
long valueCount = 0;
SortedDocValues classValues = leave.reader().getSortedDocValues(classFieldName);
if (classValues == null) {
throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values");
if (classValues != null) {
valueCount = classValues.getValueCount();
} else {
SortedSetDocValues sortedSetDocValues = leave.reader().getSortedSetDocValues(classFieldName);
if (sortedSetDocValues != null) {
valueCount = sortedSetDocValues.getValueCount();
}
noOfClasses += classValues.getValueCount();
}
if (classValues == null) {
throw new IllegalStateException("field \"" + classFieldName + "\" must have sorted (set) doc values");
}
noOfClasses += valueCount;
}
try {

View File

@ -68,7 +68,7 @@ class SimpleTextPointsWriter extends PointsWriter {
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@ -79,7 +79,7 @@ class SimpleTextPointsWriter extends PointsWriter {
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointNumBytes(),
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
maxMBSortInHeap,
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
values.size(fieldInfo.name),
singleValuePerDoc) {

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;
import org.apache.lucene.util.BytesRef;
/** {@link PointsReader} whose order of points can be changed.
* This class is useful for codecs to optimize flush.
* @lucene.internal */
public abstract class MutablePointsReader extends PointsReader {
/** Sole constructor. */
protected MutablePointsReader() {}
/** Set {@code packedValue} with a reference to the packed bytes of the i-th value. */
public abstract void getValue(int i, BytesRef packedValue);
/** Get the k-th byte of the i-th value. */
public abstract byte getByteAt(int i, int k);
/** Return the doc ID of the i-th value. */
public abstract int getDocID(int i);
/** Swap the i-th and j-th values. */
public abstract void swap(int i, int j);
}

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.bkd.BKDWriter;
/** Abstract API to write points
*
@ -35,9 +34,8 @@ public abstract class PointsWriter implements Closeable {
protected PointsWriter() {
}
/** Write all values contained in the provided reader. {@code maxMBSortInHeap} is the maximum
* transient heap that can be used to sort values, before spilling to disk for offline sorting */
public abstract void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException;
/** Write all values contained in the provided reader */
public abstract void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException;
/** Default naive merge implementation for one field: it just re-indexes all the values
* from the incoming segment. The default codec overrides this for 1D fields and uses
@ -147,10 +145,7 @@ public abstract class PointsWriter implements Closeable {
public int getDocCount(String fieldName) {
return finalDocCount;
}
},
// TODO: also let merging of > 1D fields tap into IW's indexing buffer size, somehow (1D fields do an optimized merge sort
// and don't need heap)
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
});
}
/** Default merge implementation to merge incoming points readers by visiting all their points and

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.FieldInfo;
@ -39,9 +40,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
/** Writes dimensional values
*
* @lucene.experimental */
/** Writes dimensional values */
public class Lucene60PointsWriter extends PointsWriter implements Closeable {
/** Output used to write the BKD tree data file */
@ -52,13 +51,15 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
final SegmentWriteState writeState;
final int maxPointsInLeafNode;
final double maxMBSortInHeap;
private boolean finished;
/** Full constructor */
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode) throws IOException {
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
assert writeState.fieldInfos.hasPointValues();
this.writeState = writeState;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxMBSortInHeap = maxMBSortInHeap;
String dataFileName = IndexFileNames.segmentFileName(writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene60PointsFormat.DATA_EXTENSION);
@ -80,11 +81,11 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
/** Uses the defaults values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap} (16.0) */
public Lucene60PointsWriter(SegmentWriteState writeState) throws IOException {
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@ -98,6 +99,14 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
values.size(fieldInfo.name),
singleValuePerDoc)) {
if (values instanceof MutablePointsReader) {
final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointsReader) values);
if (fp != -1) {
indexFPs.put(fieldInfo.name, fp);
}
return;
}
values.intersect(fieldInfo.name, new IntersectVisitor() {
@Override
public void visit(int docID) {
@ -173,8 +182,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
// NOTE: not used, since BKDWriter.merge does a merge sort:
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
maxMBSortInHeap,
totMaxSize,
singleValuePerDoc)) {
List<BKDReader> bkdReaders = new ArrayList<>();

View File

@ -257,7 +257,7 @@ public class Field implements IndexableField {
/**
* The value of the field as a String, or null. If null, the Reader value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
* binaryValue() must be set.
*/
@Override
public String stringValue() {
@ -271,7 +271,7 @@ public class Field implements IndexableField {
/**
* The value of the field as a Reader, or null. If null, the String value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
* binaryValue() must be set.
*/
@Override
public Reader readerValue() {
@ -420,7 +420,7 @@ public class Field implements IndexableField {
/**
* Expert: sets the token stream to be used for indexing and causes
* isIndexed() and isTokenized() to return true. May be combined with stored
* values from stringValue() or getBinaryValue()
* values from stringValue() or binaryValue()
*/
public void setTokenStream(TokenStream tokenStream) {
if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {

View File

@ -153,7 +153,7 @@ class DocumentsWriterPerThread {
final Allocator byteBlockAllocator;
final IntBlockPool.Allocator intBlockAllocator;
private final AtomicLong pendingNumDocs;
final LiveIndexWriterConfig indexWriterConfig;
private final LiveIndexWriterConfig indexWriterConfig;
private final boolean enableTestPoints;
private final IndexWriter indexWriter;

View File

@ -762,7 +762,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* {@link #getConfig()}.
*
* <p>
* <b>NOTE:</b> after this writer is created, the given configuration instance
* <b>NOTE:</b> after ths writer is created, the given configuration instance
* cannot be passed to another writer.
*
* @param d

View File

@ -168,14 +168,9 @@ public class LiveIndexWriterConfig {
/**
* Determines the amount of RAM that may be used for buffering added documents
* and deletions before beginning to flush them to the Directory. For
* faster indexing performance it's best to use as large a RAM buffer as you can.
* <p>
* Note that this setting is not a hard limit on memory usage during indexing, as
* transient and non-trivial memory well beyond this buffer size may be used,
* for example due to segment merges or writing points to new segments.
* For application stability the available memory in the JVM
* should be significantly larger than the RAM buffer used for indexing.
* and deletions before they are flushed to the Directory. Generally for
* faster indexing performance it's best to flush by RAM usage instead of
* document count and use as large a RAM buffer as you can.
* <p>
* When this is set, the writer will flush whenever buffered documents and
* deletions use this much RAM. Pass in
@ -183,6 +178,14 @@ public class LiveIndexWriterConfig {
* due to RAM usage. Note that if flushing by document count is also enabled,
* then the flush will be triggered by whichever comes first.
* <p>
* The maximum RAM limit is inherently determined by the JVMs available
* memory. Yet, an {@link IndexWriter} session can consume a significantly
* larger amount of memory than the given RAM limit since this limit is just
* an indicator when to flush memory resident documents to the Directory.
* Flushes are likely happen concurrently while other threads adding documents
* to the writer. For application stability the available memory in the JVM
* should be significantly larger than the RAM buffer used for indexing.
* <p>
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
* approximate. Specifically, if you delete by Query, Lucene currently has no
* way to measure the RAM usage of individual Queries so the accounting will

View File

@ -18,13 +18,13 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.bkd.BKDWriter;
/** Buffers up pending byte[][] value(s) per doc, then flushes when segment flushes. */
class PointValuesWriter {
@ -35,8 +35,7 @@ class PointValuesWriter {
private int numPoints;
private int numDocs;
private int lastDocID = -1;
private final byte[] packedValue;
private final LiveIndexWriterConfig indexWriterConfig;
private final int packedBytesLength;
public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
@ -44,8 +43,7 @@ class PointValuesWriter {
this.bytes = new ByteBlockPool(docWriter.byteBlockAllocator);
docIDs = new int[16];
iwBytesUsed.addAndGet(16 * Integer.BYTES);
packedValue = new byte[fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()];
indexWriterConfig = docWriter.indexWriterConfig;
packedBytesLength = fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes();
}
// TODO: if exactly the same value is added to exactly the same doc, should we dedup?
@ -53,9 +51,10 @@ class PointValuesWriter {
if (value == null) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": point value must not be null");
}
if (value.length != fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()) {
if (value.length != packedBytesLength) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": this field's value has length=" + value.length + " but should be " + (fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()));
}
if (docIDs.length == numPoints) {
docIDs = ArrayUtil.grow(docIDs, numPoints+1);
iwBytesUsed.addAndGet((docIDs.length - numPoints) * Integer.BYTES);
@ -66,21 +65,32 @@ class PointValuesWriter {
numDocs++;
lastDocID = docID;
}
numPoints++;
}
public void flush(SegmentWriteState state, PointsWriter writer) throws IOException {
PointsReader reader = new MutablePointsReader() {
final int[] ords = new int[numPoints];
{
for (int i = 0; i < numPoints; ++i) {
ords[i] = i;
}
}
writer.writeField(fieldInfo,
new PointsReader() {
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
final BytesRef scratch = new BytesRef();
final byte[] packedValue = new byte[packedBytesLength];
for(int i=0;i<numPoints;i++) {
bytes.readBytes(packedValue.length * i, packedValue, 0, packedValue.length);
visitor.visit(docIDs[i], packedValue);
getValue(i, scratch);
assert scratch.length == packedValue.length;
System.arraycopy(scratch.bytes, scratch.offset, packedValue, 0, packedBytesLength);
visitor.visit(getDocID(i), packedValue);
}
}
@ -120,14 +130,46 @@ class PointValuesWriter {
@Override
public long size(String fieldName) {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
return numPoints;
}
@Override
public int getDocCount(String fieldName) {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
return numDocs;
}
},
Math.max(indexWriterConfig.getRAMBufferSizeMB()/8.0, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP));
@Override
public void swap(int i, int j) {
int tmp = ords[i];
ords[i] = ords[j];
ords[j] = tmp;
}
@Override
public int getDocID(int i) {
return docIDs[ords[i]];
}
@Override
public void getValue(int i, BytesRef packedValue) {
final long offset = (long) packedBytesLength * ords[i];
packedValue.length = packedBytesLength;
bytes.setRawBytesRef(packedValue, offset);
}
@Override
public byte getByteAt(int i, int k) {
final long offset = (long) packedBytesLength * ords[i] + k;
return bytes.readByte(offset);
}
};
writer.writeField(fieldInfo, reader);
}
}

View File

@ -405,6 +405,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
lock.lock();
try {
cache.clear();
// Note that this also clears the uniqueQueries map since mostRecentlyUsedQueries is the uniqueQueries.keySet view:
mostRecentlyUsedQueries.clear();
onClear();
} finally {

View File

@ -41,7 +41,8 @@ public class PrefixQuery extends AutomatonQuery {
/** Build an automaton accepting all terms with the specified prefix. */
public static Automaton toAutomaton(BytesRef prefix) {
Automaton automaton = new Automaton();
final int numStatesAndTransitions = prefix.length+1;
final Automaton automaton = new Automaton(numStatesAndTransitions, numStatesAndTransitions);
int lastState = automaton.createState();
for(int i=0;i<prefix.length;i++) {
int state = automaton.createState();
@ -66,7 +67,7 @@ public class PrefixQuery extends AutomatonQuery {
StringBuilder buffer = new StringBuilder();
if (!getField().equals(field)) {
buffer.append(getField());
buffer.append(":");
buffer.append(':');
}
buffer.append(term.text());
buffer.append('*');

View File

@ -459,69 +459,26 @@ public final class ArrayUtil {
* greater than or equal to it.
* This runs in linear time on average and in {@code n log(n)} time in the
* worst case.*/
public static <T> void select(T[] arr, int from, int to, int k, Comparator<T> comparator) {
if (k < from) {
throw new IllegalArgumentException("k must be >= from");
}
if (k >= to) {
throw new IllegalArgumentException("k must be < to");
}
final int maxDepth = 2 * MathUtil.log(to - from, 2);
quickSelect(arr, from, to, k, comparator, maxDepth);
public static <T> void select(T[] arr, int from, int to, int k, Comparator<? super T> comparator) {
new IntroSelector() {
T pivot;
@Override
protected void swap(int i, int j) {
ArrayUtil.swap(arr, i, j);
}
private static <T> void quickSelect(T[] arr, int from, int to, int k, Comparator<T> comparator, int maxDepth) {
assert from <= k;
assert k < to;
if (to - from == 1) {
return;
}
if (--maxDepth < 0) {
Arrays.sort(arr, from, to, comparator);
return;
@Override
protected void setPivot(int i) {
pivot = arr[i];
}
final int mid = (from + to) >>> 1;
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
swap(arr, from, to - 1);
}
if (comparator.compare(arr[to - 1], arr[mid]) > 0) {
swap(arr, to - 1, mid);
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
swap(arr, from, to - 1);
@Override
protected int comparePivot(int j) {
return comparator.compare(pivot, arr[j]);
}
}.select(from, to, k);
}
T pivot = arr[to - 1];
int left = from + 1;
int right = to - 2;
for (;;) {
while (comparator.compare(pivot, arr[left]) > 0) {
++left;
}
while (left < right && comparator.compare(pivot, arr[right]) <= 0) {
--right;
}
if (left < right) {
swap(arr, left, right);
--right;
} else {
break;
}
}
swap(arr, left, to - 1);
if (left == k) {
return;
} else if (left < k) {
quickSelect(arr, left + 1, to, k, comparator, maxDepth);
} else {
quickSelect(arr, from, left, k, comparator, maxDepth);
}
}
}

View File

@ -378,5 +378,34 @@ public final class ByteBlockPool {
}
} while (true);
}
/**
* Set the given {@link BytesRef} so that its content is equal to the
* {@code ref.length} bytes starting at {@code offset}. Most of the time this
* method will set pointers to internal data-structures. However, in case a
* value crosses a boundary, a fresh copy will be returned.
* On the contrary to {@link #setBytesRef(BytesRef, int)}, this does not
* expect the length to be encoded with the data.
*/
public void setRawBytesRef(BytesRef ref, final long offset) {
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
int pos = (int) (offset & BYTE_BLOCK_MASK);
if (pos + ref.length <= BYTE_BLOCK_SIZE) {
ref.bytes = buffers[bufferIndex];
ref.offset = pos;
} else {
ref.bytes = new byte[ref.length];
ref.offset = 0;
readBytes(offset, ref.bytes, 0, ref.length);
}
}
/** Read a single byte at the given {@code offset}. */
public byte readByte(long offset) {
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
int pos = (int) (offset & BYTE_BLOCK_MASK);
byte[] buffer = buffers[bufferIndex];
return buffer[pos];
}
}

View File

@ -33,8 +33,8 @@ public abstract class InPlaceMergeSorter extends Sorter {
}
void mergeSort(int from, int to) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
} else {
final int mid = (from + to) >>> 1;
mergeSort(from, mid);

View File

@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Comparator;
/** Implementation of the quick select algorithm.
* <p>It uses the median of the first, middle and last values as a pivot and
* falls back to a heap sort when the number of recursion levels exceeds
* {@code 2 lg(n)}, as a consequence it runs in linear time on average and in
* {@code n log(n)} time in the worst case.</p>
* @lucene.internal */
public abstract class IntroSelector extends Selector {
@Override
public final void select(int from, int to, int k) {
checkArgs(from, to, k);
final int maxDepth = 2 * MathUtil.log(to - from, 2);
quickSelect(from, to, k, maxDepth);
}
// heap sort
// TODO: use median of median instead to have linear worst-case rather than
// n*log(n)
void slowSelect(int from, int to, int k) {
new Sorter() {
@Override
protected void swap(int i, int j) {
IntroSelector.this.swap(i, j);
}
@Override
protected int compare(int i, int j) {
return IntroSelector.this.compare(i, j);
}
public void sort(int from, int to) {
heapSort(from, to);
}
}.sort(from, to);
}
private void quickSelect(int from, int to, int k, int maxDepth) {
assert from <= k;
assert k < to;
if (to - from == 1) {
return;
}
if (--maxDepth < 0) {
slowSelect(from, to, k);
return;
}
final int mid = (from + to) >>> 1;
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
if (compare(from, to - 1) > 0) {
swap(from, to - 1);
}
if (compare(to - 1, mid) > 0) {
swap(to - 1, mid);
if (compare(from, to - 1) > 0) {
swap(from, to - 1);
}
}
setPivot(to - 1);
int left = from + 1;
int right = to - 2;
for (;;) {
while (comparePivot(left) > 0) {
++left;
}
while (left < right && comparePivot(right) <= 0) {
--right;
}
if (left < right) {
swap(left, right);
--right;
} else {
break;
}
}
swap(left, to - 1);
if (left == k) {
return;
} else if (left < k) {
quickSelect(left + 1, to, k, maxDepth);
} else {
quickSelect(from, left, k, maxDepth);
}
}
/** Compare entries found in slots <code>i</code> and <code>j</code>.
* The contract for the returned value is the same as
* {@link Comparator#compare(Object, Object)}. */
protected int compare(int i, int j) {
setPivot(i);
return comparePivot(j);
}
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
protected abstract void setPivot(int i);
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
protected abstract int comparePivot(int j);
}

View File

@ -16,7 +16,6 @@
*/
package org.apache.lucene.util;
/**
* {@link Sorter} implementation based on a variant of the quicksort algorithm
* called <a href="http://en.wikipedia.org/wiki/Introsort">introsort</a>: when
@ -38,8 +37,8 @@ public abstract class IntroSorter extends Sorter {
}
void quicksort(int from, int to, int maxDepth) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
return;
} else if (--maxDepth < 0) {
heapSort(from, to);
@ -84,11 +83,18 @@ public abstract class IntroSorter extends Sorter {
quicksort(left + 1, to, maxDepth);
}
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
// Don't rely on the slow default impl of setPivot/comparePivot since
// quicksort relies on these methods to be fast for good performance
@Override
protected abstract void setPivot(int i);
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
@Override
protected abstract int comparePivot(int j);
@Override
protected int compare(int i, int j) {
setPivot(i);
return comparePivot(j);
}
}

View File

@ -38,6 +38,7 @@ public abstract class MSBRadixSorter extends Sorter {
// we store one histogram per recursion level
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
private final int[] commonPrefix;
private final int maxLength;
@ -47,6 +48,7 @@ public abstract class MSBRadixSorter extends Sorter {
*/
protected MSBRadixSorter(int maxLength) {
this.maxLength = maxLength;
this.commonPrefix = new int[Math.min(24, maxLength)];
}
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
@ -116,14 +118,14 @@ public abstract class MSBRadixSorter extends Sorter {
@Override
public void sort(int from, int to) {
checkRange(from, to);
sort(from, to, 0);
sort(from, to, 0, 0);
}
private void sort(int from, int to, int k) {
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
private void sort(int from, int to, int k, int l) {
if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
introSort(from, to, k);
} else {
radixSort(from, to, k);
radixSort(from, to, k, l);
}
}
@ -131,28 +133,30 @@ public abstract class MSBRadixSorter extends Sorter {
getFallbackSorter(k).sort(from, to);
}
private void radixSort(int from, int to, int k) {
int[] histogram = histograms[k];
/**
* @param k the character number to compare
* @param l the level of recursion
*/
private void radixSort(int from, int to, int k, int l) {
int[] histogram = histograms[l];
if (histogram == null) {
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
histogram = histograms[l] = new int[HISTOGRAM_SIZE];
} else {
Arrays.fill(histogram, 0);
}
buildHistogram(from, to, k, histogram);
// short-circuit: if all keys have the same byte at offset k, then recurse directly
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
if (histogram[i] == to - from) {
// everything is in the same bucket, recurse
if (i > 0) {
sort(from, to, k + 1);
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, k, histogram);
if (commonPrefixLength > 0) {
// if there are no more chars to compare or if all entries fell into the
// first bucket (which means strings are shorter than k) then we are done
// otherwise recurse
if (k + commonPrefixLength < maxLength
&& histogram[0] < to - from) {
radixSort(from, to, k + commonPrefixLength, l);
}
return;
} else if (histogram[i] != 0) {
break;
}
}
assert assertHistogram(commonPrefixLength, histogram);
int[] startOffsets = histogram;
int[] endOffsets = this.endOffsets;
@ -167,24 +171,83 @@ public abstract class MSBRadixSorter extends Sorter {
int h = endOffsets[i];
final int bucketLen = h - prev;
if (bucketLen > 1) {
sort(from + prev, from + h, k + 1);
sort(from + prev, from + h, k + 1, l + 1);
}
prev = h;
}
}
}
// only used from assert
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
int numberOfUniqueBytes = 0;
for (int freq : histogram) {
if (freq > 0) {
numberOfUniqueBytes++;
}
}
if (numberOfUniqueBytes == 1) {
assert commonPrefixLength >= 1;
} else {
assert commonPrefixLength == 0 : commonPrefixLength;
}
return true;
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int i, int k) {
return byteAt(i, k) + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
* and return a common prefix length for all visited values.
* @see #buildHistogram */
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
final int[] commonPrefix = this.commonPrefix;
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(from, k + j);
commonPrefix[j] = b;
if (b == -1) {
commonPrefixLength = j + 1;
break;
}
}
int i;
outer: for (i = from + 1; i < to; ++i) {
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(i, k + j);
if (b != commonPrefix[j]) {
commonPrefixLength = j;
if (commonPrefixLength == 0) { // we have no common prefix
histogram[commonPrefix[0] + 1] = i - from;
histogram[b + 1] = 1;
break outer;
}
break;
}
}
}
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;
buildHistogram(i + 1, to, k, histogram);
} else {
assert commonPrefixLength > 0;
histogram[commonPrefix[0] + 1] = to - from;
}
return commonPrefixLength;
}
/** Build an histogram of the k-th characters of values occurring between
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
private void buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
return histogram;
}
/** Accumulate values of the histogram so that it does not store counts but

View File

@ -0,0 +1,278 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
/** Radix selector.
* <p>This implementation works similarly to a MSB radix sort except that it
* only recurses into the sub partition that contains the desired value.
* @lucene.internal */
public abstract class RadixSelector extends Selector {
// after that many levels of recursion we fall back to introselect anyway
// this is used as a protection against the fact that radix sort performs
// worse when there are long common prefixes (probably because of cache
// locality)
private static final int LEVEL_THRESHOLD = 8;
// size of histograms: 256 + 1 to indicate that the string is finished
private static final int HISTOGRAM_SIZE = 257;
// buckets below this size will be sorted with introselect
private static final int LENGTH_THRESHOLD = 100;
// we store one histogram per recursion level
private final int[] histogram = new int[HISTOGRAM_SIZE];
private final int[] commonPrefix;
private final int maxLength;
/**
* Sole constructor.
* @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown.
*/
protected RadixSelector(int maxLength) {
this.maxLength = maxLength;
this.commonPrefix = new int[Math.min(24, maxLength)];
}
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
* its length is less than or equal to {@code k}. This may only be called
* with a value of {@code i} between {@code 0} included and
* {@code maxLength} excluded. */
protected abstract int byteAt(int i, int k);
/** Get a fall-back selector which may assume that the first {@code d} bytes
* of all compared strings are equal. This fallback selector is used when
* the range becomes narrow or when the maximum level of recursion has
* been exceeded. */
protected Selector getFallbackSelector(int d) {
return new IntroSelector() {
@Override
protected void swap(int i, int j) {
RadixSelector.this.swap(i, j);
}
@Override
protected int compare(int i, int j) {
for (int o = d; o < maxLength; ++o) {
final int b1 = byteAt(i, o);
final int b2 = byteAt(j, o);
if (b1 != b2) {
return b1 - b2;
} else if (b1 == -1) {
break;
}
}
return 0;
}
@Override
protected void setPivot(int i) {
pivot.setLength(0);
for (int o = d; o < maxLength; ++o) {
final int b = byteAt(i, o);
if (b == -1) {
break;
}
pivot.append((byte) b);
}
}
@Override
protected int comparePivot(int j) {
for (int o = 0; o < pivot.length(); ++o) {
final int b1 = pivot.byteAt(o) & 0xff;
final int b2 = byteAt(j, d + o);
if (b1 != b2) {
return b1 - b2;
}
}
if (d + pivot.length() == maxLength) {
return 0;
}
return -1 - byteAt(j, d + pivot.length());
}
private final BytesRefBuilder pivot = new BytesRefBuilder();
};
}
@Override
public void select(int from, int to, int k) {
checkArgs(from, to, k);
select(from, to, k, 0, 0);
}
private void select(int from, int to, int k, int d, int l) {
if (to - from <= LENGTH_THRESHOLD || d >= LEVEL_THRESHOLD) {
getFallbackSelector(d).select(from, to, k);
} else {
radixSelect(from, to, k, d, l);
}
}
/**
* @param d the character number to compare
* @param l the level of recursion
*/
private void radixSelect(int from, int to, int k, int d, int l) {
final int[] histogram = this.histogram;
Arrays.fill(histogram, 0);
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, d, histogram);
if (commonPrefixLength > 0) {
// if there are no more chars to compare or if all entries fell into the
// first bucket (which means strings are shorter than d) then we are done
// otherwise recurse
if (d + commonPrefixLength < maxLength
&& histogram[0] < to - from) {
radixSelect(from, to, k, d + commonPrefixLength, l);
}
return;
}
assert assertHistogram(commonPrefixLength, histogram);
int bucketFrom = from;
for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) {
final int bucketTo = bucketFrom + histogram[bucket];
if (bucketTo > k) {
partition(from, to, bucket, bucketFrom, bucketTo, d);
if (bucket != 0 && d + 1 < maxLength) {
// all elements in bucket 0 are equal so we only need to recurse if bucket != 0
select(bucketFrom, bucketTo, k, d + 1, l + 1);
}
return;
}
bucketFrom = bucketTo;
}
throw new AssertionError("Unreachable code");
}
// only used from assert
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
int numberOfUniqueBytes = 0;
for (int freq : histogram) {
if (freq > 0) {
numberOfUniqueBytes++;
}
}
if (numberOfUniqueBytes == 1) {
assert commonPrefixLength >= 1;
} else {
assert commonPrefixLength == 0;
}
return true;
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int i, int k) {
return byteAt(i, k) + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
* and return a common prefix length for all visited values.
* @see #buildHistogram */
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
final int[] commonPrefix = this.commonPrefix;
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(from, k + j);
commonPrefix[j] = b;
if (b == -1) {
commonPrefixLength = j + 1;
break;
}
}
int i;
outer: for (i = from + 1; i < to; ++i) {
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(i, k + j);
if (b != commonPrefix[j]) {
commonPrefixLength = j;
if (commonPrefixLength == 0) { // we have no common prefix
histogram[commonPrefix[0] + 1] = i - from;
histogram[b + 1] = 1;
break outer;
}
break;
}
}
}
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;
buildHistogram(i + 1, to, k, histogram);
} else {
assert commonPrefixLength > 0;
histogram[commonPrefix[0] + 1] = to - from;
}
return commonPrefixLength;
}
/** Build an histogram of the k-th characters of values occurring between
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
private void buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
}
/** Reorder elements so that all of them that fall into {@code bucket} are
* between offsets {@code bucketFrom} and {@code bucketTo}. */
private void partition(int from, int to, int bucket, int bucketFrom, int bucketTo, int d) {
int left = from;
int right = to - 1;
int slot = bucketFrom;
for (;;) {
int leftBucket = getBucket(left, d);
int rightBucket = getBucket(right, d);
while (leftBucket <= bucket && left < bucketFrom) {
if (leftBucket == bucket) {
swap(left, slot++);
} else {
++left;
}
leftBucket = getBucket(left, d);
}
while (rightBucket >= bucket && right >= bucketTo) {
if (rightBucket == bucket) {
swap(right, slot++);
} else {
--right;
}
rightBucket = getBucket(right, d);
}
if (left < bucketFrom && right >= bucketTo) {
swap(left++, right--);
} else {
assert left == bucketFrom;
assert right == bucketTo - 1;
break;
}
}
}
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
/** An implementation of a selection algorithm, ie. computing the k-th greatest
* value from a collection. */
public abstract class Selector {
/** Reorder elements so that the element at position {@code k} is the same
* as if all elements were sorted and all other elements are partitioned
* around it: {@code [from, k)} only contains elements that are less than
* or equal to {@code k} and {@code (k, to)} only contains elements that
* are greater than or equal to {@code k}. */
public abstract void select(int from, int to, int k);
void checkArgs(int from, int to, int k) {
if (k < from) {
throw new IllegalArgumentException("k must be >= from");
}
if (k >= to) {
throw new IllegalArgumentException("k must be < to");
}
}
/** Swap values at slots <code>i</code> and <code>j</code>. */
protected abstract void swap(int i, int j);
}

View File

@ -23,7 +23,7 @@ import java.util.Comparator;
* @lucene.internal */
public abstract class Sorter {
static final int INSERTION_SORT_THRESHOLD = 20;
static final int BINARY_SORT_THRESHOLD = 20;
/** Sole constructor, used for inheritance. */
protected Sorter() {}
@ -36,6 +36,20 @@ public abstract class Sorter {
/** Swap values at slots <code>i</code> and <code>j</code>. */
protected abstract void swap(int i, int j);
private int pivotIndex;
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
protected void setPivot(int i) {
pivotIndex = i;
}
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
protected int comparePivot(int j) {
return compare(pivotIndex, j);
}
/** Sort the slice which starts at <code>from</code> (inclusive) and ends at
* <code>to</code> (exclusive). */
public abstract void sort(int from, int to);
@ -163,54 +177,41 @@ public abstract class Sorter {
}
}
void insertionSort(int from, int to) {
for (int i = from + 1; i < to; ++i) {
for (int j = i; j > from; --j) {
if (compare(j - 1, j) > 0) {
swap(j - 1, j);
} else {
break;
}
}
}
}
/**
* A binary sort implementation. This performs {@code O(n*log(n))} comparisons
* and {@code O(n^2)} swaps. It is typically used by more sophisticated
* implementations as a fall-back when the numbers of items to sort has become
* less than {@value #BINARY_SORT_THRESHOLD}.
*/
void binarySort(int from, int to) {
binarySort(from, to, from + 1);
}
void binarySort(int from, int to, int i) {
for ( ; i < to; ++i) {
setPivot(i);
int l = from;
int h = i - 1;
while (l <= h) {
final int mid = (l + h) >>> 1;
final int cmp = compare(i, mid);
final int cmp = comparePivot(mid);
if (cmp < 0) {
h = mid - 1;
} else {
l = mid + 1;
}
}
switch (i - l) {
case 2:
swap(l + 1, l + 2);
swap(l, l + 1);
break;
case 1:
swap(l, l + 1);
break;
case 0:
break;
default:
for (int j = i; j > l; --j) {
swap(j - 1, j);
}
break;
}
}
}
/**
* Use heap sort to sort items between {@code from} inclusive and {@code to}
* exclusive. This runs in {@code O(n*log(n))} and is used as a fall-back by
* {@link IntroSorter}.
*/
void heapSort(int from, int to) {
if (to - from <= 1) {
return;

View File

@ -357,13 +357,13 @@ public class Automaton implements Accountable {
}
private void growStates() {
if (nextState+2 >= states.length) {
if (nextState+2 > states.length) {
states = ArrayUtil.grow(states, nextState+2);
}
}
private void growTransitions() {
if (nextTransition+3 >= transitions.length) {
if (nextTransition+3 > transitions.length) {
transitions = ArrayUtil.grow(transitions, nextTransition+3);
}
}

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.function.IntFunction;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
@ -111,7 +112,8 @@ public class BKDWriter implements Closeable {
final byte[] scratchDiff;
final byte[] scratch1;
final byte[] scratch2;
final BytesRef scratchBytesRef = new BytesRef();
final BytesRef scratchBytesRef1 = new BytesRef();
final BytesRef scratchBytesRef2 = new BytesRef();
final int[] commonPrefixLengths;
protected final FixedBitSet docsSeen;
@ -173,7 +175,6 @@ public class BKDWriter implements Closeable {
packedBytesLength = numDims * bytesPerDim;
scratchDiff = new byte[bytesPerDim];
scratchBytesRef.length = packedBytesLength;
scratch1 = new byte[packedBytesLength];
scratch2 = new byte[packedBytesLength];
commonPrefixLengths = new int[numDims];
@ -204,7 +205,7 @@ public class BKDWriter implements Closeable {
// all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X
// what that level would consume, so we multiply by 0.5 to convert from
// bytes to points here. Each dimension has its own sorted partition, so
// we must divide by numDims as well.
// we must divide by numDims as wel.
maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
@ -416,15 +417,25 @@ public class BKDWriter implements Closeable {
}
}
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
/** Write a field from a {@link MutablePointsReader}. This way of writing
* points is faster than regular writes with {@link BKDWriter#add} since
* there is opportunity for reordering points before writing them to
* disk. This method does not use transient disk in order to reorder points.
*/
public long writeField(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
if (numDims == 1) {
return writeField1Dim(out, fieldName, reader);
} else {
return writeFieldNDims(out, fieldName, reader);
}
}
/* In the 2+D case, we recursively pick the split dimension, compute the
* median value and partition other values around it. */
private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge");
throw new IllegalStateException("cannot mix add and writeField");
}
// Catch user silliness:
@ -435,6 +446,81 @@ public class BKDWriter implements Closeable {
// Mark that we already finished:
heapPointWriter = null;
long countPerLeaf = pointCount = reader.size(fieldName);
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
int numLeaves = Math.toIntExact(innerNodeCount);
checkMaxLeafNodeCount(numLeaves);
final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)];
final long[] leafBlockFPs = new long[numLeaves];
// compute the min/max for this slice
Arrays.fill(minPackedValue, (byte) 0xff);
Arrays.fill(maxPackedValue, (byte) 0);
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
reader.getValue(i, scratchBytesRef1);
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset) < 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
}
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset) > 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
}
}
docsSeen.set(reader.getDocID(i));
}
build(1, numLeaves, reader, 0, Math.toIntExact(pointCount), out,
minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
new int[maxPointsInLeafNode]);
long indexFP = out.getFilePointer();
writeIndex(out, leafBlockFPs, splitPackedValues);
return indexFP;
}
/* In the 1D case, we can simply sort points in ascending order and use the
* same writing logic as we use at merge time. */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size(fieldName)));
final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
reader.intersect(fieldName, new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
oneDimWriter.add(packedValue, docID);
}
@Override
public void visit(int docID) throws IOException {
throw new IllegalStateException();
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
return oneDimWriter.finish();
}
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
assert docMaps == null || readers.size() == docMaps.size();
BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());
@ -453,72 +539,14 @@ public class BKDWriter implements Closeable {
}
}
if (queue.size() == 0) {
return -1;
}
int leafCount = 0;
List<Long> leafBlockFPs = new ArrayList<>();
List<byte[]> leafBlockStartValues = new ArrayList<>();
// Target halfway between min and max allowed for the leaf:
int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode);
//System.out.println("POINTS PER: " + pointsPerLeafBlock);
byte[] lastPackedValue = new byte[bytesPerDim];
byte[] firstPackedValue = new byte[bytesPerDim];
long valueCount = 0;
// Buffer up each leaf block's docs and values
int[] leafBlockDocIDs = new int[maxPointsInLeafNode];
byte[][] leafBlockPackedValues = new byte[maxPointsInLeafNode][];
for(int i=0;i<maxPointsInLeafNode;i++) {
leafBlockPackedValues[i] = new byte[packedBytesLength];
}
Arrays.fill(commonPrefixLengths, bytesPerDim);
OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
while (queue.size() != 0) {
MergeReader reader = queue.top();
// System.out.println("iter reader=" + reader);
// NOTE: doesn't work with subclasses (e.g. SimpleText!)
int docID = reader.docID;
leafBlockDocIDs[leafCount] = docID;
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
docsSeen.set(docID);
if (valueCount == 0) {
System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength);
}
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue, 0);
valueCount++;
if (pointCount > totalPointCount) {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
}
if (leafCount == 0) {
if (leafBlockFPs.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
leafBlockStartValues.add(Arrays.copyOf(reader.state.scratchPackedValue, bytesPerDim));
}
Arrays.fill(commonPrefixLengths, bytesPerDim);
System.arraycopy(reader.state.scratchPackedValue, 0, firstPackedValue, 0, bytesPerDim);
} else {
// Find per-dim common prefix:
for(int dim=0;dim<numDims;dim++) {
int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (firstPackedValue[offset+j] != reader.state.scratchPackedValue[offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
leafCount++;
oneDimWriter.add(reader.state.scratchPackedValue, reader.docID);
if (reader.next()) {
queue.updateTop();
@ -526,35 +554,78 @@ public class BKDWriter implements Closeable {
// This segment was exhausted
queue.pop();
}
}
return oneDimWriter.finish();
}
private class OneDimensionBKDWriter {
final IndexOutput out;
final List<Long> leafBlockFPs = new ArrayList<>();
final List<byte[]> leafBlockStartValues = new ArrayList<>();
final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
final int[] leafDocs = new int[maxPointsInLeafNode];
long valueCount;
int leafCount;
OneDimensionBKDWriter(IndexOutput out) {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
}
if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge");
}
// Catch user silliness:
if (heapPointWriter == null && tempInput == null) {
throw new IllegalStateException("already finished");
}
// Mark that we already finished:
heapPointWriter = null;
this.out = out;
lastPackedValue = new byte[packedBytesLength];
}
// for asserts
final byte[] lastPackedValue;
int lastDocID;
void add(byte[] packedValue, int docID) throws IOException {
assert valueInOrder(valueCount + leafCount,
0, lastPackedValue, packedValue, 0, docID, lastDocID);
System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
leafDocs[leafCount] = docID;
docsSeen.set(docID);
leafCount++;
if (valueCount > totalPointCount) {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
}
if (leafCount == maxPointsInLeafNode) {
// We write a block once we hit exactly the max count ... this is different from
// when we flush a new segment, where we write between max/2 and max per leaf block,
// so merged segments will behave differently from newly flushed segments:
if (leafCount == pointsPerLeafBlock || queue.size() == 0) {
leafBlockFPs.add(out.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
writeLeafBlockDocs(out, leafBlockDocIDs, 0, leafCount);
writeCommonPrefixes(out, commonPrefixLengths, firstPackedValue);
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
final BytesRef scratch = new BytesRef();
{
scratch.length = packedBytesLength;
scratch.offset = 0;
}
@Override
public BytesRef apply(int i) {
scratch.bytes = leafBlockPackedValues[i];
return scratch;
}
};
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
writeLeafBlock();
leafCount = 0;
}
assert (lastDocID = docID) >= 0; // only assign when asserts are enabled
}
public long finish() throws IOException {
if (leafCount > 0) {
writeLeafBlock();
leafCount = 0;
}
if (valueCount == 0) {
return -1;
}
pointCount = valueCount;
@ -575,6 +646,60 @@ public class BKDWriter implements Closeable {
return indexFP;
}
private void writeLeafBlock() throws IOException {
assert leafCount != 0;
if (valueCount == 0) {
System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
}
System.arraycopy(leafValues, (leafCount - 1) * packedBytesLength, maxPackedValue, 0, packedBytesLength);
valueCount += leafCount;
if (leafBlockFPs.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
leafBlockStartValues.add(Arrays.copyOf(leafValues, packedBytesLength));
}
leafBlockFPs.add(out.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
Arrays.fill(commonPrefixLengths, bytesPerDim);
// Find per-dim common prefix:
for(int dim=0;dim<numDims;dim++) {
int offset1 = dim * bytesPerDim;
int offset2 = (leafCount - 1) * packedBytesLength + offset1;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (leafValues[offset1+j] != leafValues[offset2+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
writeLeafBlockDocs(out, leafDocs, 0, leafCount);
writeCommonPrefixes(out, commonPrefixLengths, leafValues);
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
final BytesRef scratch = new BytesRef();
{
scratch.length = packedBytesLength;
scratch.bytes = leafValues;
}
@Override
public BytesRef apply(int i) {
scratch.offset = packedBytesLength * i;
return scratch;
}
};
assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength),
Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
packedValues, leafDocs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
}
}
// TODO: there must be a simpler way?
private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
//System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length);
@ -686,6 +811,7 @@ public class BKDWriter implements Closeable {
}
private PointWriter sort(int dim) throws IOException {
assert dim >= 0 && dim < numDims;
if (heapPointWriter != null) {
@ -1110,6 +1236,132 @@ public class BKDWriter implements Closeable {
}
}
/* Recursively reorders the provided reader and writes the bkd-tree on the fly. */
private void build(int nodeID, int leafNodeOffset,
MutablePointsReader reader, int from, int to,
IndexOutput out,
byte[] minPackedValue, byte[] maxPackedValue,
byte[] splitPackedValues,
long[] leafBlockFPs,
int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) {
// leaf node
final int count = to - from;
assert count <= maxPointsInLeafNode;
// Compute common prefixes
Arrays.fill(commonPrefixLengths, bytesPerDim);
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim=0;dim<numDims;dim++) {
final int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
FixedBitSet[] usedBytes = new FixedBitSet[numDims];
for (int dim = 0; dim < numDims; ++dim) {
if (commonPrefixLengths[dim] < bytesPerDim) {
usedBytes[dim] = new FixedBitSet(256);
}
}
for (int i = from + 1; i < to; ++i) {
for (int dim=0;dim<numDims;dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < numDims; ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
sortedDim = dim;
sortedDimCardinality = cardinality;
}
}
}
// sort by sortedDim
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
// Write doc IDs
int[] docIDs = spareDocIds;
for (int i = from; i < to; ++i) {
docIDs[i - from] = reader.getDocID(i);
}
writeLeafBlockDocs(out, docIDs, 0, count);
// Write the common prefixes:
reader.getValue(from, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
writeCommonPrefixes(out, commonPrefixLengths, scratch1);
// Write the full values:
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
@Override
public BytesRef apply(int i) {
reader.getValue(from + i, scratchBytesRef1);
return scratchBytesRef1;
}
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
// inner node
// compute the split dimension and partition around it
final int splitDim = split(minPackedValue, maxPackedValue);
final int mid = (from + to + 1) >>> 1;
int commonPrefixLen = bytesPerDim;
for (int i = 0; i < bytesPerDim; ++i) {
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
commonPrefixLen = i;
break;
}
}
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value
final int address = nodeID * (1+bytesPerDim);
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
// recurse
build(nodeID * 2, leafNodeOffset, reader, from, mid, out,
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out,
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
}
}
/** The array (sized numDims) of PathSlice describe the cell we have currently recursed to. */
private void build(int nodeID, int leafNodeOffset,
PathSlice[] slices,
@ -1217,7 +1469,8 @@ public class BKDWriter implements Closeable {
return scratch;
}
};
assert valuesInOrderAndBounds(count, minPackedValue, maxPackedValue, packedValues);
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
heapSource.docIDs, Math.toIntExact(source.start));
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
@ -1321,12 +1574,16 @@ public class BKDWriter implements Closeable {
}
// only called from assert
private boolean valuesInOrderAndBounds(int count, byte[] minPackedValue, byte[] maxPackedValue, IntFunction<BytesRef> values) throws IOException {
byte[] lastPackedValue = new byte[bytesPerDim];
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue,
IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException {
byte[] lastPackedValue = new byte[packedBytesLength];
int lastDoc = -1;
for (int i=0;i<count;i++) {
BytesRef packedValue = values.apply(i);
assert packedValue.length == packedBytesLength;
assert numDims != 1 || valueInOrder(i, lastPackedValue, packedValue.bytes, packedValue.offset);
assert valueInOrder(i, sortedDim, lastPackedValue, packedValue.bytes, packedValue.offset,
docs[docsOffset + i], lastDoc);
lastDoc = docs[docsOffset + i];
// Make sure this value does in fact fall within this leaf cell:
assert valueInBounds(packedValue, minPackedValue, maxPackedValue);
@ -1335,11 +1592,19 @@ public class BKDWriter implements Closeable {
}
// only called from assert
private boolean valueInOrder(long ord, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset) {
if (ord > 0 && StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, packedValueOffset) > 0) {
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset,
int doc, int lastDoc) {
int dimOffset = sortedDim * bytesPerDim;
if (ord > 0) {
int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, dimOffset, packedValue, packedValueOffset + dimOffset);
if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
}
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, bytesPerDim);
if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
}
}
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength);
return true;
}

View File

@ -0,0 +1,186 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.MSBRadixSorter;
import org.apache.lucene.util.RadixSelector;
import org.apache.lucene.util.Selector;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.PackedInts;
final class MutablePointsReaderUtils {
MutablePointsReaderUtils() {}
/** Sort the given {@link MutablePointsReader} based on its packed value then doc ID. */
static void sort(int maxDoc, int packedBytesLength,
MutablePointsReader reader, int from, int to) {
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) {
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected int byteAt(int i, int k) {
if (k < packedBytesLength) {
return Byte.toUnsignedInt(reader.getByteAt(i, k));
} else {
final int shift = bitsPerDocId - ((k - packedBytesLength + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
@Override
protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
return new IntroSorter() {
final BytesRef pivot = new BytesRef();
final BytesRef scratch = new BytesRef();
int pivotDoc;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
if (k < packedBytesLength) {
reader.getValue(j, scratch);
int cmp = StringHelper.compare(packedBytesLength - k, pivot.bytes, pivot.offset + k, scratch.bytes, scratch.offset + k);
if (cmp != 0) {
return cmp;
}
}
return pivotDoc - reader.getDocID(j);
}
};
}
}.sort(from, to);
}
/** Sort points on the given dimension. */
static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
MutablePointsReader reader, int from, int to,
BytesRef scratch1, BytesRef scratch2) {
// No need for a fancy radix sort here, this is called on the leaves only so
// there are not many values to sort
final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
new IntroSorter() {
final BytesRef pivot = scratch1;
int pivotDoc = -1;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
reader.getValue(j, scratch2);
int cmp = StringHelper.compare(numBytesToCompare, pivot.bytes, pivot.offset + offset, scratch2.bytes, scratch2.offset + offset);
if (cmp == 0) {
cmp = pivotDoc - reader.getDocID(j);
}
return cmp;
}
}.sort(from, to);
}
/** Partition points around {@code mid}. All values on the left must be less
* than or equal to it and all values on the right must be greater than or
* equal to it. */
static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
MutablePointsReader reader, int from, int to, int mid,
BytesRef scratch1, BytesRef scratch2) {
final int offset = splitDim * bytesPerDim + commonPrefixLen;
final int cmpBytes = bytesPerDim - commonPrefixLen;
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {
@Override
protected Selector getFallbackSelector(int k) {
return new IntroSelector() {
final BytesRef pivot = scratch1;
int pivotDoc;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
if (k < cmpBytes) {
reader.getValue(j, scratch2);
int cmp = StringHelper.compare(cmpBytes - k, pivot.bytes, pivot.offset + offset + k, scratch2.bytes, scratch2.offset + offset + k);
if (cmp != 0) {
return cmp;
}
}
return pivotDoc - reader.getDocID(j);
}
};
}
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected int byteAt(int i, int k) {
if (k < cmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
} else {
final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
}.select(from, to, mid);
}
}

View File

@ -41,8 +41,9 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
if (random().nextBoolean()) {
// randomize parameters
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
// sneaky impersonation!
@ -52,7 +53,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override

View File

@ -1156,8 +1156,9 @@ public class TestPointQueries extends LuceneTestCase {
private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 5.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
return new FilterCodec("Lucene62", Codec.getDefault()) {
@ -1166,7 +1167,7 @@ public class TestPointQueries extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override

View File

@ -45,7 +45,26 @@ public class TestByteBlockPool extends LuceneTestCase {
for (BytesRef expected : list) {
ref.grow(expected.length);
ref.setLength(expected.length);
switch (random().nextInt(3)) {
case 0:
// copy bytes
pool.readBytes(position, ref.bytes(), 0, ref.length());
break;
case 1:
// copy bytes one by one
for (int i = 0; i < ref.length(); ++i) {
ref.setByteAt(i, pool.readByte(position + i));
}
break;
case 2:
BytesRef scratch = new BytesRef();
scratch.length = ref.length();
pool.setRawBytesRef(scratch, position);
System.arraycopy(scratch.bytes, scratch.offset, ref.bytes(), 0, ref.length());
break;
default:
fail();
}
assertEquals(expected, ref.get());
position += ref.length();
}

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
public class TestIntroSelector extends LuceneTestCase {
public void testSelect() {
for (int iter = 0; iter < 100; ++iter) {
doTestSelect(false);
}
}
public void testSlowSelect() {
for (int iter = 0; iter < 100; ++iter) {
doTestSelect(true);
}
}
private void doTestSelect(boolean slow) {
final int from = random().nextInt(5);
final int to = from + TestUtil.nextInt(random(), 1, 10000);
final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000);
Integer[] arr = new Integer[from + to + random().nextInt(5)];
for (int i = 0; i < arr.length; ++i) {
arr[i] = TestUtil.nextInt(random(), 0, max);
}
final int k = TestUtil.nextInt(random(), from, to - 1);
Integer[] expected = arr.clone();
Arrays.sort(expected, from, to);
Integer[] actual = arr.clone();
IntroSelector selector = new IntroSelector() {
Integer pivot;
@Override
protected void swap(int i, int j) {
ArrayUtil.swap(actual, i, j);
}
@Override
protected void setPivot(int i) {
pivot = actual[i];
}
@Override
protected int comparePivot(int j) {
return pivot.compareTo(actual[j]);
}
};
if (slow) {
selector.slowSelect(from, to, k);
} else {
selector.select(from, to, k);
}
assertEquals(expected[k], actual[k]);
for (int i = 0; i < actual.length; ++i) {
if (i < from || i >= to) {
assertSame(arr[i], actual[i]);
} else if (i <= k) {
assertTrue(actual[i].intValue() <= actual[k].intValue());
} else {
assertTrue(actual[i].intValue() >= actual[k].intValue());
}
}
}
}

View File

@ -17,6 +17,8 @@
package org.apache.lucene.util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class TestMSBRadixSorter extends LuceneTestCase {
@ -41,9 +43,12 @@ public class TestMSBRadixSorter extends LuceneTestCase {
break;
}
final int finalMaxLength = maxLength;
new MSBRadixSorter(maxLength) {
@Override
protected int byteAt(int i, int k) {
assertTrue(k < finalMaxLength);
BytesRef ref = refs[i];
if (ref.length <= k) {
return -1;
@ -114,4 +119,67 @@ public class TestMSBRadixSorter extends LuceneTestCase {
testRandom(TestUtil.nextInt(random(), 1, 30), 2);
}
}
public void testRandom2() {
// how large our alphabet is
int letterCount = TestUtil.nextInt(random(), 2, 10);
// how many substring fragments to use
int substringCount = TestUtil.nextInt(random(), 2, 10);
Set<BytesRef> substringsSet = new HashSet<>();
// how many strings to make
int stringCount = atLeast(10000);
//System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + " stringCount=" + stringCount);
while(substringsSet.size() < substringCount) {
int length = TestUtil.nextInt(random(), 2, 10);
byte[] bytes = new byte[length];
for(int i=0;i<length;i++) {
bytes[i] = (byte) random().nextInt(letterCount);
}
BytesRef br = new BytesRef(bytes);
substringsSet.add(br);
//System.out.println("add substring count=" + substringsSet.size() + ": " + br);
}
BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
double[] chance = new double[substrings.length];
double sum = 0.0;
for(int i=0;i<substrings.length;i++) {
chance[i] = random().nextDouble();
sum += chance[i];
}
// give each substring a random chance of occurring:
double accum = 0.0;
for(int i=0;i<substrings.length;i++) {
accum += chance[i]/sum;
chance[i] = accum;
}
Set<BytesRef> stringsSet = new HashSet<>();
int iters = 0;
while (stringsSet.size() < stringCount && iters < stringCount*5) {
int count = TestUtil.nextInt(random(), 1, 5);
BytesRefBuilder b = new BytesRefBuilder();
for(int i=0;i<count;i++) {
double v = random().nextDouble();
accum = 0.0;
for(int j=0;j<substrings.length;j++) {
accum += chance[j];
if (accum >= v) {
b.append(substrings[j]);
break;
}
}
}
BytesRef br = b.toBytesRef();
stringsSet.add(br);
//System.out.println("add string count=" + stringsSet.size() + ": " + br);
iters++;
}
test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
}
}

View File

@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
public class TestRadixSelector extends LuceneTestCase {
public void testSelect() {
for (int iter = 0; iter < 100; ++iter) {
doTestSelect();
}
}
private void doTestSelect() {
final int from = random().nextInt(5);
final int to = from + TestUtil.nextInt(random(), 1, 10000);
final int maxLen = TestUtil.nextInt(random(), 1, 12);
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
for (int i = 0; i < arr.length; ++i) {
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
random().nextBytes(bytes);
arr[i] = new BytesRef(bytes);
}
doTest(arr, from, to, maxLen);
}
public void testSharedPrefixes() {
for (int iter = 0; iter < 100; ++iter) {
doTestSharedPrefixes();
}
}
private void doTestSharedPrefixes() {
final int from = random().nextInt(5);
final int to = from + TestUtil.nextInt(random(), 1, 10000);
final int maxLen = TestUtil.nextInt(random(), 1, 12);
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
for (int i = 0; i < arr.length; ++i) {
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
random().nextBytes(bytes);
arr[i] = new BytesRef(bytes);
}
final int sharedPrefixLength = Math.min(arr[0].length, TestUtil.nextInt(random(), 1, maxLen));
for (int i = 1; i < arr.length; ++i) {
System.arraycopy(arr[0].bytes, arr[0].offset, arr[i].bytes, arr[i].offset, Math.min(sharedPrefixLength, arr[i].length));
}
doTest(arr, from, to, maxLen);
}
private void doTest(BytesRef[] arr, int from, int to, int maxLen) {
final int k = TestUtil.nextInt(random(), from, to - 1);
BytesRef[] expected = arr.clone();
Arrays.sort(expected, from, to);
BytesRef[] actual = arr.clone();
final int enforcedMaxLen = random().nextBoolean() ? maxLen : Integer.MAX_VALUE;
RadixSelector selector = new RadixSelector(enforcedMaxLen) {
@Override
protected void swap(int i, int j) {
ArrayUtil.swap(actual, i, j);
}
@Override
protected int byteAt(int i, int k) {
assertTrue(k < enforcedMaxLen);
BytesRef b = actual[i];
if (k >= b.length) {
return -1;
} else {
return Byte.toUnsignedInt(b.bytes[b.offset + k]);
}
}
};
selector.select(from, to, k);
assertEquals(expected[k], actual[k]);
for (int i = 0; i < actual.length; ++i) {
if (i < from || i >= to) {
assertSame(arr[i], actual[i]);
} else if (i <= k) {
assertTrue(actual[i].compareTo(actual[k]) <= 0);
} else {
assertTrue(actual[i].compareTo(actual[k]) >= 0);
}
}
}
}

View File

@ -0,0 +1,270 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
public class TestMutablePointsReaderUtils extends LuceneTestCase {
public void testSort() {
for (int iter = 0; iter < 5; ++iter) {
doTestSort();
}
}
private void doTestSort() {
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
DummyPointsReader reader = new DummyPointsReader(points);
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
Arrays.sort(points, new Comparator<Point>() {
@Override
public int compare(Point o1, Point o2) {
int cmp = o1.packedValue.compareTo(o2.packedValue);
if (cmp == 0) {
cmp = Integer.compare(o1.doc, o2.doc);
}
return cmp;
}
});
assertNotSame(points, reader.points);
assertArrayEquals(points, reader.points);
}
public void testSortByDim() {
for (int iter = 0; iter < 5; ++iter) {
doTestSortByDim();
}
}
private void doTestSortByDim() {
final int numDims = TestUtil.nextInt(random(), 1, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
int[] commonPrefixLengths = new int[numDims];
for (int i = 0; i < commonPrefixLengths.length; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = 0; dim < numDims; ++dim) {
int offset = dim * bytesPerDim;
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}
}
DummyPointsReader reader = new DummyPointsReader(points);
final int sortedDim = random().nextInt(numDims);
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
new BytesRef(), new BytesRef());
for (int i = 1; i < points.length; ++i) {
final int offset = sortedDim * bytesPerDim;
BytesRef previousValue = reader.points[i-1].packedValue;
BytesRef currentValue = reader.points[i].packedValue;
int cmp = StringHelper.compare(bytesPerDim,
previousValue.bytes, previousValue.offset + offset,
currentValue.bytes, currentValue.offset + offset);
if (cmp == 0) {
cmp = reader.points[i - 1].doc - reader.points[i].doc;
}
assertTrue(cmp <= 0);
}
}
public void testPartition() {
for (int iter = 0; iter < 5; ++iter) {
doTestPartition();
}
}
private void doTestPartition() {
final int numDims = TestUtil.nextInt(random(), 1, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
final int splitDim = random().nextInt(numDims);
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
BytesRef packedValue = points[i].packedValue;
int offset = splitDim * bytesPerDim;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
}
DummyPointsReader reader = new DummyPointsReader(points);
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
new BytesRef(), new BytesRef());
BytesRef pivotValue = reader.points[pivot].packedValue;
int offset = splitDim * bytesPerDim;
for (int i = 0; i < points.length; ++i) {
BytesRef value = reader.points[i].packedValue;
int cmp = StringHelper.compare(bytesPerDim,
value.bytes, value.offset + offset,
pivotValue.bytes, pivotValue.offset + offset);
if (cmp == 0) {
cmp = reader.points[i].doc - reader.points[pivot].doc;
}
if (i < pivot) {
assertTrue(cmp <= 0);
} else if (i > pivot) {
assertTrue(cmp >= 0);
} else {
assertEquals(0, cmp);
}
}
}
private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
final int packedBytesLength = numDims * bytesPerDim;
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
Point[] points = new Point[numPoints];
for (int i = 0; i < numPoints; ++i) {
byte[] value = new byte[packedBytesLength];
random().nextBytes(value);
points[i] = new Point(value, random().nextInt(maxDoc));
}
return points;
}
private static class Point {
final BytesRef packedValue;
final int doc;
Point(byte[] packedValue, int doc) {
// use a non-null offset to make sure MutablePointsReaderUtils does not ignore it
this.packedValue = new BytesRef(packedValue.length + 1);
this.packedValue.bytes[0] = (byte) random().nextInt(256);
this.packedValue.offset = 1;
this.packedValue.length = packedValue.length;
this.doc = doc;
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj instanceof Point == false) {
return false;
}
Point that = (Point) obj;
return packedValue.equals(that.packedValue) && doc == that.doc;
}
@Override
public int hashCode() {
return 31 * packedValue.hashCode() + doc;
}
@Override
public String toString() {
return "value=" + packedValue + " doc=" + doc;
}
}
private static class DummyPointsReader extends MutablePointsReader {
private final Point[] points;
DummyPointsReader(Point[] points) {
this.points = points.clone();
}
@Override
public void close() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long ramBytesUsed() {
return 0;
}
@Override
public void getValue(int i, BytesRef packedValue) {
packedValue.bytes = points[i].packedValue.bytes;
packedValue.offset = points[i].packedValue.offset;
packedValue.length = points[i].packedValue.length;
}
@Override
public byte getByteAt(int i, int k) {
BytesRef packedValue = points[i].packedValue;
return packedValue.bytes[packedValue.offset + k];
}
@Override
public int getDocID(int i) {
return points[i].doc;
}
@Override
public void swap(int i, int j) {
ArrayUtil.swap(points, i, j);
}
@Override
public void checkIntegrity() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long size(String fieldName) {
throw new UnsupportedOperationException();
}
@Override
public int getDocCount(String fieldName) {
throw new UnsupportedOperationException();
}
}
}

View File

@ -106,6 +106,7 @@ io.netty.netty-all.version = 4.0.36.Final
org.apache.curator.version = 2.8.0
/org.apache.curator/curator-client = ${org.apache.curator.version}
/org.apache.curator/curator-framework = ${org.apache.curator.version}
/org.apache.curator/curator-recipes = ${org.apache.curator.version}
/org.apache.derby/derby = 10.9.1.0

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.search.IndexSearcher;
/**
* Base class for comparison operators useful within an "if"/conditional.
*/
public abstract class ComparisonBoolFunction extends BoolFunction {
private final ValueSource lhs;
private final ValueSource rhs;
private final String name;
public ComparisonBoolFunction(ValueSource lhs, ValueSource rhs, String name) {
this.lhs = lhs;
this.rhs = rhs;
this.name = name;
}
/** Perform the comparison, returning true or false */
public abstract boolean compare(int doc, FunctionValues lhs, FunctionValues rhs);
/** Uniquely identify the operation (ie "gt", "lt" "gte", etc) */
public String name() {
return this.name;
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final FunctionValues lhsVal = this.lhs.getValues(context, readerContext);
final FunctionValues rhsVal = this.rhs.getValues(context, readerContext);
final String compLabel = this.name();
return new BoolDocValues(this) {
@Override
public boolean boolVal(int doc) {
return compare(doc, lhsVal, rhsVal);
}
@Override
public String toString(int doc) {
return compLabel + "(" + lhsVal.toString(doc) + "," + rhsVal.toString(doc) + ")";
}
@Override
public boolean exists(int doc) {
return lhsVal.exists(doc) && rhsVal.exists(doc);
}
};
}
@Override
public boolean equals(Object o) {
if (this.getClass() != o.getClass()) return false;
ComparisonBoolFunction other = (ComparisonBoolFunction)o;
return name().equals(other.name())
&& lhs.equals(other.lhs)
&& rhs.equals(other.rhs); }
@Override
public int hashCode() {
int h = this.getClass().hashCode();
h = h * 31 + this.name().hashCode();
h = h * 31 + lhs.hashCode();
h = h * 31 + rhs.hashCode();
return h;
}
@Override
public String description() {
return name() + "(" + lhs.description() + "," + rhs.description() + ")";
}
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
lhs.createWeight(context, searcher);
rhs.createWeight(context, searcher);
}
}

View File

@ -38,5 +38,10 @@ public class FileMetaData {
this.length = length;
this.checksum = checksum;
}
@Override
public String toString() {
return "FileMetaData(length=" + length + ")";
}
}

View File

@ -118,6 +118,8 @@ class SimpleCopyJob extends CopyJob {
return highPriority ? -1 : 1;
} else if (ord < other.ord) {
return -1;
} else if (ord > other.ord) {
return 1;
} else {
return 0;
}

View File

@ -121,4 +121,11 @@ public interface Bounds {
*/
public Bounds noBottomLatitudeBound();
/** Signal that there is no bound whatsoever.
* The bound is limited only by the constraints of the
* planet.
*@return the updated Bounds object.,
*/
public Bounds noBound(final PlanetModel planetModel);
}

View File

@ -253,6 +253,11 @@ public class LatLonBounds implements Bounds {
return this;
}
@Override
public Bounds noBound(final PlanetModel planetModel) {
return noLongitudeBound().noTopLatitudeBound().noBottomLatitudeBound();
}
// Protected methods
/** Update latitude bound.

View File

@ -1003,13 +1003,14 @@ public class Plane extends Vector {
* D - MINIMUM_RESOLUTION. Both are examined and intersection points determined.
*/
protected void findIntersectionBounds(final PlanetModel planetModel, final Bounds boundsInfo, final Plane q, final Membership... bounds) {
//System.out.println("Finding intersection bounds");
// Unnormalized, unchecked...
final double lineVectorX = y * q.z - z * q.y;
final double lineVectorY = z * q.x - x * q.z;
final double lineVectorZ = x * q.y - y * q.x;
if (Math.abs(lineVectorX) < MINIMUM_RESOLUTION && Math.abs(lineVectorY) < MINIMUM_RESOLUTION && Math.abs(lineVectorZ) < MINIMUM_RESOLUTION) {
// Degenerate case: parallel planes
//System.err.println(" planes are parallel - no intersection");
//System.out.println(" planes are parallel - no intersection");
return;
}
@ -1037,9 +1038,10 @@ public class Plane extends Vector {
final double denomXZ = this.x * q.z - this.z * q.x;
final double denomXY = this.x * q.y - this.y * q.x;
if (Math.abs(denomYZ) >= Math.abs(denomXZ) && Math.abs(denomYZ) >= Math.abs(denomXY)) {
//System.out.println("X biggest");
// X is the biggest, so our point will have x0 = 0.0
if (Math.abs(denomYZ) < MINIMUM_RESOLUTION_SQUARED) {
//System.err.println(" Denominator is zero: no intersection");
//System.out.println(" Denominator is zero: no intersection");
return;
}
final double denom = 1.0 / denomYZ;
@ -1061,9 +1063,10 @@ public class Plane extends Vector {
0.0, (-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, (this.y * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.y) * denom,
bounds);
} else if (Math.abs(denomXZ) >= Math.abs(denomXY) && Math.abs(denomXZ) >= Math.abs(denomYZ)) {
//System.out.println("Y biggest");
// Y is the biggest, so y0 = 0.0
if (Math.abs(denomXZ) < MINIMUM_RESOLUTION_SQUARED) {
//System.err.println(" Denominator is zero: no intersection");
//System.out.println(" Denominator is zero: no intersection");
return;
}
final double denom = 1.0 / denomXZ;
@ -1084,9 +1087,10 @@ public class Plane extends Vector {
(-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, 0.0, (this.x * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.x) * denom,
bounds);
} else {
//System.out.println("Z biggest");
// Z is the biggest, so Z0 = 0.0
if (Math.abs(denomXY) < MINIMUM_RESOLUTION_SQUARED) {
//System.err.println(" Denominator is zero: no intersection");
//System.out.println(" Denominator is zero: no intersection");
return;
}
final double denom = 1.0 / denomXY;
@ -1178,6 +1182,10 @@ public class Plane extends Vector {
if (point2Valid) {
boundsInfo.addPoint(new GeoPoint(point2X, point2Y, point2Z));
}
} else {
// If we can't intersect line with world, then it's outside the world, so
// we have to assume everything is included.
boundsInfo.noBound(planetModel);
}
}
@ -1351,8 +1359,6 @@ public class Plane extends Vector {
// m * [- 2*A*ab^2*r + 2*A^2*ab^2*r*q + 2*B^2*ab^2*r*q + 2*C^2*c^2*r*q] +
// [ab^2 - 2*A*ab^2*q + A^2*ab^2*q^2 + B^2*ab^2*q^2 + C^2*c^2*q^2] = 0
//System.err.println(" computing X bound");
// Useful subexpressions for this bound
final double q = A*abSquared*k;
final double qSquared = q * q;
@ -1392,6 +1398,7 @@ public class Plane extends Vector {
assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION;
final double m2 = (-b - sqrtResult) * commonDenom;
assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION;
if (Math.abs(m1) >= MINIMUM_RESOLUTION || Math.abs(m2) >= MINIMUM_RESOLUTION) {
final double l1 = r * m1 + q;
final double l2 = r * m2 + q;
// x = ((1 - l*A) * ab^2 ) / (2 * m)
@ -1410,11 +1417,14 @@ public class Plane extends Vector {
//assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2);
addPoint(boundsInfo, bounds, thePoint1);
addPoint(boundsInfo, bounds, thePoint2);
} else {
// This is a plane of the form A=n B=0 C=0. We can set a bound only by noting the D value.
boundsInfo.addXValue(-D/A);
}
} else {
// No solutions
}
} else if (Math.abs(b) > MINIMUM_RESOLUTION_SQUARED) {
//System.err.println("Not x quadratic");
// a = 0, so m = - c / b
final double m = -c / b;
final double l = r * m + q;
@ -1561,6 +1571,7 @@ public class Plane extends Vector {
assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION;
final double m2 = (-b - sqrtResult) * commonDenom;
assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION;
if (Math.abs(m1) >= MINIMUM_RESOLUTION || Math.abs(m2) >= MINIMUM_RESOLUTION) {
final double l1 = r * m1 + q;
final double l2 = r * m2 + q;
// x = (-l*A * ab^2 ) / (2 * m)
@ -1579,6 +1590,10 @@ public class Plane extends Vector {
//assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2);
addPoint(boundsInfo, bounds, thePoint1);
addPoint(boundsInfo, bounds, thePoint2);
} else {
// This is a plane of the form A=0 B=n C=0. We can set a bound only by noting the D value.
boundsInfo.addYValue(-D/B);
}
} else {
// No solutions
}

View File

@ -292,6 +292,17 @@ public class XYZBounds implements Bounds {
return this;
}
@Override
public Bounds noBound(final PlanetModel planetModel) {
minX = planetModel.getMinimumXValue();
maxX = planetModel.getMaximumXValue();
minY = planetModel.getMinimumYValue();
maxY = planetModel.getMaximumYValue();
minZ = planetModel.getMinimumZValue();
maxZ = planetModel.getMaximumZValue();
return this;
}
@Override
public String toString() {
return "XYZBounds: [xmin="+minX+" xmax="+maxX+" ymin="+minY+" ymax="+maxY+" zmin="+minZ+" zmax="+maxZ+"]";

View File

@ -87,8 +87,9 @@ public class TestGeo3DPoint extends LuceneTestCase {
private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
return new FilterCodec("Lucene62", Codec.getDefault()) {
@ -97,7 +98,7 @@ public class TestGeo3DPoint extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override

View File

@ -372,4 +372,19 @@ public class GeoBBoxTest {
assertTrue(box.isWithin(point)?solid.isWithin(point):true);
}
@Test
public void testFailureCase2() {
//final GeoPoint point = new GeoPoint(-0.7375647084975573, -2.3309121299774915E-10, 0.6746626163258577);
final GeoPoint point = new GeoPoint(-0.737564708579924, -9.032562595264542E-17, 0.6746626165197899);
final GeoBBox box = new GeoRectangle(PlanetModel.WGS84, 0.7988584710911523, 0.25383311815493353, -1.2236144735575564E-12, 7.356011300929654E-49);
final XYZBounds bounds = new XYZBounds();
box.getBounds(bounds);
final XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
//System.out.println("Is within Y value? "+(point.y >= bounds.getMinimumY() && point.y <= bounds.getMaximumY()));
//System.out.println("Shape = "+box+" is within? "+box.isWithin(point));
//System.out.println("XYZBounds = "+bounds+" is within? "+solid.isWithin(point)+" solid="+solid);
assertTrue(box.isWithin(point) == solid.isWithin(point));
}
}

View File

@ -405,4 +405,18 @@ public class GeoCircleTest extends LuceneTestCase {
assertTrue(solid.isWithin(gp));
}
@Test
public void testBoundsFailureCase2() {
final GeoCircle gc = GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84, -2.7574435614238194E-13, 0.0, 1.5887859182593391);
final GeoPoint gp = new GeoPoint(PlanetModel.WGS84, 0.7980359504429014, 1.5964981068121482);
final XYZBounds bounds = new XYZBounds();
gc.getBounds(bounds);
System.out.println("Bounds = "+bounds);
System.out.println("Point = "+gp);
final XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
assert gc.isWithin(gp)?solid.isWithin(gp):true;
}
}

View File

@ -126,6 +126,7 @@ public final class AssertingPointsFormat extends PointsFormat {
assert false: "point values are out of order";
}
System.arraycopy(packedValue, 0, lastDocValue, 0, bytesPerDim);
lastDocID = docID;
}
in.visit(docID, packedValue);
}
@ -254,11 +255,11 @@ public final class AssertingPointsFormat extends PointsFormat {
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (fieldInfo.getPointDimensionCount() == 0) {
throw new IllegalArgumentException("writing field=\"" + fieldInfo.name + "\" but pointDimensionalCount is 0");
}
in.writeField(fieldInfo, values, maxMBSortInHeap);
in.writeField(fieldInfo, values);
}
@Override

View File

@ -56,11 +56,11 @@ class CrankyPointsFormat extends PointsFormat {
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (random.nextInt(100) == 0) {
throw new IOException("Fake IOException");
}
delegate.writeField(fieldInfo, values, maxMBSortInHeap);
delegate.writeField(fieldInfo, values);
}
@Override

View File

@ -67,6 +67,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
/**
* Abstract class to do basic tests for a geospatial impl (high level
@ -1247,7 +1248,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, pointsInLeaf);
return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override

View File

@ -92,6 +92,7 @@ public class RandomCodec extends AssertingCodec {
// which is less effective for testing.
// TODO: improve how we randomize this...
private final int maxPointsInLeafNode;
private final double maxMBSortInHeap;
private final int bkdSplitRandomSeed;
@Override
@ -102,9 +103,9 @@ public class RandomCodec extends AssertingCodec {
// Randomize how BKDWriter chooses its splis:
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode) {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@ -184,6 +185,7 @@ public class RandomCodec extends AssertingCodec {
int lowFreqCutoff = TestUtil.nextInt(random, 2, 100);
maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
maxMBSortInHeap = 5.0 + (3*random.nextDouble());
bkdSplitRandomSeed = random.nextInt();
add(avoidCodecs,
@ -251,7 +253,8 @@ public class RandomCodec extends AssertingCodec {
public String toString() {
return super.toString() + ": " + previousMappings.toString() +
", docValues:" + previousDVMappings.toString() +
", maxPointsInLeafNode=" + maxPointsInLeafNode;
", maxPointsInLeafNode=" + maxPointsInLeafNode +
", maxMBSortInHeap=" + maxMBSortInHeap;
}
/** Just like {@link BKDWriter} except it evilly picks random ways to split cells on

View File

@ -771,7 +771,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
}
ii = new SlowOpeningMockIndexInputWrapper(this, name, delegateInput);
} else {
ii = new MockIndexInputWrapper(this, name, delegateInput);
ii = new MockIndexInputWrapper(this, name, delegateInput, null);
}
addFileHandle(ii, name, Handle.Input);
return ii;

View File

@ -30,12 +30,19 @@ public class MockIndexInputWrapper extends IndexInput {
private MockDirectoryWrapper dir;
final String name;
private IndexInput delegate;
private boolean isClone;
private boolean closed;
private volatile boolean closed;
/** Construct an empty output buffer. */
public MockIndexInputWrapper(MockDirectoryWrapper dir, String name, IndexInput delegate) {
// Which MockIndexInputWrapper we were cloned from, or null if we are not a clone:
private final MockIndexInputWrapper parent;
/** Sole constructor */
public MockIndexInputWrapper(MockDirectoryWrapper dir, String name, IndexInput delegate, MockIndexInputWrapper parent) {
super("MockIndexInputWrapper(name=" + name + " delegate=" + delegate + ")");
// If we are a clone then our parent better not be a clone!
assert parent == null || parent.parent == null;
this.parent = parent;
this.name = name;
this.dir = dir;
this.delegate = delegate;
@ -54,7 +61,7 @@ public class MockIndexInputWrapper extends IndexInput {
// remove the conditional check so we also track that
// all clones get closed:
assert delegate != null;
if (!isClone) {
if (parent == null) {
dir.removeIndexInput(this, name);
}
dir.maybeThrowDeterministicException();
@ -62,9 +69,13 @@ public class MockIndexInputWrapper extends IndexInput {
}
private void ensureOpen() {
// TODO: not great this is a volatile read (closed) ... we should deploy heavy JVM voodoo like SwitchPoint to avoid this
if (closed) {
throw new RuntimeException("Abusing closed IndexInput!");
}
if (parent != null && parent.closed) {
throw new RuntimeException("Abusing clone of a closed IndexInput!");
}
}
@Override
@ -75,8 +86,7 @@ public class MockIndexInputWrapper extends IndexInput {
}
dir.inputCloneCount.incrementAndGet();
IndexInput iiclone = delegate.clone();
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone);
clone.isClone = true;
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone, parent != null ? parent : this);
// Pending resolution on LUCENE-686 we may want to
// uncomment this code so that we also track that all
// clones get closed:
@ -102,8 +112,7 @@ public class MockIndexInputWrapper extends IndexInput {
}
dir.inputCloneCount.incrementAndGet();
IndexInput slice = delegate.slice(sliceDescription, offset, length);
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, sliceDescription, slice);
clone.isClone = true;
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, sliceDescription, slice, parent != null ? parent : this);
return clone;
}

View File

@ -30,7 +30,7 @@ class SlowClosingMockIndexInputWrapper extends MockIndexInputWrapper {
public SlowClosingMockIndexInputWrapper(MockDirectoryWrapper dir,
String name, IndexInput delegate) {
super(dir, name, delegate);
super(dir, name, delegate, null);
}
@Override

View File

@ -28,7 +28,7 @@ class SlowOpeningMockIndexInputWrapper extends MockIndexInputWrapper {
public SlowOpeningMockIndexInputWrapper(MockDirectoryWrapper dir,
String name, IndexInput delegate) throws IOException {
super(dir, name, delegate);
super(dir, name, delegate, null);
try {
Thread.sleep(50);
} catch (InterruptedException ie) {

View File

@ -171,4 +171,40 @@ public class TestMockDirectoryWrapper extends BaseDirectoryTestCase {
assertTrue("MockDirectoryWrapper on dir=" + dir + " failed to corrupt an unsync'd file", changed);
}
public void testAbuseClosedIndexInput() throws Exception {
MockDirectoryWrapper dir = newMockDirectory();
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
out.writeByte((byte) 42);
out.close();
final IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
in.close();
expectThrows(RuntimeException.class, in::readByte);
dir.close();
}
public void testAbuseCloneAfterParentClosed() throws Exception {
MockDirectoryWrapper dir = newMockDirectory();
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
out.writeByte((byte) 42);
out.close();
IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
final IndexInput clone = in.clone();
in.close();
expectThrows(RuntimeException.class, clone::readByte);
dir.close();
}
public void testAbuseCloneOfCloneAfterParentClosed() throws Exception {
MockDirectoryWrapper dir = newMockDirectory();
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
out.writeByte((byte) 42);
out.close();
IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
IndexInput clone1 = in.clone();
IndexInput clone2 = clone1.clone();
in.close();
expectThrows(RuntimeException.class, clone2::readByte);
dir.close();
}
}

View File

@ -100,6 +100,25 @@ New Features
* SOLR-9275: XML QueryParser support (defType=xmlparser) now extensible via configuration.
(Christine Poerschke)
* SOLR-9200: Add Delegation Token Support to Solr.
(Gregory Chanan)
* SOLR-9038: Solr core snapshots: The current commit can be snapshotted which retains the commit and associates it with
a name. The core admin API can create snapshots, list them, and delete them. Snapshot names can be referenced in
doing a core backup, and in replication. Snapshot metadata is stored in a new snapshot_metadata/ dir.
(Hrishikesh Gadre via David Smiley)
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
(Doug Turnbull, David Smiley)
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
(Gregory Chanan)
* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)
* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.
(Noble Paul, Renaud Delbru, shalin)
* SOLR-9320: A REPLACENODE command to decommission an existing node with another new node
(noble, Nitin Sharma, Varun Thacker)
@ -170,6 +189,19 @@ Bug Fixes
* SOLR-9339: NPE in CloudSolrClient when the response is null (noble)
* SOLR-8596: Web UI doesn't correctly generate queries which include local parameters (Alexandre Rafalovitch, janhoy)
* SOLR-8645: managed-schema is now syntax highlighted in cloud->Tree view (Alexandre Rafalovitch via janhoy)
* SOLR-8379: UI Cloud->Tree view now shows .txt files correctly (Alexandre Rafalovitch via janhoy)
* SOLR-9003: New Admin UI's Dataimport screen now correctly displays DIH Debug output (Alexandre Rafalovitch)
* SOLR-9308: Fix distributed RTG to forward request params, fixes fq and non-default fl params (hossman)
* SOLR-9179: NPE in IndexSchema using IBM JDK (noble, Colvin Cowie)
* SOLR-9397: Config API does not support adding caches (noble)
Optimizations
----------------------
@ -179,6 +211,13 @@ Optimizations
* SOLR-9264: Optimize ZkController.publishAndWaitForDownStates to not read all collection states and
watch relevant collections instead. (Hrishikesh Gadre, shalin)
* SOLR-9335: Solr cache/search/update stats counters now use LongAdder which are supposed to have higher throughput
under high contention. (Varun Thacker)
* SOLR-9350: JSON Facets: method="stream" will no longer always uses & populates the filter cache, likely
flushing it. 'cacheDf' can be configured to set a doc frequency threshold, now defaulting to 1/16th doc count.
Using -1 Disables use of the cache. (David Smiley, yonik)
Other Changes
----------------------
@ -202,6 +241,25 @@ Other Changes
* SOLR-9163: Sync up basic_configs and data_driven_schema_configs, removing almost all differences
except what is required for schemaless. (yonik)
* SOLR-9340: Change ZooKeeper disconnect and session expiry related logging from INFO to WARN to
make debugging easier (Varun Thacker)
* SOLR-9358: [AngularUI] In Cloud->Tree file view area, collapse metadata by default (janhoy)
* SOLR-9256: asserting hasNext() contract in JdbcDataSource in DataImportHandler (Kristine Jetzke via Mikhai Khludnev)
* SOLR-9209: extracting JdbcDataSource.createResultSetIterator() for extension (Kristine Jetzke via Mikhai Khludnev)
* SOLR-9353: Factor out ReRankQParserPlugin.ReRankQueryRescorer private class. (Christine Poerschke)
* SOLR-9392: Fixed CDCR Test failures which were due to leaked resources. (shalin)
* SOLR-9385: Add QParser.getParser(String,SolrQueryRequest) variant. (Christine Poerschke)
* SOLR-9367: Improved TestInjection's randomization logic to use LuceneTestCase.random() (hossman)
* SOLR-9331: Remove ReRankQuery's length constructor argument and member. (Christine Poerschke)
================== 6.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -604,7 +604,7 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
QueryFacetAccumulator qAcc = new QueryFacetAccumulator(this,qfr.getName(),query);
final Query q;
try {
q = QParser.getParser(query, null, queryRequest).getQuery();
q = QParser.getParser(query, queryRequest).getQuery();
} catch( SyntaxError e ){
throw new SolrException(ErrorCode.BAD_REQUEST,"Invalid query '"+query+"'",e);
}

View File

@ -280,10 +280,14 @@ public class JdbcDataSource extends
resultSetIterator.close();
resultSetIterator = null;
}
resultSetIterator = new ResultSetIterator(query);
resultSetIterator = createResultSetIterator(query);
return resultSetIterator.getIterator();
}
protected ResultSetIterator createResultSetIterator(String query) {
return new ResultSetIterator(query);
}
private void logError(String msg, Exception e) {
LOG.warn(msg, e);
}

View File

@ -510,6 +510,45 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
DriverManager.deregisterDriver(driver);
}
}
@Test
public void testEmptyResultSet() throws Exception {
MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource);
props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB");
EasyMock.expect(dataSource.getConnection()).andReturn(connection);
jdbcDataSource.init(context, props);
connection.setAutoCommit(false);
Statement statement = mockControl.createMock(Statement.class);
EasyMock.expect(connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY))
.andReturn(statement);
statement.setFetchSize(500);
statement.setMaxRows(0);
EasyMock.expect(statement.execute("query")).andReturn(true);
ResultSet resultSet = mockControl.createMock(ResultSet.class);
EasyMock.expect(statement.getResultSet()).andReturn(resultSet);
ResultSetMetaData metaData = mockControl.createMock(ResultSetMetaData.class);
EasyMock.expect(resultSet.getMetaData()).andReturn(metaData);
EasyMock.expect(metaData.getColumnCount()).andReturn(0);
EasyMock.expect(resultSet.next()).andReturn(false);
resultSet.close();
EasyMock.expect(statement.getMoreResults()).andReturn(false);
EasyMock.expect(statement.getUpdateCount()).andReturn(-1);
statement.close();
mockControl.replay();
Iterator<Map<String,Object>> resultSetIterator = jdbcDataSource.getData("query");
resultSetIterator.hasNext();
resultSetIterator.hasNext();
mockControl.verify();
}
@Test
@Ignore("Needs a Mock database server to work")
public void testBasic() throws Exception {

View File

@ -16,6 +16,15 @@
*/
package org.apache.solr.hadoop;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
import org.apache.solr.cloud.ZkController;
@ -35,15 +44,6 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
* Extracts SolrCloud information from ZooKeeper.
*/
@ -78,8 +78,7 @@ final class ZooKeeperInspector {
}
SolrZkClient zkClient = getZkClient(zkHost);
try {
ZkStateReader zkStateReader = new ZkStateReader(zkClient);
try (ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
try {
// first check for alias
collection = checkForAlias(zkClient, collection);

View File

@ -134,6 +134,10 @@
<dependency org="antlr" name="antlr" rev="${/antlr/antlr}" conf="test.MiniKdc"/>
<dependency org="net.sf.ehcache" name="ehcache-core" rev="${/net.sf.ehcache/ehcache-core}" conf="test.MiniKdc"/>
<dependency org="org.apache.curator" name="curator-framework" rev="${/org.apache.curator/curator-framework}" conf="compile"/>
<dependency org="org.apache.curator" name="curator-client" rev="${/org.apache.curator/curator-client}" conf="compile"/>
<dependency org="org.apache.curator" name="curator-recipes" rev="${/org.apache.curator/curator-recipes}" conf="compile"/>
<!-- StatsComponents percentiles Dependencies-->
<dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/>
<!-- SQL Parser -->

View File

@ -15,21 +15,26 @@
* limitations under the License.
*/
package org.apache.solr.core;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
* A wrapper for an IndexDeletionPolicy instance.
* <p>
@ -52,9 +57,11 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
private final Map<Long, Long> reserves = new ConcurrentHashMap<>();
private volatile IndexCommit latestCommit;
private final ConcurrentHashMap<Long, AtomicInteger> savedCommits = new ConcurrentHashMap<>();
private final SolrSnapshotMetaDataManager snapshotMgr;
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy) {
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy, SolrSnapshotMetaDataManager snapshotMgr) {
this.deletionPolicy = deletionPolicy;
this.snapshotMgr = snapshotMgr;
}
/**
@ -134,7 +141,6 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
}
}
/**
* Internal use for Lucene... do not explicitly call.
*/
@ -185,7 +191,8 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
Long gen = delegate.getGeneration();
Long reserve = reserves.get(gen);
if (reserve != null && System.nanoTime() < reserve) return;
if(savedCommits.containsKey(gen)) return;
if (savedCommits.containsKey(gen)) return;
if (snapshotMgr.isSnapshotted(gen)) return;
delegate.delete();
}

View File

@ -28,7 +28,17 @@ import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -49,6 +59,7 @@ import org.apache.solr.schema.IndexSchemaFactory;
import org.apache.solr.search.CacheConfig;
import org.apache.solr.search.FastLRUCache;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.ValueSourceParser;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.servlet.SolrRequestParsers;
@ -91,7 +102,7 @@ public class SolrConfig extends Config implements MapSerializable {
public static final String DEFAULT_CONF_FILE = "solrconfig.xml";
private RequestParams requestParams;
public static enum PluginOpts {
public enum PluginOpts {
MULTI_OK,
REQUIRE_NAME,
REQUIRE_NAME_IN_OVERLAY,
@ -254,7 +265,6 @@ public class SolrConfig extends Config implements MapSerializable {
dataDir = get("dataDir", null);
if (dataDir != null && dataDir.length() == 0) dataDir = null;
userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
org.apache.solr.search.SolrIndexSearcher.initRegenerators(this);
@ -276,6 +286,16 @@ public class SolrConfig extends Config implements MapSerializable {
maxWarmingSearchers = getInt("query/maxWarmingSearchers", Integer.MAX_VALUE);
slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1);
for (SolrPluginInfo plugin : plugins) loadPluginInfo(plugin);
Map<String, CacheConfig> userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
List<PluginInfo> caches = getPluginInfos(SolrCache.class.getName());
if (!caches.isEmpty()) {
for (PluginInfo c : caches) {
userCacheConfigs.put(c.name, CacheConfig.getConfig(this, "cache", c.attributes, null));
}
}
this.userCacheConfigs = Collections.unmodifiableMap(userCacheConfigs);
updateHandlerInfo = loadUpdatehandlerInfo();
multipartUploadLimitKB = getInt(
@ -317,6 +337,7 @@ public class SolrConfig extends Config implements MapSerializable {
.add(new SolrPluginInfo(TransformerFactory.class, "transformer", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
.add(new SolrPluginInfo(SearchComponent.class, "searchComponent", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
.add(new SolrPluginInfo(UpdateRequestProcessorFactory.class, "updateProcessor", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
.add(new SolrPluginInfo(SolrCache.class, "cache", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
// TODO: WTF is up with queryConverter???
// it apparently *only* works as a singleton? - SOLR-4304
// and even then -- only if there is a single SpellCheckComponent
@ -457,7 +478,7 @@ public class SolrConfig extends Config implements MapSerializable {
public final CacheConfig queryResultCacheConfig;
public final CacheConfig documentCacheConfig;
public final CacheConfig fieldValueCacheConfig;
public final CacheConfig[] userCacheConfigs;
public final Map<String, CacheConfig> userCacheConfigs;
// SolrIndexSearcher - more...
public final boolean useFilterForSortedQuery;
public final int queryResultWindowSize;

View File

@ -81,6 +81,7 @@ import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.handler.IndexFetcher;
import org.apache.solr.handler.ReplicationHandler;
import org.apache.solr.handler.RequestHandlerBase;
@ -184,6 +185,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
private final Map<String,UpdateRequestProcessorChain> updateProcessorChains;
private final Map<String, SolrInfoMBean> infoRegistry;
private final IndexDeletionPolicyWrapper solrDelPolicy;
private final SolrSnapshotMetaDataManager snapshotMgr;
private final DirectoryFactory directoryFactory;
private IndexReaderFactory indexReaderFactory;
private final Codec codec;
@ -414,7 +416,19 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
} else {
delPolicy = new SolrDeletionPolicy();
}
return new IndexDeletionPolicyWrapper(delPolicy);
return new IndexDeletionPolicyWrapper(delPolicy, snapshotMgr);
}
private SolrSnapshotMetaDataManager initSnapshotMetaDataManager() {
try {
String dirName = getDataDir() + SolrSnapshotMetaDataManager.SNAPSHOT_METADATA_DIR + "/";
Directory snapshotDir = directoryFactory.get(dirName, DirContext.DEFAULT,
getSolrConfig().indexConfig.lockType);
return new SolrSnapshotMetaDataManager(this, snapshotDir);
} catch (IOException e) {
throw new IllegalStateException(e);
}
}
private void initListeners() {
@ -739,6 +753,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
initListeners();
this.snapshotMgr = initSnapshotMetaDataManager();
this.solrDelPolicy = initDeletionPolicy(delPolicy);
this.codec = initCodec(solrConfig, this.schema);
@ -1242,6 +1257,17 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
}
}
// Close the snapshots meta-data directory.
Directory snapshotsDir = snapshotMgr.getSnapshotsDir();
try {
this.directoryFactory.release(snapshotsDir);
} catch (Throwable e) {
SolrException.log(log,e);
if (e instanceof Error) {
throw (Error) e;
}
}
if (coreStateClosed) {
try {
@ -2343,6 +2369,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
return solrDelPolicy;
}
/**
* @return A reference of {@linkplain SolrSnapshotMetaDataManager}
* managing the persistent snapshots for this Solr core.
*/
public SolrSnapshotMetaDataManager getSnapshotMetaDataManager() {
return snapshotMgr;
}
public ReentrantLock getRuleExpiryLock() {
return ruleExpiryLock;
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Constants;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory;
@ -59,10 +60,22 @@ public class LocalFileSystemRepository implements BackupRepository {
@Override
public URI createURI(String... pathComponents) {
Preconditions.checkArgument(pathComponents.length > 0);
Path result = Paths.get(pathComponents[0]);
String basePath = Preconditions.checkNotNull(pathComponents[0]);
// Note the URI.getPath() invocation on Windows platform generates an invalid URI.
// Refer to http://stackoverflow.com/questions/9834776/java-nio-file-path-issue
// Since the caller may have used this method to generate the string representation
// for the pathComponents, we implement a work-around specifically for Windows platform
// to remove the leading '/' character.
if (Constants.WINDOWS) {
basePath = basePath.replaceFirst("^/(.:/)", "$1");
}
Path result = Paths.get(basePath);
for (int i = 1; i < pathComponents.length; i++) {
result = result.resolve(pathComponents[i]);
}
return result.toUri();
}

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class provides functionality required to handle the data files corresponding to Solr snapshots.
*/
public class SolrSnapshotManager {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
* This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
*
* @param dir The index directory storing the snapshot.
* @param gen The generation number for the {@linkplain IndexCommit}
* @throws IOException in case of I/O errors.
*/
public static void deleteIndexFiles ( Directory dir, Collection<SnapshotMetaData> snapshots, long gen ) throws IOException {
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
for (IndexCommit ic : commits) {
if (ic.getGeneration() == gen) {
deleteIndexFiles(dir,refCounts, ic);
break;
}
}
}
/**
* This method deletes all files not corresponding to a configured snapshot in the specified index directory.
*
* @param dir The index directory to search for.
* @throws IOException in case of I/O errors.
*/
public static void deleteNonSnapshotIndexFiles (Directory dir, Collection<SnapshotMetaData> snapshots) throws IOException {
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
Set<Long> snapshotGenNumbers = snapshots.stream()
.map(SnapshotMetaData::getGenerationNumber)
.collect(Collectors.toSet());
for (IndexCommit ic : commits) {
if (!snapshotGenNumbers.contains(ic.getGeneration())) {
deleteIndexFiles(dir,refCounts, ic);
}
}
}
/**
* This method computes reference count for the index files by taking into consideration
* (a) configured snapshots and (b) files sharing between two or more {@linkplain IndexCommit} instances.
*
* @param snapshots A collection of user configured snapshots
* @param commits A list of {@linkplain IndexCommit} instances
* @return A map containing reference count for each index file referred in one of the {@linkplain IndexCommit} instances.
* @throws IOException in case of I/O error.
*/
@VisibleForTesting
static Map<String, Integer> buildRefCounts (Collection<SnapshotMetaData> snapshots, List<IndexCommit> commits) throws IOException {
Map<String, Integer> result = new HashMap<>();
Map<Long, IndexCommit> commitsByGen = commits.stream().collect(
Collectors.toMap(IndexCommit::getGeneration, Function.identity()));
for(SnapshotMetaData md : snapshots) {
IndexCommit ic = commitsByGen.get(md.getGenerationNumber());
if (ic != null) {
Collection<String> fileNames = ic.getFileNames();
for(String fileName : fileNames) {
int refCount = result.getOrDefault(fileName, 0);
result.put(fileName, refCount+1);
}
}
}
return result;
}
/**
* This method deletes the index files associated with specified <code>indexCommit</code> provided they
* are not referred by some other {@linkplain IndexCommit}.
*
* @param dir The index directory containing the {@linkplain IndexCommit} to be deleted.
* @param refCounts A map containing reference counts for each file associated with every {@linkplain IndexCommit}
* in the specified directory.
* @param indexCommit The {@linkplain IndexCommit} whose files need to be deleted.
* @throws IOException in case of I/O errors.
*/
private static void deleteIndexFiles ( Directory dir, Map<String, Integer> refCounts, IndexCommit indexCommit ) throws IOException {
log.info("Deleting index files for index commit with generation {} in directory {}", indexCommit.getGeneration(), dir);
for (String fileName : indexCommit.getFileNames()) {
try {
// Ensure that a file being deleted is not referred by some other commit.
int ref = refCounts.getOrDefault(fileName, 0);
log.debug("Reference count for file {} is {}", fileName, ref);
if (ref == 0) {
dir.deleteFile(fileName);
}
} catch (IOException e) {
log.warn("Unable to delete file {} in directory {} due to exception {}", fileName, dir, e.getMessage());
}
}
}
}

View File

@ -0,0 +1,416 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Collectors;
import com.google.common.base.Preconditions;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class is responsible to manage the persistent snapshots meta-data for the Solr indexes. The
* persistent snapshots are implemented by relying on Lucene {@linkplain IndexDeletionPolicy}
* abstraction to configure a specific {@linkplain IndexCommit} to be retained. The
* {@linkplain IndexDeletionPolicyWrapper} in Solr uses this class to create/delete the Solr index
* snapshots.
*/
public class SolrSnapshotMetaDataManager {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String SNAPSHOT_METADATA_DIR = "snapshot_metadata";
/**
* A class defining the meta-data for a specific snapshot.
*/
public static class SnapshotMetaData {
private String name;
private String indexDirPath;
private long generationNumber;
public SnapshotMetaData(String name, String indexDirPath, long generationNumber) {
super();
this.name = name;
this.indexDirPath = indexDirPath;
this.generationNumber = generationNumber;
}
public String getName() {
return name;
}
public String getIndexDirPath() {
return indexDirPath;
}
public long getGenerationNumber() {
return generationNumber;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("SnapshotMetaData[name=");
builder.append(name);
builder.append(", indexDirPath=");
builder.append(indexDirPath);
builder.append(", generation=");
builder.append(generationNumber);
builder.append("]");
return builder.toString();
}
}
/** Prefix used for the save file. */
public static final String SNAPSHOTS_PREFIX = "snapshots_";
private static final int VERSION_START = 0;
private static final int VERSION_CURRENT = VERSION_START;
private static final String CODEC_NAME = "solr-snapshots";
// The index writer which maintains the snapshots metadata
private long nextWriteGen;
private final Directory dir;
/** Used to map snapshot name to snapshot meta-data. */
protected final Map<String,SnapshotMetaData> nameToDetailsMapping = new LinkedHashMap<>();
/** Used to figure out the *current* index data directory path */
private final SolrCore solrCore;
/**
* A constructor.
*
* @param dir The directory where the snapshot meta-data should be stored. Enables updating
* the existing meta-data.
* @throws IOException in case of errors.
*/
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir) throws IOException {
this(solrCore, dir, OpenMode.CREATE_OR_APPEND);
}
/**
* A constructor.
*
* @param dir The directory where the snapshot meta-data is stored.
* @param mode CREATE If previous meta-data should be erased.
* APPEND If previous meta-data should be read and updated.
* CREATE_OR_APPEND Creates a new meta-data structure if one does not exist
* Updates the existing structure if one exists.
* @throws IOException in case of errors.
*/
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir, OpenMode mode) throws IOException {
this.solrCore = solrCore;
this.dir = dir;
if (mode == OpenMode.CREATE) {
deleteSnapshotMetadataFiles();
}
loadFromSnapshotMetadataFile();
if (mode == OpenMode.APPEND && nextWriteGen == 0) {
throw new IllegalStateException("no snapshots stored in this directory");
}
}
/**
* @return The snapshot meta-data directory
*/
public Directory getSnapshotsDir() {
return dir;
}
/**
* This method creates a new snapshot meta-data entry.
*
* @param name The name of the snapshot.
* @param indexDirPath The directory path where the index files are stored.
* @param gen The generation number for the {@linkplain IndexCommit} being snapshotted.
* @throws IOException in case of I/O errors.
*/
public synchronized void snapshot(String name, String indexDirPath, long gen) throws IOException {
Preconditions.checkNotNull(name);
log.info("Creating the snapshot named {} for core {} associated with index commit with generation {} in directory {}"
, name, solrCore.getName(), gen, indexDirPath);
if(nameToDetailsMapping.containsKey(name)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "A snapshot with name " + name + " already exists");
}
SnapshotMetaData d = new SnapshotMetaData(name, indexDirPath, gen);
nameToDetailsMapping.put(name, d);
boolean success = false;
try {
persist();
success = true;
} finally {
if (!success) {
try {
release(name);
} catch (Exception e) {
// Suppress so we keep throwing original exception
}
}
}
}
/**
* This method deletes a previously created snapshot (if any).
*
* @param name The name of the snapshot to be deleted.
* @return The snapshot meta-data if the snapshot with the snapshot name exists.
* @throws IOException in case of I/O error
*/
public synchronized Optional<SnapshotMetaData> release(String name) throws IOException {
log.info("Deleting the snapshot named {} for core {}", name, solrCore.getName());
SnapshotMetaData result = nameToDetailsMapping.remove(Preconditions.checkNotNull(name));
if(result != null) {
boolean success = false;
try {
persist();
success = true;
} finally {
if (!success) {
nameToDetailsMapping.put(name, result);
}
}
}
return Optional.ofNullable(result);
}
/**
* This method returns if snapshot is created for the specified generation number in
* the *current* index directory.
*
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
* @return true if the snapshot is created.
* false otherwise.
*/
public synchronized boolean isSnapshotted(long genNumber) {
return !nameToDetailsMapping.isEmpty() && isSnapshotted(solrCore.getIndexDir(), genNumber);
}
/**
* This method returns if snapshot is created for the specified generation number in
* the specified index directory.
*
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
* @return true if the snapshot is created.
* false otherwise.
*/
public synchronized boolean isSnapshotted(String indexDirPath, long genNumber) {
return !nameToDetailsMapping.isEmpty()
&& nameToDetailsMapping.values().stream()
.anyMatch(entry -> entry.getIndexDirPath().equals(indexDirPath) && entry.getGenerationNumber() == genNumber);
}
/**
* This method returns the snapshot meta-data for the specified name (if it exists).
*
* @param name The name of the snapshot
* @return The snapshot meta-data if exists.
*/
public synchronized Optional<SnapshotMetaData> getSnapshotMetaData(String name) {
return Optional.ofNullable(nameToDetailsMapping.get(name));
}
/**
* @return A list of snapshots created so far.
*/
public synchronized List<String> listSnapshots() {
// We create a copy for thread safety.
return new ArrayList<>(nameToDetailsMapping.keySet());
}
/**
* This method returns a list of snapshots created in a specified index directory.
*
* @param indexDirPath The index directory path.
* @return a list snapshots stored in the specified directory.
*/
public synchronized Collection<SnapshotMetaData> listSnapshotsInIndexDir(String indexDirPath) {
return nameToDetailsMapping.values().stream()
.filter(entry -> indexDirPath.equals(entry.getIndexDirPath()))
.collect(Collectors.toList());
}
/**
* This method returns the {@linkplain IndexCommit} associated with the specified
* <code>commitName</code>. A snapshot with specified <code>commitName</code> must
* be created before invoking this method.
*
* @param commitName The name of persisted commit
* @return the {@linkplain IndexCommit}
* @throws IOException in case of I/O error.
*/
public Optional<IndexCommit> getIndexCommitByName(String commitName) throws IOException {
Optional<IndexCommit> result = Optional.empty();
Optional<SnapshotMetaData> metaData = getSnapshotMetaData(commitName);
if (metaData.isPresent()) {
String indexDirPath = metaData.get().getIndexDirPath();
long gen = metaData.get().getGenerationNumber();
Directory d = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
try {
result = DirectoryReader.listCommits(d)
.stream()
.filter(ic -> ic.getGeneration() == gen)
.findAny();
if (!result.isPresent()) {
log.warn("Unable to find commit with generation {} in the directory {}", gen, indexDirPath);
}
} finally {
solrCore.getDirectoryFactory().release(d);
}
} else {
log.warn("Commit with name {} is not persisted for core {}", commitName, solrCore.getName());
}
return result;
}
private synchronized void persist() throws IOException {
String fileName = SNAPSHOTS_PREFIX + nextWriteGen;
IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT);
boolean success = false;
try {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(nameToDetailsMapping.size());
for(Entry<String,SnapshotMetaData> ent : nameToDetailsMapping.entrySet()) {
out.writeString(ent.getKey());
out.writeString(ent.getValue().getIndexDirPath());
out.writeVLong(ent.getValue().getGenerationNumber());
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(out);
IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
} else {
IOUtils.close(out);
}
}
dir.sync(Collections.singletonList(fileName));
if (nextWriteGen > 0) {
String lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen-1);
// exception OK: likely it didn't exist
IOUtils.deleteFilesIgnoringExceptions(dir, lastSaveFile);
}
nextWriteGen++;
}
private synchronized void deleteSnapshotMetadataFiles() throws IOException {
for(String file : dir.listAll()) {
if (file.startsWith(SNAPSHOTS_PREFIX)) {
dir.deleteFile(file);
}
}
}
/**
* Reads the snapshot meta-data information from the given {@link Directory}.
*/
private synchronized void loadFromSnapshotMetadataFile() throws IOException {
log.info("Loading from snapshot metadata file...");
long genLoaded = -1;
IOException ioe = null;
List<String> snapshotFiles = new ArrayList<>();
for(String file : dir.listAll()) {
if (file.startsWith(SNAPSHOTS_PREFIX)) {
long gen = Long.parseLong(file.substring(SNAPSHOTS_PREFIX.length()));
if (genLoaded == -1 || gen > genLoaded) {
snapshotFiles.add(file);
Map<String, SnapshotMetaData> snapshotMetaDataMapping = new HashMap<>();
IndexInput in = dir.openInput(file, IOContext.DEFAULT);
try {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
int count = in.readVInt();
for(int i=0;i<count;i++) {
String name = in.readString();
String indexDirPath = in.readString();
long commitGen = in.readVLong();
snapshotMetaDataMapping.put(name, new SnapshotMetaData(name, indexDirPath, commitGen));
}
} catch (IOException ioe2) {
// Save first exception & throw in the end
if (ioe == null) {
ioe = ioe2;
}
} finally {
in.close();
}
genLoaded = gen;
nameToDetailsMapping.clear();
nameToDetailsMapping.putAll(snapshotMetaDataMapping);
}
}
}
if (genLoaded == -1) {
// Nothing was loaded...
if (ioe != null) {
// ... not for lack of trying:
throw ioe;
}
} else {
if (snapshotFiles.size() > 1) {
// Remove any broken / old snapshot files:
String curFileName = SNAPSHOTS_PREFIX + genLoaded;
for(String file : snapshotFiles) {
if (!curFileName.equals(file)) {
IOUtils.deleteFilesIgnoringExceptions(dir, file);
}
}
}
nextWriteGen = 1+genLoaded;
}
}
}

View File

@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Core classes for Solr's persistent snapshots functionality
*/
package org.apache.solr.core.snapshots;

View File

@ -160,7 +160,7 @@ public class BlobHandler extends RequestHandlerBase implements PluginInfoInitial
} else {
String q = "blobName:{0}";
if (version != -1) q = "id:{0}/{1}";
QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), "lucene", req);
QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
if (docs.totalHits > 0) {
rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {

View File

@ -121,6 +121,11 @@ public class CdcrParams {
*/
public final static String COUNTER_DELETES = "deletes";
/**
* Counter for Bootstrap operations *
*/
public final static String COUNTER_BOOTSTRAP = "bootstraps";
/**
* A list of errors per target collection *
*/
@ -165,7 +170,10 @@ public class CdcrParams {
LASTPROCESSEDVERSION,
QUEUES,
OPS,
ERRORS;
ERRORS,
BOOTSTRAP,
BOOTSTRAP_STATUS,
CANCEL_BOOTSTRAP;
public static CdcrAction get(String p) {
if (p != null) {

View File

@ -119,7 +119,7 @@ public class CdcrReplicator implements Runnable {
// we might have read a single commit operation and reached the end of the update logs
logReader.forwardSeek(subReader);
log.debug("Forwarded {} updates to target {}", counter, state.getTargetCollection());
log.info("Forwarded {} updates to target {}", counter, state.getTargetCollection());
} catch (Exception e) {
// report error and update error stats
this.handleException(e);
@ -150,13 +150,13 @@ public class CdcrReplicator implements Runnable {
if (e instanceof CdcrReplicatorException) {
UpdateRequest req = ((CdcrReplicatorException) e).req;
UpdateResponse rsp = ((CdcrReplicatorException) e).rsp;
log.warn("Failed to forward update request {}. Got response {}", req, rsp);
log.warn("Failed to forward update request {} to target: {}. Got response {}", req, state.getTargetCollection(), rsp);
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
} else if (e instanceof CloudSolrClient.RouteException) {
log.warn("Failed to forward update request", e);
log.warn("Failed to forward update request to target: " + state.getTargetCollection(), e);
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
} else {
log.warn("Failed to forward update request", e);
log.warn("Failed to forward update request to target: " + state.getTargetCollection(), e);
state.reportError(CdcrReplicatorState.ErrorType.INTERNAL);
}
}

View File

@ -16,29 +16,49 @@
*/
package org.apache.solr.handler;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.http.client.HttpClient;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SolrjNamedThreadFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.update.CdcrUpdateLog;
import org.apache.solr.util.TimeOut;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_STATUS;
class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
private static final int MAX_BOOTSTRAP_ATTEMPTS = 5;
private static final int BOOTSTRAP_RETRY_DELAY_MS = 2000;
// 6 hours is hopefully long enough for most indexes
private static final long BOOTSTRAP_TIMEOUT_SECONDS = 6L * 3600L * 3600L;
private List<CdcrReplicatorState> replicatorStates;
private final CdcrReplicatorScheduler scheduler;
@ -48,6 +68,9 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
private SolrCore core;
private String path;
private ExecutorService bootstrapExecutor;
private volatile BootstrapStatusRunnable bootstrapStatusRunnable;
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
CdcrReplicatorManager(final SolrCore core, String path,
@ -104,12 +127,20 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
@Override
public synchronized void stateUpdate() {
if (leaderStateManager.amILeader() && processStateManager.getState().equals(CdcrParams.ProcessState.STARTED)) {
if (replicatorStates.size() > 0) {
this.bootstrapExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(replicatorStates.size(),
new SolrjNamedThreadFactory("cdcr-bootstrap-status"));
}
this.initLogReaders();
this.scheduler.start();
return;
}
this.scheduler.shutdown();
if (bootstrapExecutor != null) {
IOUtils.closeQuietly(bootstrapStatusRunnable);
ExecutorUtil.shutdownAndAwaitTermination(bootstrapExecutor);
}
this.closeLogReaders();
}
@ -117,7 +148,7 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
return replicatorStates;
}
void initLogReaders() {
private void initLogReaders() {
String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
String shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
CdcrUpdateLog ulog = (CdcrUpdateLog) core.getUpdateHandler().getUpdateLog();
@ -129,8 +160,23 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
log.info("Create new update log reader for target {} with checkpoint {} @ {}:{}", state.getTargetCollection(),
checkpoint, collectionName, shard);
CdcrUpdateLog.CdcrLogReader reader = ulog.newLogReader();
reader.seek(checkpoint);
boolean seek = reader.seek(checkpoint);
state.init(reader);
if (!seek) {
// targetVersion is lower than the oldest known entry.
// In this scenario, it probably means that there is a gap in the updates log.
// the best we can do here is to bootstrap the target leader by replicating the full index
final String targetCollection = state.getTargetCollection();
state.setBootstrapInProgress(true);
log.info("Attempting to bootstrap target collection: {}, shard: {}", targetCollection, shard);
bootstrapStatusRunnable = new BootstrapStatusRunnable(core, state);
log.info("Submitting bootstrap task to executor");
try {
bootstrapExecutor.submit(bootstrapStatusRunnable);
} catch (Exception e) {
log.error("Unable to submit bootstrap call to executor", e);
}
}
} catch (IOException | SolrServerException | SolrException e) {
log.warn("Unable to instantiate the log reader for target collection " + state.getTargetCollection(), e);
} catch (InterruptedException e) {
@ -164,11 +210,203 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
*/
void shutdown() {
this.scheduler.shutdown();
if (bootstrapExecutor != null) {
IOUtils.closeQuietly(bootstrapStatusRunnable);
ExecutorUtil.shutdownAndAwaitTermination(bootstrapExecutor);
}
for (CdcrReplicatorState state : replicatorStates) {
state.shutdown();
}
replicatorStates.clear();
}
private class BootstrapStatusRunnable implements Runnable, Closeable {
private final CdcrReplicatorState state;
private final String targetCollection;
private final String shard;
private final String collectionName;
private final CdcrUpdateLog ulog;
private final String myCoreUrl;
private volatile boolean closed = false;
BootstrapStatusRunnable(SolrCore core, CdcrReplicatorState state) {
this.collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
this.shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
this.ulog = (CdcrUpdateLog) core.getUpdateHandler().getUpdateLog();
this.state = state;
this.targetCollection = state.getTargetCollection();
String baseUrl = core.getCoreDescriptor().getCoreContainer().getZkController().getBaseUrl();
this.myCoreUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, core.getName());
}
@Override
public void close() throws IOException {
closed = true;
try {
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
String leaderCoreUrl = leader.getCoreUrl();
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
sendCdcrCommand(client, CdcrParams.CdcrAction.CANCEL_BOOTSTRAP);
} catch (SolrServerException e) {
log.error("Error sending cancel bootstrap message to target collection: {} shard: {} leader: {}",
targetCollection, shard, leaderCoreUrl);
}
} catch (InterruptedException e) {
log.error("Interrupted while closing BootstrapStatusRunnable", e);
Thread.currentThread().interrupt();
}
}
@Override
public void run() {
int retries = 1;
boolean success = false;
try {
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
TimeOut timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS);
while (!timeOut.hasTimedOut()) {
if (closed) {
log.warn("Cancelling waiting for bootstrap on target: {} shard: {} to complete", targetCollection, shard);
state.setBootstrapInProgress(false);
break;
}
BootstrapStatus status = getBoostrapStatus();
if (status == BootstrapStatus.RUNNING) {
try {
log.info("CDCR bootstrap running for {} seconds, sleeping for {} ms",
BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS), BOOTSTRAP_RETRY_DELAY_MS);
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
} else if (status == BootstrapStatus.COMPLETED) {
log.info("CDCR bootstrap successful in {} seconds", BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
long checkpoint = CdcrReplicatorManager.this.getCheckpoint(state);
log.info("Create new update log reader for target {} with checkpoint {} @ {}:{}", state.getTargetCollection(),
checkpoint, collectionName, shard);
CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader();
reader1.seek(checkpoint);
success = true;
break;
} else if (status == BootstrapStatus.FAILED) {
log.warn("CDCR bootstrap failed in {} seconds", BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
// let's retry a fixed number of times before giving up
if (retries >= MAX_BOOTSTRAP_ATTEMPTS) {
log.error("Unable to bootstrap the target collection: {}, shard: {} even after {} retries", targetCollection, shard, retries);
break;
} else {
log.info("Retry: {} - Attempting to bootstrap target collection: {} shard: {}", retries, targetCollection, shard);
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS); // reset the timer
retries++;
}
} else if (status == BootstrapStatus.NOTFOUND) {
// the leader of the target shard may have changed and therefore there is no record of the
// bootstrap process so we must retry the operation
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
retries = 1;
timeOut = new TimeOut(6L * 3600L * 3600L, TimeUnit.SECONDS); // reset the timer
} else if (status == BootstrapStatus.UNKNOWN) {
// we were not able to query the status on the remote end
// so just sleep for a bit and try again
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
}
} catch (InterruptedException e) {
log.info("Bootstrap thread interrupted");
state.reportError(CdcrReplicatorState.ErrorType.INTERNAL);
Thread.currentThread().interrupt();
} catch (IOException | SolrServerException | SolrException e) {
log.error("Unable to bootstrap the target collection " + targetCollection + " shard: " + shard, e);
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
} finally {
if (success) {
log.info("Bootstrap successful, giving the go-ahead to replicator");
state.setBootstrapInProgress(false);
}
}
}
private BootstrapStatus sendBootstrapCommand() throws InterruptedException {
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
String leaderCoreUrl = leader.getCoreUrl();
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
log.info("Attempting to bootstrap target collection: {} shard: {} leader: {}", targetCollection, shard, leaderCoreUrl);
try {
NamedList response = sendCdcrCommand(client, CdcrParams.CdcrAction.BOOTSTRAP, ReplicationHandler.MASTER_URL, myCoreUrl);
log.debug("CDCR Bootstrap response: {}", response);
String status = response.get(RESPONSE_STATUS).toString();
return BootstrapStatus.valueOf(status.toUpperCase(Locale.ROOT));
} catch (Exception e) {
log.error("Exception submitting bootstrap request", e);
return BootstrapStatus.UNKNOWN;
}
} catch (IOException e) {
log.error("There shouldn't be an IOException while closing but there was!", e);
}
return BootstrapStatus.UNKNOWN;
}
private BootstrapStatus getBoostrapStatus() throws InterruptedException {
try {
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
String leaderCoreUrl = leader.getCoreUrl();
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
NamedList response = sendCdcrCommand(client, CdcrParams.CdcrAction.BOOTSTRAP_STATUS);
String status = (String) response.get(RESPONSE_STATUS);
BootstrapStatus bootstrapStatus = BootstrapStatus.valueOf(status.toUpperCase(Locale.ROOT));
if (bootstrapStatus == BootstrapStatus.RUNNING) {
return BootstrapStatus.RUNNING;
} else if (bootstrapStatus == BootstrapStatus.COMPLETED) {
return BootstrapStatus.COMPLETED;
} else if (bootstrapStatus == BootstrapStatus.FAILED) {
return BootstrapStatus.FAILED;
} else if (bootstrapStatus == BootstrapStatus.NOTFOUND) {
log.warn("Bootstrap process was not found on target collection: {} shard: {}, leader: {}", targetCollection, shard, leaderCoreUrl);
return BootstrapStatus.NOTFOUND;
} else if (bootstrapStatus == BootstrapStatus.CANCELLED) {
return BootstrapStatus.CANCELLED;
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unknown status: " + status + " returned by BOOTSTRAP_STATUS command");
}
}
} catch (Exception e) {
log.error("Exception during bootstrap status request", e);
return BootstrapStatus.UNKNOWN;
}
}
}
private NamedList sendCdcrCommand(SolrClient client, CdcrParams.CdcrAction action, String... params) throws SolrServerException, IOException {
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set(CommonParams.QT, "/cdcr");
solrParams.set(CommonParams.ACTION, action.toString());
for (int i = 0; i < params.length - 1; i+=2) {
solrParams.set(params[i], params[i + 1]);
}
SolrRequest request = new QueryRequest(solrParams);
return client.request(request);
}
private enum BootstrapStatus {
SUBMITTED,
RUNNING,
COMPLETED,
FAILED,
NOTFOUND,
CANCELLED,
UNKNOWN
}
}

View File

@ -77,7 +77,11 @@ class CdcrReplicatorScheduler {
CdcrReplicatorState state = statesQueue.poll();
assert state != null; // Should never happen
try {
if (!state.isBootstrapInProgress()) {
new CdcrReplicator(state, batchSize).run();
} else {
log.debug("Replicator state is bootstrapping, skipping replication for target collection {}", state.getTargetCollection());
}
} finally {
statesQueue.offer(state);
}

View File

@ -27,6 +27,8 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.update.CdcrUpdateLog;
@ -53,6 +55,9 @@ class CdcrReplicatorState {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final AtomicBoolean bootstrapInProgress = new AtomicBoolean(false);
private final AtomicInteger numBootstraps = new AtomicInteger();
CdcrReplicatorState(final String targetCollection, final String zkHost, final CloudSolrClient targetClient) {
this.targetCollection = targetCollection;
this.targetClient = targetClient;
@ -164,6 +169,24 @@ class CdcrReplicatorState {
return this.benchmarkTimer;
}
/**
* @return true if a bootstrap operation is in progress, false otherwise
*/
boolean isBootstrapInProgress() {
return bootstrapInProgress.get();
}
void setBootstrapInProgress(boolean inProgress) {
if (bootstrapInProgress.compareAndSet(true, false)) {
numBootstraps.incrementAndGet();
}
bootstrapInProgress.set(inProgress);
}
public int getNumBootstraps() {
return numBootstraps.get();
}
enum ErrorType {
INTERNAL,
BAD_REQUEST;

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.handler;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
@ -24,14 +25,20 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
@ -41,21 +48,33 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CloseHook;
import org.apache.solr.core.PluginBag;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.CdcrUpdateLog;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.VersionInfo;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.admin.CoreAdminHandler.COMPLETED;
import static org.apache.solr.handler.admin.CoreAdminHandler.FAILED;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_MESSAGE;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_STATUS;
import static org.apache.solr.handler.admin.CoreAdminHandler.RUNNING;
/**
* <p>
* This request handler implements the CDCR API and is responsible of the execution of the
@ -199,6 +218,18 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
this.handleErrorsAction(req, rsp);
break;
}
case BOOTSTRAP: {
this.handleBootstrapAction(req, rsp);
break;
}
case BOOTSTRAP_STATUS: {
this.handleBootstrapStatus(req, rsp);
break;
}
case CANCEL_BOOTSTRAP: {
this.handleCancelBootstrap(req, rsp);
break;
}
default: {
throw new RuntimeException("Unknown action: " + action);
}
@ -409,10 +440,20 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
}
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
VersionInfo versionInfo = ulog.getVersionInfo();
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
List<Long> versions = recentUpdates.getVersions(1);
long lastVersion = versions.isEmpty() ? -1 : Math.abs(versions.get(0));
rsp.add(CdcrParams.CHECKPOINT, lastVersion);
long maxVersionFromRecent = recentUpdates.getMaxRecentVersion();
long maxVersionFromIndex = versionInfo.getMaxVersionFromIndex(req.getSearcher());
log.info("Found maxVersionFromRecent {} maxVersionFromIndex {}", maxVersionFromRecent, maxVersionFromIndex);
// there is no race with ongoing bootstrap because we don't expect any updates to come from the source
long maxVersion = Math.max(maxVersionFromIndex, maxVersionFromRecent);
if (maxVersion == 0L) {
maxVersion = -1;
}
rsp.add(CdcrParams.CHECKPOINT, maxVersion);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Action '" + CdcrParams.CdcrAction.SHARDCHECKPOINT +
"' could not read max version");
}
}
@ -574,6 +615,192 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
rsp.add(CdcrParams.ERRORS, hosts);
}
private AtomicBoolean running = new AtomicBoolean();
private volatile Future<Boolean> bootstrapFuture;
private volatile BootstrapCallable bootstrapCallable;
private void handleBootstrapAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
String shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
if (!leaderStateManager.amILeader()) {
log.warn("Action {} sent to non-leader replica @ {}:{}", CdcrParams.CdcrAction.BOOTSTRAP, collectionName, shard);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Action " + CdcrParams.CdcrAction.BOOTSTRAP +
" sent to non-leader replica");
}
Runnable runnable = () -> {
Lock recoveryLock = req.getCore().getSolrCoreState().getRecoveryLock();
boolean locked = recoveryLock.tryLock();
try {
if (!locked) {
handleCancelBootstrap(req, rsp);
} else if (leaderStateManager.amILeader()) {
running.set(true);
String masterUrl = req.getParams().get(ReplicationHandler.MASTER_URL);
bootstrapCallable = new BootstrapCallable(masterUrl, core);
bootstrapFuture = core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getRecoveryExecutor().submit(bootstrapCallable);
try {
bootstrapFuture.get();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.warn("Bootstrap was interrupted", e);
} catch (ExecutionException e) {
log.error("Bootstrap operation failed", e);
}
} else {
log.error("Action {} sent to non-leader replica @ {}:{}. Aborting bootstrap.", CdcrParams.CdcrAction.BOOTSTRAP, collectionName, shard);
}
} finally {
if (locked) {
running.set(false);
recoveryLock.unlock();
}
}
};
try {
core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getUpdateExecutor().submit(runnable);
rsp.add(RESPONSE_STATUS, "submitted");
} catch (RejectedExecutionException ree) {
// no problem, we're probably shutting down
rsp.add(RESPONSE_STATUS, "failed");
}
}
private void handleCancelBootstrap(SolrQueryRequest req, SolrQueryResponse rsp) {
BootstrapCallable callable = this.bootstrapCallable;
IOUtils.closeQuietly(callable);
rsp.add(RESPONSE_STATUS, "cancelled");
}
private void handleBootstrapStatus(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
if (running.get()) {
rsp.add(RESPONSE_STATUS, RUNNING);
return;
}
Future<Boolean> future = bootstrapFuture;
BootstrapCallable callable = this.bootstrapCallable;
if (future == null) {
rsp.add(RESPONSE_STATUS, "notfound");
rsp.add(RESPONSE_MESSAGE, "No bootstrap found in running, completed or failed states");
} else if (future.isCancelled() || callable.isClosed()) {
rsp.add(RESPONSE_STATUS, "cancelled");
} else if (future.isDone()) {
// could be a normal termination or an exception
try {
Boolean result = future.get();
if (result) {
rsp.add(RESPONSE_STATUS, COMPLETED);
} else {
rsp.add(RESPONSE_STATUS, FAILED);
}
} catch (InterruptedException e) {
// should not happen?
} catch (ExecutionException e) {
rsp.add(RESPONSE_STATUS, FAILED);
rsp.add(RESPONSE, e);
} catch (CancellationException ce) {
rsp.add(RESPONSE_STATUS, FAILED);
rsp.add(RESPONSE_MESSAGE, "Bootstrap was cancelled");
}
} else {
rsp.add(RESPONSE_STATUS, RUNNING);
}
}
private static class BootstrapCallable implements Callable<Boolean>, Closeable {
private final String masterUrl;
private final SolrCore core;
private volatile boolean closed = false;
BootstrapCallable(String masterUrl, SolrCore core) {
this.masterUrl = masterUrl;
this.core = core;
}
@Override
public void close() throws IOException {
closed = true;
SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
ReplicationHandler replicationHandler = (ReplicationHandler) handler;
replicationHandler.abortFetch();
}
public boolean isClosed() {
return closed;
}
@Override
public Boolean call() throws Exception {
boolean success = false;
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
// we start buffering updates as a safeguard however we do not expect
// to receive any updates from the source during bootstrap
ulog.bufferUpdates();
try {
commitOnLeader(masterUrl);
// use rep handler directly, so we can do this sync rather than async
SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
ReplicationHandler replicationHandler = (ReplicationHandler) handler;
if (replicationHandler == null) {
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
"Skipping recovery, no " + ReplicationHandler.PATH + " handler found");
}
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set(ReplicationHandler.MASTER_URL, masterUrl);
// we do not want the raw tlog files from the source
solrParams.set(ReplicationHandler.TLOG_FILES, false);
success = replicationHandler.doFetch(solrParams, false);
// this is required because this callable can race with HttpSolrCall#destroy
// which clears the request info.
// Applying buffered updates fails without the following line because LogReplayer
// also tries to set request info and fails with AssertionError
SolrRequestInfo.clearRequestInfo();
Future<UpdateLog.RecoveryInfo> future = ulog.applyBufferedUpdates();
if (future == null) {
// no replay needed
log.info("No replay needed.");
} else {
log.info("Replaying buffered documents.");
// wait for replay
UpdateLog.RecoveryInfo report = future.get();
if (report.failed) {
SolrException.log(log, "Replay failed");
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Replay failed");
}
}
return success;
} finally {
if (closed || !success) {
// we cannot apply the buffer in this case because it will introduce newer versions in the
// update log and then the source cluster will get those versions via collectioncheckpoint
// causing the versions in between to be completely missed
boolean dropped = ulog.dropBufferedUpdates();
assert dropped;
}
}
}
private void commitOnLeader(String leaderUrl) throws SolrServerException,
IOException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderUrl).build()) {
client.setConnectionTimeout(30000);
UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams());
ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(
client);
}
}
}
@Override
public String getDescription() {
return "Manage Cross Data Center Replication";

View File

@ -82,6 +82,9 @@ import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.apache.solr.handler.ReplicationHandler.*;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
@ -468,9 +471,18 @@ public class IndexFetcher {
// let the system know we are changing dir's and the old one
// may be closed
if (indexDir != null) {
LOG.info("removing old index directory " + indexDir);
solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
SolrSnapshotMetaDataManager snapshotsMgr = solrCore.getSnapshotMetaDataManager();
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
// Delete the old index directory only if no snapshot exists in that directory.
if(snapshots.isEmpty()) {
LOG.info("removing old index directory " + indexDir);
solrCore.getDirectoryFactory().remove(indexDir);
} else {
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
}
}
}
@ -738,14 +750,14 @@ public class IndexFetcher {
}
private void openNewSearcherAndUpdateCommitPoint() throws IOException {
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
new ModifiableSolrParams());
RefCounted<SolrIndexSearcher> searcher = null;
IndexCommit commitPoint;
// must get the latest solrCore object because the one we have might be closed because of a reload
// todo stop keeping solrCore around
SolrCore core = solrCore.getCoreDescriptor().getCoreContainer().getCore(solrCore.getName());
try {
Future[] waitSearcher = new Future[1];
searcher = solrCore.getSearcher(true, true, waitSearcher, true);
searcher = core.getSearcher(true, true, waitSearcher, true);
if (waitSearcher[0] != null) {
try {
waitSearcher[0].get();
@ -755,10 +767,10 @@ public class IndexFetcher {
}
commitPoint = searcher.get().getIndexReader().getIndexCommit();
} finally {
req.close();
if (searcher != null) {
searcher.decref();
}
core.close();
}
// update the commit point in replication handler

View File

@ -125,7 +125,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
filters = new ArrayList<>();
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0) {
QParser fqp = QParser.getParser(fq, null, req);
QParser fqp = QParser.getParser(fq, req);
filters.add(fqp.getQuery());
}
}

View File

@ -87,6 +87,7 @@ import org.apache.solr.core.SolrDeletionPolicy;
import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;
@ -299,9 +300,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
rsp.add("message","No slave configured");
}
} else if (command.equalsIgnoreCase(CMD_ABORT_FETCH)) {
IndexFetcher fetcher = currentIndexFetcher;
if (fetcher != null){
fetcher.abortFetch();
if (abortFetch()){
rsp.add(STATUS, OK_STATUS);
} else {
rsp.add(STATUS,ERR_STATUS);
@ -320,6 +319,16 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
}
}
public boolean abortFetch() {
IndexFetcher fetcher = currentIndexFetcher;
if (fetcher != null){
fetcher.abortFetch();
return true;
} else {
return false;
}
}
private void deleteSnapshot(ModifiableSolrParams params) {
String name = params.get(NAME);
if(name == null) {
@ -512,12 +521,25 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
numberToKeep = Integer.MAX_VALUE;
}
IndexCommit indexCommit = null;
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
if (commitName != null) {
SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
if(commit.isPresent()) {
indexCommit = commit.get();
} else {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to find an index commit with name " + commitName +
" for core " + core.getName());
}
} else {
IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
IndexCommit indexCommit = delPolicy.getLatestCommit();
indexCommit = delPolicy.getLatestCommit();
if (indexCommit == null) {
indexCommit = req.getSearcher().getIndexReader().getIndexCommit();
}
}
String location = params.get(CoreAdminParams.BACKUP_LOCATION);
String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
@ -539,7 +561,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
}
// small race here before the commit point is saved
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME));
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME), commitName);
snapShooter.validateCreateSnapshot();
snapShooter.createSnapAsync(indexCommit, numberToKeep, (nl) -> snapShootDetails = nl);
@ -644,7 +666,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
rsp.add(CMD_GET_FILE_LIST, result);
// fetch list of tlog files only if cdcr is activated
if (core.getUpdateHandler().getUpdateLog() != null && core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
if (solrParams.getBool(TLOG_FILES, true) && core.getUpdateHandler().getUpdateLog() != null
&& core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
try {
List<Map<String, Object>> tlogfiles = getTlogFileList(commit);
LOG.info("Adding tlog files to list: " + tlogfiles);

View File

@ -16,13 +16,17 @@
*/
package org.apache.solr.handler;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.concurrent.atomic.LongAdder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.PluginBag;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
@ -35,10 +39,6 @@ import org.apache.solr.util.stats.TimerContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.concurrent.atomic.AtomicLong;
import static org.apache.solr.core.RequestParams.USEPARAM;
/**
@ -53,10 +53,10 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
protected boolean httpCaching = true;
// Statistics
private final AtomicLong numRequests = new AtomicLong();
private final AtomicLong numServerErrors = new AtomicLong();
private final AtomicLong numClientErrors = new AtomicLong();
private final AtomicLong numTimeouts = new AtomicLong();
private final LongAdder numRequests = new LongAdder();
private final LongAdder numServerErrors = new LongAdder();
private final LongAdder numClientErrors = new LongAdder();
private final LongAdder numTimeouts = new LongAdder();
private final Timer requestTimes = new Timer();
private final long handlerStart;
@ -144,7 +144,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
@Override
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests.incrementAndGet();
numRequests.increment();
TimerContext timer = requestTimes.time();
try {
if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM));
@ -158,7 +158,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
Object partialResults = header.get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY);
boolean timedOut = partialResults == null ? false : (Boolean)partialResults;
if( timedOut ) {
numTimeouts.incrementAndGet();
numTimeouts.increment();
rsp.setHttpCaching(false);
}
}
@ -185,9 +185,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
SolrException.log(log, e);
if (isServerError) {
numServerErrors.incrementAndGet();
numServerErrors.increment();
} else {
numClientErrors.incrementAndGet();
numClientErrors.increment();
}
}
}

View File

@ -19,6 +19,7 @@ package org.apache.solr.handler;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Locale;
import java.util.concurrent.Callable;
@ -32,6 +33,9 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -63,6 +67,7 @@ public class RestoreCore implements Callable<Boolean> {
String restoreIndexName = "restore." + dateFormat.format(new Date());
String restoreIndexPath = core.getDataDir() + restoreIndexName;
String indexDirPath = core.getIndexDir();
Directory restoreIndexDir = null;
Directory indexDir = null;
try {
@ -71,7 +76,7 @@ public class RestoreCore implements Callable<Boolean> {
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Prefer local copy.
indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
indexDir = core.getDirectoryFactory().get(indexDirPath,
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Move all files from backupDir to restoreIndexDir
@ -130,7 +135,16 @@ public class RestoreCore implements Callable<Boolean> {
}
if (success) {
core.getDirectoryFactory().doneWithDirectory(indexDir);
SolrSnapshotMetaDataManager snapshotsMgr = core.getSnapshotMetaDataManager();
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
// Delete the old index directory only if no snapshot exists in that directory.
if (snapshots.isEmpty()) {
core.getDirectoryFactory().remove(indexDir);
} else {
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
}
}
return true;

View File

@ -185,7 +185,7 @@ public class SchemaHandler extends RequestHandlerBase implements SolrCoreAware,
if (parts.get(0).isEmpty()) parts.remove(0);
if (parts.size() > 1 && level2.containsKey(parts.get(1))) {
String realName = parts.get(1);
String fieldName = IndexSchema.SchemaProps.nameMapping.get(realName);
String fieldName = IndexSchema.nameMapping.get(realName);
String pathParam = level2.get(realName);
if (parts.size() > 2) {

View File

@ -26,12 +26,14 @@ import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.function.Consumer;
import com.google.common.base.Preconditions;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
@ -39,6 +41,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.BackupRepository.PathType;
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
@ -59,6 +62,7 @@ public class SnapShooter {
private URI baseSnapDirPath = null;
private URI snapshotDirPath = null;
private BackupRepository backupRepo = null;
private String commitName; // can be null
@Deprecated
public SnapShooter(SolrCore core, String location, String snapshotName) {
@ -71,14 +75,14 @@ public class SnapShooter {
} else {
snapDirStr = core.getCoreDescriptor().getInstanceDir().resolve(location).normalize().toString();
}
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName);
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName, null);
}
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
initialize(backupRepo, core, location, snapshotName);
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
initialize(backupRepo, core, location, snapshotName, commitName);
}
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
this.solrCore = Preconditions.checkNotNull(core);
this.backupRepo = Preconditions.checkNotNull(backupRepo);
this.baseSnapDirPath = backupRepo.createURI(Preconditions.checkNotNull(location)).normalize();
@ -90,6 +94,7 @@ public class SnapShooter {
directoryName = "snapshot." + fmt.format(new Date());
}
this.snapshotDirPath = backupRepo.createURI(location, directoryName);
this.commitName = commitName;
}
public BackupRepository getBackupRepository() {
@ -145,10 +150,19 @@ public class SnapShooter {
}
public NamedList createSnapshot() throws Exception {
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
RefCounted<SolrIndexSearcher> searcher = solrCore.getSearcher();
try {
if (commitName != null) {
SolrSnapshotMetaDataManager snapshotMgr = solrCore.getSnapshotMetaDataManager();
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
if(commit.isPresent()) {
return createSnapshot(commit.get());
}
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to find an index commit with name " + commitName +
" for core " + solrCore.getName());
} else {
//TODO should we try solrCore.getDeletionPolicy().getLatestCommit() first?
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
IndexCommit indexCommit = searcher.get().getIndexReader().getIndexCommit();
deletionPolicy.saveCommitPoint(indexCommit.getGeneration());
try {
@ -156,6 +170,7 @@ public class SnapShooter {
} finally {
deletionPolicy.releaseCommitPoint(indexCommit.getGeneration());
}
}
} finally {
searcher.decref();
}

View File

@ -122,6 +122,8 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("intersect", IntersectStream.class)
.withFunctionName("complement", ComplementStream.class)
.withFunctionName("sort", SortStream.class)
.withFunctionName("train", TextLogitStream.class)
.withFunctionName("features", FeaturesSelectionStream.class)
.withFunctionName("daemon", DaemonStream.class)
.withFunctionName("shortestPath", ShortestPathStream.class)
.withFunctionName("gatherNodes", GatherNodesStream.class)

View File

@ -714,10 +714,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
if (location == null) {
//Refresh the cluster property file to make sure the value set for location is the latest
h.coreContainer.getZkController().getZkStateReader().forceUpdateClusterProperties();
// Check if the location is specified in the cluster property.
location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
location = new ClusterProperties(h.coreContainer.getZkController().getZkClient()).getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
if (location == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ " parameter or as a default repository property or as a cluster property.");
@ -755,10 +753,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
if (location == null) {
//Refresh the cluster property file to make sure the value set for location is the latest
h.coreContainer.getZkController().getZkStateReader().forceUpdateClusterProperties();
// Check if the location is specified in the cluster property.
location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", null);
location = new ClusterProperties(h.coreContainer.getZkController().getZkClient()).getClusterProperty("location", null);
if (location == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ " parameter or as a default repository property or as a cluster property.");

View File

@ -34,6 +34,7 @@ import java.util.concurrent.Future;
import com.google.common.collect.Lists;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
@ -59,9 +60,13 @@ import org.apache.solr.core.CachingDirectoryFactory;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.apache.solr.handler.RestoreCore;
import org.apache.solr.handler.SnapShooter;
import org.apache.solr.handler.admin.CoreAdminHandler.CoreAdminOp;
@ -794,20 +799,24 @@ enum CoreAdminOperation implements CoreAdminOp {
+ " parameter or as a default repository property");
}
// An optional parameter to describe the snapshot to be backed-up. If this
// parameter is not supplied, the latest index commit is backed-up.
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
SnapShooter snapShooter = new SnapShooter(repository, core, location, name);
SnapShooter snapShooter = new SnapShooter(repository, core, location, name, commitName);
// validateCreateSnapshot will create parent dirs instead of throw; that choice is dubious.
// But we want to throw. One reason is that
// this dir really should, in fact must, already exist here if triggered via a collection backup on a shared
// file system. Otherwise, perhaps the FS location isn't shared -- we want an error.
if (!snapShooter.getBackupRepository().exists(snapShooter.getLocation())) {
throw new SolrException(ErrorCode.BAD_REQUEST,
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Directory to contain snapshots doesn't exist: " + snapShooter.getLocation());
}
snapShooter.validateCreateSnapshot();
snapShooter.createSnapshot();
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR,
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Failed to backup core=" + cname + " because " + e, e);
}
}),
@ -845,6 +854,92 @@ enum CoreAdminOperation implements CoreAdminOp {
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to restore core=" + core.getName());
}
}
}),
CREATESNAPSHOT_OP(CREATESNAPSHOT, it -> {
CoreContainer cc = it.handler.getCoreContainer();
final SolrParams params = it.req.getParams();
String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
String cname = params.required().get(CoreAdminParams.CORE);
try (SolrCore core = cc.getCore(cname)) {
if (core == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
}
String indexDirPath = core.getIndexDir();
IndexCommit ic = core.getDeletionPolicy().getLatestCommit();
if (ic == null) {
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
ic = searcher.get().getIndexReader().getIndexCommit();
} finally {
searcher.decref();
}
}
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
mgr.snapshot(commitName, indexDirPath, ic.getGeneration());
it.rsp.add("core", core.getName());
it.rsp.add("commitName", commitName);
it.rsp.add("indexDirPath", indexDirPath);
it.rsp.add("generation", ic.getGeneration());
}
}),
DELETESNAPSHOT_OP(DELETESNAPSHOT, it -> {
CoreContainer cc = it.handler.getCoreContainer();
final SolrParams params = it.req.getParams();
String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
String cname = params.required().get(CoreAdminParams.CORE);
try (SolrCore core = cc.getCore(cname)) {
if (core == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
}
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
Optional<SnapshotMetaData> metadata = mgr.release(commitName);
if (metadata.isPresent()) {
long gen = metadata.get().getGenerationNumber();
String indexDirPath = metadata.get().getIndexDirPath();
// If the directory storing the snapshot is not the same as the *current* core
// index directory, then delete the files corresponding to this snapshot.
// Otherwise we leave the index files related to snapshot as is (assuming the
// underlying Solr IndexDeletionPolicy will clean them up appropriately).
if (!indexDirPath.equals(core.getIndexDir())) {
Directory d = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
try {
SolrSnapshotManager.deleteIndexFiles(d, mgr.listSnapshotsInIndexDir(indexDirPath), gen);
} finally {
core.getDirectoryFactory().release(d);
}
}
}
}
}),
LISTSNAPSHOTS_OP(LISTSNAPSHOTS, it -> {
CoreContainer cc = it.handler.getCoreContainer();
final SolrParams params = it.req.getParams();
String cname = params.required().get(CoreAdminParams.CORE);
try ( SolrCore core = cc.getCore(cname) ) {
if (core == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
}
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
NamedList result = new NamedList();
for (String name : mgr.listSnapshots()) {
Optional<SnapshotMetaData> metadata = mgr.getSnapshotMetaData(name);
if ( metadata.isPresent() ) {
NamedList<String> props = new NamedList<>();
props.add("generation", String.valueOf(metadata.get().getGenerationNumber()));
props.add("indexDirPath", metadata.get().getIndexDirPath());
result.add(name, props);
}
}
it.rsp.add("snapshots", result);
}
});
final CoreAdminParams.CoreAdminAction action;

View File

@ -176,7 +176,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
query = rb.getQuery();
} else {
try {
QParser parser = QParser.getParser(qs, null, req);
QParser parser = QParser.getParser(qs, req);
query = parser.getQuery();
} catch (Exception e) {
throw new IOException(e);
@ -198,7 +198,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
try {
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, null, req);
QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery());
}
}

View File

@ -202,7 +202,7 @@ public class QueryComponent extends SearchComponent
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, null, req);
QParser fqp = QParser.getParser(fq, req);
filters.add(fqp.getQuery());
}
}

View File

@ -158,7 +158,7 @@ public class RealTimeGetComponent extends SearchComponent
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, null, req);
QParser fqp = QParser.getParser(fq, req);
filters.add(fqp.getQuery());
}
}
@ -249,7 +249,8 @@ public class RealTimeGetComponent extends SearchComponent
docid = segid + ctx.docBase;
if (rb.getFilters() != null) {
for (Query q : rb.getFilters()) {
for (Query raw : rb.getFilters()) {
Query q = raw.rewrite(searcher.getIndexReader());
Scorer scorer = searcher.createWeight(q, false, 1f).scorer(ctx);
if (scorer == null || segid != scorer.iterator().advance(segid)) {
// filter doesn't match.
@ -448,7 +449,7 @@ public class RealTimeGetComponent extends SearchComponent
ZkController zkController = rb.req.getCore().getCoreDescriptor().getCoreContainer().getZkController();
// if shards=... then use that
if (zkController != null && params.get("shards") == null) {
if (zkController != null && params.get(ShardParams.SHARDS) == null) {
CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
String collection = cloudDescriptor.getCollectionName();
@ -470,32 +471,18 @@ public class RealTimeGetComponent extends SearchComponent
for (Map.Entry<String,List<String>> entry : sliceToId.entrySet()) {
String shard = entry.getKey();
String shardIdList = StrUtils.join(entry.getValue(), ',');
ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
ShardRequest sreq = createShardRequest(rb, entry.getValue());
// sreq.shards = new String[]{shard}; // TODO: would be nice if this would work...
sreq.shards = sliceToShards(rb, collection, shard);
sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq);
}
} else {
String shardIdList = StrUtils.join(reqIds.allIds, ',');
ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
ShardRequest sreq = createShardRequest(rb, reqIds.allIds);
sreq.shards = null; // ALL
sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq);
}
@ -503,6 +490,28 @@ public class RealTimeGetComponent extends SearchComponent
return ResponseBuilder.STAGE_DONE;
}
/**
* Helper method for creating a new ShardRequest for the specified ids, based on the params
* specified for the current request. The new ShardRequest does not yet know anything about
* which shard/slice it will be sent to.
*/
private ShardRequest createShardRequest(final ResponseBuilder rb, final List<String> ids) {
final ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
sreq.params = new ModifiableSolrParams(rb.req.getParams());
// TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set(ShardParams.SHARDS_QT,"/get");
sreq.params.set("distrib",false);
sreq.params.remove(ShardParams.SHARDS);
sreq.params.remove("id");
sreq.params.remove("ids");
sreq.params.set("ids", StrUtils.join(ids, ','));
return sreq;
}
private String[] sliceToShards(ResponseBuilder rb, String collection, String slice) {
String lookup = collection + '_' + slice; // seems either form may be filled in rb.slices?

View File

@ -60,7 +60,6 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
@ -242,7 +241,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
try {
if (maxResultsFilterQueryString != null) {
// Get the default Lucene query parser
QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE, rb.req);
QParser parser = QParser.getParser(maxResultsFilterQueryString, rb.req);
DocSet s = searcher.getDocSet(parser.getQuery());
maxResultsByFilters = s.size();
} else {

View File

@ -321,7 +321,7 @@ public class SimpleFacets {
public void getFacetQueryCount(ParsedParams parsed, NamedList<Integer> res) throws SyntaxError, IOException {
// TODO: slight optimization would prevent double-parsing of any localParams
// TODO: SOLR-7753
Query qobj = QParser.getParser(parsed.facetValue, null, req).getQuery();
Query qobj = QParser.getParser(parsed.facetValue, req).getQuery();
if (qobj == null) {
res.add(parsed.key, 0);

View File

@ -77,7 +77,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
BitSetProducer parentsFilter = null;
try {
Query parentFilterQuery = QParser.getParser( parentFilter, null, req).getQuery();
Query parentFilterQuery = QParser.getParser( parentFilter, req).getQuery();
parentsFilter = new QueryBitSetProducer(new QueryWrapperFilter(parentFilterQuery));
} catch (SyntaxError syntaxError) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct parent filter query" );
@ -86,7 +86,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
Query childFilterQuery = null;
if(childFilter != null) {
try {
childFilterQuery = QParser.getParser( childFilter, null, req).getQuery();
childFilterQuery = QParser.getParser( childFilter, req).getQuery();
} catch (SyntaxError syntaxError) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct child filter query" );
}

View File

@ -76,6 +76,15 @@ import org.apache.solr.search.TermsQParserPlugin;
* its' native parameters like <code>collection, shards</code> for subquery, eg<br>
* <code>q=*:*&amp;fl=*,foo:[subquery]&amp;foo.q=cloud&amp;foo.collection=departments</code>
*
* <h3>When used in Real Time Get</h3>
* <p>
* When used in the context of a Real Time Get, the <i>values</i> from each document that are used
* in the qubquery are the "real time" values (possibly from the transaction log), but the query
* itself is still executed against the currently open searcher. Note that this means if a
* document is updated but not yet committed, an RTG request for that document that uses
* <code>[subquery]</code> could include the older (committed) version of that document,
* with differnet field values, in the subquery results.
* </p>
*/
public class SubQueryAugmenterFactory extends TransformerFactory{
@ -304,6 +313,14 @@ class SubQueryAugmenter extends DocTransformer {
return name;
}
/**
* Returns false -- this transformer does use an IndexSearcher, but it does not (neccessarily) need
* the searcher from the ResultContext of the document being returned. Instead we use the current
* "live" searcher for the specified core.
*/
@Override
public boolean needsSolrIndexSearcher() { return false; }
@Override
public void transform(SolrDocument doc, int docid, float score) {

View File

@ -1500,10 +1500,12 @@ public class IndexSchema {
(v1, v2) -> v2,
LinkedHashMap::new));
}
public static Map<String,String> nameMapping = Collections.unmodifiableMap(Stream.of(Handler.values())
.collect(Collectors.toMap(Handler::getNameLower , Handler::getRealName)));
}
public static Map<String,String> nameMapping = Collections.unmodifiableMap(Stream.of(SchemaProps.Handler.values())
.collect(Collectors.toMap(SchemaProps.Handler::getNameLower , SchemaProps.Handler::getRealName)));
public Map<String, Object> getNamedPropertyValues(String name, SolrParams params) {
return new SchemaProps(name, params, this).toMap();

Some files were not shown because too many files have changed in this diff Show More