mirror of https://github.com/apache/lucene.git
Merge remote-tracking branch 'origin/master'
Conflicts: solr/CHANGES.txt
This commit is contained in:
commit
92b5a76b54
|
@ -65,6 +65,9 @@ New Features
|
|||
Polygon instances from a standard GeoJSON string (Robert Muir, Mike
|
||||
McCandless)
|
||||
|
||||
* SOLR-9279: Queries module: new ComparisonBoolFunction base class
|
||||
(Doug Turnbull via David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
|
||||
|
@ -135,10 +138,9 @@ Improvements
|
|||
|
||||
* LUCENE-7385: Improve/fix assert messages in SpanScorer. (David Smiley)
|
||||
|
||||
* LUCENE-7390: Improve performance of indexing points by allowing the
|
||||
codec to use transient heap in proportion to IndexWriter's RAM
|
||||
buffer, instead of a fixed 16.0 MB. A custom codec can still
|
||||
override the buffer size itself. (Mike McCandless)
|
||||
* LUCENE-7393: Add ICUTokenizer option to parse Myanmar text as syllables instead of words,
|
||||
because the ICU word-breaking algorithm has some issues. This allows for the previous
|
||||
tokenization used before Lucene 5. (AM, Robert Muir)
|
||||
|
||||
Optimizations
|
||||
|
||||
|
@ -154,6 +156,12 @@ Optimizations
|
|||
* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
|
||||
(Adrien Grand)
|
||||
|
||||
* LUCENE-7396, LUCENE-7399: Faster flush of points.
|
||||
(Adrien Grand, Mike McCandless)
|
||||
|
||||
* LUCENE-7406: Automaton and PrefixQuery tweaks (fewer object (re)allocations).
|
||||
(Christine Poerschke)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
|
||||
|
|
|
@ -402,6 +402,7 @@ public class MinHashFilter extends TokenFilter {
|
|||
}
|
||||
|
||||
/** Returns the MurmurHash3_x64_128 hash, placing the result in "out". */
|
||||
@SuppressWarnings("fallthrough") // the huge switch is designed to use fall through into cases!
|
||||
static void murmurhash3_x64_128(byte[] key, int offset, int len, int seed, LongPair out) {
|
||||
// The original algorithm does have a 32 bit unsigned seed.
|
||||
// We have to mask to match the behavior of the unsigned types and prevent sign extension.
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
#
|
||||
# Parses Myanmar text, with syllable as token.
|
||||
#
|
||||
|
||||
$Cons = [[:Other_Letter:]&[:Myanmar:]];
|
||||
$Virama = [\u1039];
|
||||
$Asat = [\u103A];
|
||||
|
||||
$WordJoin = [:Line_Break=Word_Joiner:];
|
||||
|
||||
#
|
||||
# default numerical definitions
|
||||
#
|
||||
$Extend = [\p{Word_Break = Extend}];
|
||||
$Format = [\p{Word_Break = Format}];
|
||||
$MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
$MidNum = [\p{Word_Break = MidNum}];
|
||||
$Numeric = [\p{Word_Break = Numeric}];
|
||||
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
|
||||
$MidNumEx = $MidNum ($Extend | $Format)*;
|
||||
$NumericEx = $Numeric ($Extend | $Format)*;
|
||||
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
|
||||
|
||||
$ConsEx = $Cons ($Extend | $Format)*;
|
||||
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
|
||||
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
|
||||
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;
|
||||
|
||||
!!forward;
|
||||
$MyanmarJoinedSyllableEx {200};
|
||||
|
||||
# default numeric rules
|
||||
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};
|
|
@ -63,9 +63,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
|
|||
// the same as ROOT, except no dictionary segmentation for cjk
|
||||
private static final BreakIterator defaultBreakIterator =
|
||||
readBreakIterator("Default.brk");
|
||||
private static final BreakIterator myanmarSyllableIterator =
|
||||
readBreakIterator("MyanmarSyllable.brk");
|
||||
|
||||
// TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
|
||||
private final boolean cjkAsWords;
|
||||
private final boolean myanmarAsWords;
|
||||
|
||||
/**
|
||||
* Creates a new config. This object is lightweight, but the first
|
||||
|
@ -74,9 +77,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
|
|||
* otherwise text will be segmented according to UAX#29 defaults.
|
||||
* If this is true, all Han+Hiragana+Katakana words will be tagged as
|
||||
* IDEOGRAPHIC.
|
||||
* @param myanmarAsWords true if Myanmar text should undergo dictionary-based segmentation,
|
||||
* otherwise it will be tokenized as syllables.
|
||||
*/
|
||||
public DefaultICUTokenizerConfig(boolean cjkAsWords) {
|
||||
public DefaultICUTokenizerConfig(boolean cjkAsWords, boolean myanmarAsWords) {
|
||||
this.cjkAsWords = cjkAsWords;
|
||||
this.myanmarAsWords = myanmarAsWords;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -88,6 +94,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
|
|||
public BreakIterator getBreakIterator(int script) {
|
||||
switch(script) {
|
||||
case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone();
|
||||
case UScript.MYANMAR:
|
||||
if (myanmarAsWords) {
|
||||
return (BreakIterator)defaultBreakIterator.clone();
|
||||
} else {
|
||||
return (BreakIterator)myanmarSyllableIterator.clone();
|
||||
}
|
||||
default: return (BreakIterator)defaultBreakIterator.clone();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,7 +68,7 @@ public final class ICUTokenizer extends Tokenizer {
|
|||
* @see DefaultICUTokenizerConfig
|
||||
*/
|
||||
public ICUTokenizer() {
|
||||
this(new DefaultICUTokenizerConfig(true));
|
||||
this(new DefaultICUTokenizerConfig(true, true));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -79,6 +79,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
|
|||
private final Map<Integer,String> tailored;
|
||||
private ICUTokenizerConfig config;
|
||||
private final boolean cjkAsWords;
|
||||
private final boolean myanmarAsWords;
|
||||
|
||||
/** Creates a new ICUTokenizerFactory */
|
||||
public ICUTokenizerFactory(Map<String,String> args) {
|
||||
|
@ -95,6 +96,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
|
|||
}
|
||||
}
|
||||
cjkAsWords = getBoolean(args, "cjkAsWords", true);
|
||||
myanmarAsWords = getBoolean(args, "myanmarAsWords", true);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
@ -104,7 +106,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
|
|||
public void inform(ResourceLoader loader) throws IOException {
|
||||
assert tailored != null : "init must be called first!";
|
||||
if (tailored.isEmpty()) {
|
||||
config = new DefaultICUTokenizerConfig(cjkAsWords);
|
||||
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
|
||||
} else {
|
||||
final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
|
||||
for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
|
||||
|
@ -112,7 +114,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
|
|||
String resourcePath = entry.getValue();
|
||||
breakers[code] = parseRules(resourcePath, loader);
|
||||
}
|
||||
config = new DefaultICUTokenizerConfig(cjkAsWords) {
|
||||
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {
|
||||
|
||||
@Override
|
||||
public BreakIterator getBreakIterator(int script) {
|
||||
|
|
Binary file not shown.
|
@ -42,7 +42,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
|||
sb.append(whitespace);
|
||||
sb.append("testing 1234");
|
||||
String input = sb.toString();
|
||||
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
|
||||
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
|
||||
tokenizer.setReader(new StringReader(input));
|
||||
assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
|||
sb.append('a');
|
||||
}
|
||||
String input = sb.toString();
|
||||
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
|
||||
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
|
||||
tokenizer.setReader(new StringReader(input));
|
||||
char token[] = new char[4096];
|
||||
Arrays.fill(token, 'a');
|
||||
|
@ -75,7 +75,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
|||
a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
|
||||
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
|
||||
TokenFilter filter = new ICUNormalizer2Filter(tokenizer);
|
||||
return new TokenStreamComponents(tokenizer, filter);
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
|
|||
a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true)));
|
||||
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true, true)));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.icu.segmentation;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
/** Test tokenizing Myanmar text into syllables */
|
||||
public class TestMyanmarSyllable extends BaseTokenStreamTestCase {
|
||||
|
||||
Analyzer a;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, false));
|
||||
return new TokenStreamComponents(tokenizer);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
a.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/** as opposed to dictionary break of သက်ဝင်|လှုပ်ရှား|စေ|ပြီး */
|
||||
public void testBasics() throws Exception {
|
||||
assertAnalyzesTo(a, "သက်ဝင်လှုပ်ရှားစေပြီး", new String[] { "သက်", "ဝင်", "လှုပ်", "ရှား", "စေ", "ပြီး" });
|
||||
}
|
||||
|
||||
// simple tests from "A Rule-based Syllable Segmentation of Myanmar Text"
|
||||
// * http://www.aclweb.org/anthology/I08-3010
|
||||
// (see also the presentation: http://gii2.nagaokaut.ac.jp/gii/media/share/20080901-ZMM%20Presentation.pdf)
|
||||
// The words are fake, we just test the categories.
|
||||
// note that currently our algorithm is not sophisticated enough to handle some of the special cases!
|
||||
|
||||
/** consonant */
|
||||
public void testC() throws Exception {
|
||||
assertAnalyzesTo(a, "ကက", new String[] { "က", "က" });
|
||||
}
|
||||
|
||||
/** consonant + sign */
|
||||
public void testCF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကံကံ", new String[] { "ကံ", "ကံ" });
|
||||
}
|
||||
|
||||
/** consonant + consonant + asat */
|
||||
public void testCCA() throws Exception {
|
||||
assertAnalyzesTo(a, "ကင်ကင်", new String[] { "ကင်", "ကင်" });
|
||||
}
|
||||
|
||||
/** consonant + consonant + asat + sign */
|
||||
public void testCCAF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကင်းကင်း", new String[] { "ကင်း", "ကင်း" });
|
||||
}
|
||||
|
||||
/** consonant + vowel */
|
||||
public void testCV() throws Exception {
|
||||
assertAnalyzesTo(a, "ကာကာ", new String[] { "ကာ", "ကာ" });
|
||||
}
|
||||
|
||||
/** consonant + vowel + sign */
|
||||
public void testCVF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကားကား", new String[] { "ကား", "ကား" });
|
||||
}
|
||||
|
||||
/** consonant + vowel + vowel + asat */
|
||||
public void testCVVA() throws Exception {
|
||||
assertAnalyzesTo(a, "ကော်ကော်", new String[] { "ကော်", "ကော်" });
|
||||
}
|
||||
|
||||
/** consonant + vowel + vowel + consonant + asat */
|
||||
public void testCVVCA() throws Exception {
|
||||
assertAnalyzesTo(a, "ကောင်ကောင်", new String[] { "ကောင်", "ကောင်" });
|
||||
}
|
||||
|
||||
/** consonant + vowel + vowel + consonant + asat + sign */
|
||||
public void testCVVCAF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကောင်းကောင်း", new String[] { "ကောင်း", "ကောင်း" });
|
||||
}
|
||||
|
||||
/** consonant + medial */
|
||||
public void testCM() throws Exception {
|
||||
assertAnalyzesTo(a, "ကျကျ", new String[] { "ကျ", "ကျ" });
|
||||
}
|
||||
|
||||
/** consonant + medial + sign */
|
||||
public void testCMF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကျံကျံ", new String[] { "ကျံ", "ကျံ" });
|
||||
}
|
||||
|
||||
/** consonant + medial + consonant + asat */
|
||||
public void testCMCA() throws Exception {
|
||||
assertAnalyzesTo(a, "ကျင်ကျင်", new String[] { "ကျင်", "ကျင်" });
|
||||
}
|
||||
|
||||
/** consonant + medial + consonant + asat + sign */
|
||||
public void testCMCAF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကျင်းကျင်း", new String[] { "ကျင်း", "ကျင်း" });
|
||||
}
|
||||
|
||||
/** consonant + medial + vowel */
|
||||
public void testCMV() throws Exception {
|
||||
assertAnalyzesTo(a, "ကျာကျာ", new String[] { "ကျာ", "ကျာ" });
|
||||
}
|
||||
|
||||
/** consonant + medial + vowel + sign */
|
||||
public void testCMVF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကျားကျား", new String[] { "ကျား", "ကျား" });
|
||||
}
|
||||
|
||||
/** consonant + medial + vowel + vowel + asat */
|
||||
public void testCMVVA() throws Exception {
|
||||
assertAnalyzesTo(a, "ကျော်ကျော်", new String[] { "ကျော်", "ကျော်" });
|
||||
}
|
||||
|
||||
/** consonant + medial + vowel + vowel + consonant + asat */
|
||||
public void testCMVVCA() throws Exception {
|
||||
assertAnalyzesTo(a, "ကြောင်ကြောင်", new String[] { "ကြောင်", "ကြောင်"});
|
||||
}
|
||||
|
||||
/** consonant + medial + vowel + vowel + consonant + asat + sign */
|
||||
public void testCMVVCAF() throws Exception {
|
||||
assertAnalyzesTo(a, "ကြောင်းကြောင်း", new String[] { "ကြောင်း", "ကြောင်း"});
|
||||
}
|
||||
|
||||
/** independent vowel */
|
||||
public void testI() throws Exception {
|
||||
assertAnalyzesTo(a, "ဪဪ", new String[] { "ဪ", "ဪ" });
|
||||
}
|
||||
|
||||
/** independent vowel */
|
||||
public void testE() throws Exception {
|
||||
assertAnalyzesTo(a, "ဣဣ", new String[] { "ဣ", "ဣ" });
|
||||
}
|
||||
}
|
|
@ -46,7 +46,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
|
|||
analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
|
||||
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
|
||||
TokenStream result = new CJKBigramFilter(source);
|
||||
return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET));
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
|
|||
analyzer2 = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
|
||||
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
|
||||
// we put this before the CJKBigramFilter, because the normalization might combine
|
||||
// some halfwidth katakana forms, which will affect the bigramming.
|
||||
TokenStream result = new ICUNormalizer2Filter(source);
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexWriterConfig;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
|
@ -82,11 +83,20 @@ public class DatasetSplitter {
|
|||
// get the exact no. of existing classes
|
||||
int noOfClasses = 0;
|
||||
for (LeafReaderContext leave : originalIndex.leaves()) {
|
||||
long valueCount = 0;
|
||||
SortedDocValues classValues = leave.reader().getSortedDocValues(classFieldName);
|
||||
if (classValues == null) {
|
||||
throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values");
|
||||
if (classValues != null) {
|
||||
valueCount = classValues.getValueCount();
|
||||
} else {
|
||||
SortedSetDocValues sortedSetDocValues = leave.reader().getSortedSetDocValues(classFieldName);
|
||||
if (sortedSetDocValues != null) {
|
||||
valueCount = sortedSetDocValues.getValueCount();
|
||||
}
|
||||
noOfClasses += classValues.getValueCount();
|
||||
}
|
||||
if (classValues == null) {
|
||||
throw new IllegalStateException("field \"" + classFieldName + "\" must have sorted (set) doc values");
|
||||
}
|
||||
noOfClasses += valueCount;
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
@ -68,7 +68,7 @@ class SimpleTextPointsWriter extends PointsWriter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
|
||||
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
|
||||
|
||||
|
@ -79,7 +79,7 @@ class SimpleTextPointsWriter extends PointsWriter {
|
|||
fieldInfo.getPointDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
|
||||
maxMBSortInHeap,
|
||||
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
|
||||
values.size(fieldInfo.name),
|
||||
singleValuePerDoc) {
|
||||
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** {@link PointsReader} whose order of points can be changed.
|
||||
* This class is useful for codecs to optimize flush.
|
||||
* @lucene.internal */
|
||||
public abstract class MutablePointsReader extends PointsReader {
|
||||
|
||||
/** Sole constructor. */
|
||||
protected MutablePointsReader() {}
|
||||
|
||||
/** Set {@code packedValue} with a reference to the packed bytes of the i-th value. */
|
||||
public abstract void getValue(int i, BytesRef packedValue);
|
||||
|
||||
/** Get the k-th byte of the i-th value. */
|
||||
public abstract byte getByteAt(int i, int k);
|
||||
|
||||
/** Return the doc ID of the i-th value. */
|
||||
public abstract int getDocID(int i);
|
||||
|
||||
/** Swap the i-th and j-th values. */
|
||||
public abstract void swap(int i, int j);
|
||||
|
||||
}
|
|
@ -22,7 +22,6 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.util.bkd.BKDWriter;
|
||||
|
||||
/** Abstract API to write points
|
||||
*
|
||||
|
@ -35,9 +34,8 @@ public abstract class PointsWriter implements Closeable {
|
|||
protected PointsWriter() {
|
||||
}
|
||||
|
||||
/** Write all values contained in the provided reader. {@code maxMBSortInHeap} is the maximum
|
||||
* transient heap that can be used to sort values, before spilling to disk for offline sorting */
|
||||
public abstract void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException;
|
||||
/** Write all values contained in the provided reader */
|
||||
public abstract void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException;
|
||||
|
||||
/** Default naive merge implementation for one field: it just re-indexes all the values
|
||||
* from the incoming segment. The default codec overrides this for 1D fields and uses
|
||||
|
@ -147,10 +145,7 @@ public abstract class PointsWriter implements Closeable {
|
|||
public int getDocCount(String fieldName) {
|
||||
return finalDocCount;
|
||||
}
|
||||
},
|
||||
// TODO: also let merging of > 1D fields tap into IW's indexing buffer size, somehow (1D fields do an optimized merge sort
|
||||
// and don't need heap)
|
||||
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||
});
|
||||
}
|
||||
|
||||
/** Default merge implementation to merge incoming points readers by visiting all their points and
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.PointsWriter;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -39,9 +40,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.bkd.BKDReader;
|
||||
import org.apache.lucene.util.bkd.BKDWriter;
|
||||
|
||||
/** Writes dimensional values
|
||||
*
|
||||
* @lucene.experimental */
|
||||
/** Writes dimensional values */
|
||||
public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
||||
|
||||
/** Output used to write the BKD tree data file */
|
||||
|
@ -52,13 +51,15 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
|
||||
final SegmentWriteState writeState;
|
||||
final int maxPointsInLeafNode;
|
||||
final double maxMBSortInHeap;
|
||||
private boolean finished;
|
||||
|
||||
/** Full constructor */
|
||||
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode) throws IOException {
|
||||
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
|
||||
assert writeState.fieldInfos.hasPointValues();
|
||||
this.writeState = writeState;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
this.maxMBSortInHeap = maxMBSortInHeap;
|
||||
String dataFileName = IndexFileNames.segmentFileName(writeState.segmentInfo.name,
|
||||
writeState.segmentSuffix,
|
||||
Lucene60PointsFormat.DATA_EXTENSION);
|
||||
|
@ -80,11 +81,11 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
|
||||
/** Uses the default values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap} (16.0) */
|
||||
public Lucene60PointsWriter(SegmentWriteState writeState) throws IOException {
|
||||
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
|
||||
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
|
||||
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
|
||||
|
||||
|
@ -98,6 +99,14 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
values.size(fieldInfo.name),
|
||||
singleValuePerDoc)) {
|
||||
|
||||
if (values instanceof MutablePointsReader) {
|
||||
final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointsReader) values);
|
||||
if (fp != -1) {
|
||||
indexFPs.put(fieldInfo.name, fp);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
values.intersect(fieldInfo.name, new IntersectVisitor() {
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
|
@ -173,8 +182,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
fieldInfo.getPointDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
maxPointsInLeafNode,
|
||||
// NOTE: not used, since BKDWriter.merge does a merge sort:
|
||||
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
|
||||
maxMBSortInHeap,
|
||||
totMaxSize,
|
||||
singleValuePerDoc)) {
|
||||
List<BKDReader> bkdReaders = new ArrayList<>();
|
||||
|
|
|
@ -257,7 +257,7 @@ public class Field implements IndexableField {
|
|||
/**
|
||||
* The value of the field as a String, or null. If null, the Reader value or
|
||||
* binary value is used. Exactly one of stringValue(), readerValue(), and
|
||||
* getBinaryValue() must be set.
|
||||
* binaryValue() must be set.
|
||||
*/
|
||||
@Override
|
||||
public String stringValue() {
|
||||
|
@ -271,7 +271,7 @@ public class Field implements IndexableField {
|
|||
/**
|
||||
* The value of the field as a Reader, or null. If null, the String value or
|
||||
* binary value is used. Exactly one of stringValue(), readerValue(), and
|
||||
* getBinaryValue() must be set.
|
||||
* binaryValue() must be set.
|
||||
*/
|
||||
@Override
|
||||
public Reader readerValue() {
|
||||
|
@ -420,7 +420,7 @@ public class Field implements IndexableField {
|
|||
/**
|
||||
* Expert: sets the token stream to be used for indexing and causes
|
||||
* isIndexed() and isTokenized() to return true. May be combined with stored
|
||||
* values from stringValue() or getBinaryValue()
|
||||
* values from stringValue() or binaryValue()
|
||||
*/
|
||||
public void setTokenStream(TokenStream tokenStream) {
|
||||
if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {
|
||||
|
|
|
@ -153,7 +153,7 @@ class DocumentsWriterPerThread {
|
|||
final Allocator byteBlockAllocator;
|
||||
final IntBlockPool.Allocator intBlockAllocator;
|
||||
private final AtomicLong pendingNumDocs;
|
||||
final LiveIndexWriterConfig indexWriterConfig;
|
||||
private final LiveIndexWriterConfig indexWriterConfig;
|
||||
private final boolean enableTestPoints;
|
||||
private final IndexWriter indexWriter;
|
||||
|
||||
|
|
|
@ -762,7 +762,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
* {@link #getConfig()}.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> after this writer is created, the given configuration instance
|
||||
* <b>NOTE:</b> after this writer is created, the given configuration instance
|
||||
* cannot be passed to another writer.
|
||||
*
|
||||
* @param d
|
||||
|
|
|
@ -168,14 +168,9 @@ public class LiveIndexWriterConfig {
|
|||
|
||||
/**
|
||||
* Determines the amount of RAM that may be used for buffering added documents
|
||||
* and deletions before beginning to flush them to the Directory. For
|
||||
* faster indexing performance it's best to use as large a RAM buffer as you can.
|
||||
* <p>
|
||||
* Note that this setting is not a hard limit on memory usage during indexing, as
|
||||
* transient and non-trivial memory well beyond this buffer size may be used,
|
||||
* for example due to segment merges or writing points to new segments.
|
||||
* For application stability the available memory in the JVM
|
||||
* should be significantly larger than the RAM buffer used for indexing.
|
||||
* and deletions before they are flushed to the Directory. Generally for
|
||||
* faster indexing performance it's best to flush by RAM usage instead of
|
||||
* document count and use as large a RAM buffer as you can.
|
||||
* <p>
|
||||
* When this is set, the writer will flush whenever buffered documents and
|
||||
* deletions use this much RAM. Pass in
|
||||
|
@ -183,6 +178,14 @@ public class LiveIndexWriterConfig {
|
|||
* due to RAM usage. Note that if flushing by document count is also enabled,
|
||||
* then the flush will be triggered by whichever comes first.
|
||||
* <p>
|
||||
* The maximum RAM limit is inherently determined by the JVM's available
|
||||
* memory. Yet, an {@link IndexWriter} session can consume a significantly
|
||||
* larger amount of memory than the given RAM limit since this limit is just
|
||||
* an indicator when to flush memory resident documents to the Directory.
|
||||
* Flushes are likely to happen concurrently while other threads are adding documents
|
||||
* to the writer. For application stability the available memory in the JVM
|
||||
* should be significantly larger than the RAM buffer used for indexing.
|
||||
* <p>
|
||||
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
|
||||
* approximate. Specifically, if you delete by Query, Lucene currently has no
|
||||
* way to measure the RAM usage of individual Queries so the accounting will
|
||||
|
|
|
@ -18,13 +18,13 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.PointsWriter;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.bkd.BKDWriter;
|
||||
|
||||
/** Buffers up pending byte[][] value(s) per doc, then flushes when segment flushes. */
|
||||
class PointValuesWriter {
|
||||
|
@ -35,8 +35,7 @@ class PointValuesWriter {
|
|||
private int numPoints;
|
||||
private int numDocs;
|
||||
private int lastDocID = -1;
|
||||
private final byte[] packedValue;
|
||||
private final LiveIndexWriterConfig indexWriterConfig;
|
||||
private final int packedBytesLength;
|
||||
|
||||
public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
|
@ -44,8 +43,7 @@ class PointValuesWriter {
|
|||
this.bytes = new ByteBlockPool(docWriter.byteBlockAllocator);
|
||||
docIDs = new int[16];
|
||||
iwBytesUsed.addAndGet(16 * Integer.BYTES);
|
||||
packedValue = new byte[fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()];
|
||||
indexWriterConfig = docWriter.indexWriterConfig;
|
||||
packedBytesLength = fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes();
|
||||
}
|
||||
|
||||
// TODO: if exactly the same value is added to exactly the same doc, should we dedup?
|
||||
|
@ -53,9 +51,10 @@ class PointValuesWriter {
|
|||
if (value == null) {
|
||||
throw new IllegalArgumentException("field=" + fieldInfo.name + ": point value must not be null");
|
||||
}
|
||||
if (value.length != fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()) {
|
||||
if (value.length != packedBytesLength) {
|
||||
throw new IllegalArgumentException("field=" + fieldInfo.name + ": this field's value has length=" + value.length + " but should be " + (fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()));
|
||||
}
|
||||
|
||||
if (docIDs.length == numPoints) {
|
||||
docIDs = ArrayUtil.grow(docIDs, numPoints+1);
|
||||
iwBytesUsed.addAndGet((docIDs.length - numPoints) * Integer.BYTES);
|
||||
|
@ -66,21 +65,32 @@ class PointValuesWriter {
|
|||
numDocs++;
|
||||
lastDocID = docID;
|
||||
}
|
||||
|
||||
numPoints++;
|
||||
}
|
||||
|
||||
public void flush(SegmentWriteState state, PointsWriter writer) throws IOException {
|
||||
PointsReader reader = new MutablePointsReader() {
|
||||
|
||||
final int[] ords = new int[numPoints];
|
||||
{
|
||||
for (int i = 0; i < numPoints; ++i) {
|
||||
ords[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
writer.writeField(fieldInfo,
|
||||
new PointsReader() {
|
||||
@Override
|
||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||
if (fieldName.equals(fieldInfo.name) == false) {
|
||||
throw new IllegalArgumentException("fieldName must be the same");
|
||||
}
|
||||
final BytesRef scratch = new BytesRef();
|
||||
final byte[] packedValue = new byte[packedBytesLength];
|
||||
for(int i=0;i<numPoints;i++) {
|
||||
bytes.readBytes(packedValue.length * i, packedValue, 0, packedValue.length);
|
||||
visitor.visit(docIDs[i], packedValue);
|
||||
getValue(i, scratch);
|
||||
assert scratch.length == packedValue.length;
|
||||
System.arraycopy(scratch.bytes, scratch.offset, packedValue, 0, packedBytesLength);
|
||||
visitor.visit(getDocID(i), packedValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -120,14 +130,46 @@ class PointValuesWriter {
|
|||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
if (fieldName.equals(fieldInfo.name) == false) {
|
||||
throw new IllegalArgumentException("fieldName must be the same");
|
||||
}
|
||||
return numPoints;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount(String fieldName) {
|
||||
if (fieldName.equals(fieldInfo.name) == false) {
|
||||
throw new IllegalArgumentException("fieldName must be the same");
|
||||
}
|
||||
return numDocs;
|
||||
}
|
||||
},
|
||||
Math.max(indexWriterConfig.getRAMBufferSizeMB()/8.0, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP));
|
||||
|
||||
@Override
|
||||
public void swap(int i, int j) {
|
||||
int tmp = ords[i];
|
||||
ords[i] = ords[j];
|
||||
ords[j] = tmp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocID(int i) {
|
||||
return docIDs[ords[i]];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getValue(int i, BytesRef packedValue) {
|
||||
final long offset = (long) packedBytesLength * ords[i];
|
||||
packedValue.length = packedBytesLength;
|
||||
bytes.setRawBytesRef(packedValue, offset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getByteAt(int i, int k) {
|
||||
final long offset = (long) packedBytesLength * ords[i] + k;
|
||||
return bytes.readByte(offset);
|
||||
}
|
||||
};
|
||||
|
||||
writer.writeField(fieldInfo, reader);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -405,6 +405,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
|||
lock.lock();
|
||||
try {
|
||||
cache.clear();
|
||||
// Note that this also clears the uniqueQueries map since mostRecentlyUsedQueries is the uniqueQueries.keySet view:
|
||||
mostRecentlyUsedQueries.clear();
|
||||
onClear();
|
||||
} finally {
|
||||
|
|
|
@ -41,7 +41,8 @@ public class PrefixQuery extends AutomatonQuery {
|
|||
|
||||
/** Build an automaton accepting all terms with the specified prefix. */
|
||||
public static Automaton toAutomaton(BytesRef prefix) {
|
||||
Automaton automaton = new Automaton();
|
||||
final int numStatesAndTransitions = prefix.length+1;
|
||||
final Automaton automaton = new Automaton(numStatesAndTransitions, numStatesAndTransitions);
|
||||
int lastState = automaton.createState();
|
||||
for(int i=0;i<prefix.length;i++) {
|
||||
int state = automaton.createState();
|
||||
|
@ -66,7 +67,7 @@ public class PrefixQuery extends AutomatonQuery {
|
|||
StringBuilder buffer = new StringBuilder();
|
||||
if (!getField().equals(field)) {
|
||||
buffer.append(getField());
|
||||
buffer.append(":");
|
||||
buffer.append(':');
|
||||
}
|
||||
buffer.append(term.text());
|
||||
buffer.append('*');
|
||||
|
|
|
@ -459,69 +459,26 @@ public final class ArrayUtil {
|
|||
* greater than or equal to it.
|
||||
* This runs in linear time on average and in {@code n log(n)} time in the
|
||||
* worst case.*/
|
||||
public static <T> void select(T[] arr, int from, int to, int k, Comparator<T> comparator) {
|
||||
if (k < from) {
|
||||
throw new IllegalArgumentException("k must be >= from");
|
||||
}
|
||||
if (k >= to) {
|
||||
throw new IllegalArgumentException("k must be < to");
|
||||
}
|
||||
final int maxDepth = 2 * MathUtil.log(to - from, 2);
|
||||
quickSelect(arr, from, to, k, comparator, maxDepth);
|
||||
public static <T> void select(T[] arr, int from, int to, int k, Comparator<? super T> comparator) {
|
||||
new IntroSelector() {
|
||||
|
||||
T pivot;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(arr, i, j);
|
||||
}
|
||||
|
||||
private static <T> void quickSelect(T[] arr, int from, int to, int k, Comparator<T> comparator, int maxDepth) {
|
||||
assert from <= k;
|
||||
assert k < to;
|
||||
if (to - from == 1) {
|
||||
return;
|
||||
}
|
||||
if (--maxDepth < 0) {
|
||||
Arrays.sort(arr, from, to, comparator);
|
||||
return;
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot = arr[i];
|
||||
}
|
||||
|
||||
final int mid = (from + to) >>> 1;
|
||||
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
|
||||
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
|
||||
swap(arr, from, to - 1);
|
||||
}
|
||||
if (comparator.compare(arr[to - 1], arr[mid]) > 0) {
|
||||
swap(arr, to - 1, mid);
|
||||
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
|
||||
swap(arr, from, to - 1);
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
return comparator.compare(pivot, arr[j]);
|
||||
}
|
||||
}.select(from, to, k);
|
||||
}
|
||||
|
||||
T pivot = arr[to - 1];
|
||||
|
||||
int left = from + 1;
|
||||
int right = to - 2;
|
||||
|
||||
for (;;) {
|
||||
while (comparator.compare(pivot, arr[left]) > 0) {
|
||||
++left;
|
||||
}
|
||||
|
||||
while (left < right && comparator.compare(pivot, arr[right]) <= 0) {
|
||||
--right;
|
||||
}
|
||||
|
||||
if (left < right) {
|
||||
swap(arr, left, right);
|
||||
--right;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
swap(arr, left, to - 1);
|
||||
|
||||
if (left == k) {
|
||||
return;
|
||||
} else if (left < k) {
|
||||
quickSelect(arr, left + 1, to, k, comparator, maxDepth);
|
||||
} else {
|
||||
quickSelect(arr, from, left, k, comparator, maxDepth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -378,5 +378,34 @@ public final class ByteBlockPool {
|
|||
}
|
||||
} while (true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the given {@link BytesRef} so that its content is equal to the
|
||||
* {@code ref.length} bytes starting at {@code offset}. Most of the time this
|
||||
* method will set pointers to internal data-structures. However, in case a
|
||||
* value crosses a boundary, a fresh copy will be returned.
|
||||
* On the contrary to {@link #setBytesRef(BytesRef, int)}, this does not
|
||||
* expect the length to be encoded with the data.
|
||||
*/
|
||||
public void setRawBytesRef(BytesRef ref, final long offset) {
|
||||
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
|
||||
int pos = (int) (offset & BYTE_BLOCK_MASK);
|
||||
if (pos + ref.length <= BYTE_BLOCK_SIZE) {
|
||||
ref.bytes = buffers[bufferIndex];
|
||||
ref.offset = pos;
|
||||
} else {
|
||||
ref.bytes = new byte[ref.length];
|
||||
ref.offset = 0;
|
||||
readBytes(offset, ref.bytes, 0, ref.length);
|
||||
}
|
||||
}
|
||||
|
||||
/** Read a single byte at the given {@code offset}. */
|
||||
public byte readByte(long offset) {
|
||||
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
|
||||
int pos = (int) (offset & BYTE_BLOCK_MASK);
|
||||
byte[] buffer = buffers[bufferIndex];
|
||||
return buffer[pos];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,8 +33,8 @@ public abstract class InPlaceMergeSorter extends Sorter {
|
|||
}
|
||||
|
||||
void mergeSort(int from, int to) {
|
||||
if (to - from < INSERTION_SORT_THRESHOLD) {
|
||||
insertionSort(from, to);
|
||||
if (to - from < BINARY_SORT_THRESHOLD) {
|
||||
binarySort(from, to);
|
||||
} else {
|
||||
final int mid = (from + to) >>> 1;
|
||||
mergeSort(from, mid);
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
/** Implementation of the quick select algorithm.
|
||||
* <p>It uses the median of the first, middle and last values as a pivot and
|
||||
* falls back to a heap sort when the number of recursion levels exceeds
|
||||
* {@code 2 lg(n)}, as a consequence it runs in linear time on average and in
|
||||
* {@code n log(n)} time in the worst case.</p>
|
||||
* @lucene.internal */
|
||||
public abstract class IntroSelector extends Selector {
|
||||
|
||||
@Override
|
||||
public final void select(int from, int to, int k) {
|
||||
checkArgs(from, to, k);
|
||||
final int maxDepth = 2 * MathUtil.log(to - from, 2);
|
||||
quickSelect(from, to, k, maxDepth);
|
||||
}
|
||||
|
||||
// heap sort
|
||||
// TODO: use median of median instead to have linear worst-case rather than
|
||||
// n*log(n)
|
||||
void slowSelect(int from, int to, int k) {
|
||||
new Sorter() {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
IntroSelector.this.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return IntroSelector.this.compare(i, j);
|
||||
}
|
||||
|
||||
public void sort(int from, int to) {
|
||||
heapSort(from, to);
|
||||
}
|
||||
}.sort(from, to);
|
||||
}
|
||||
|
||||
private void quickSelect(int from, int to, int k, int maxDepth) {
|
||||
assert from <= k;
|
||||
assert k < to;
|
||||
if (to - from == 1) {
|
||||
return;
|
||||
}
|
||||
if (--maxDepth < 0) {
|
||||
slowSelect(from, to, k);
|
||||
return;
|
||||
}
|
||||
|
||||
final int mid = (from + to) >>> 1;
|
||||
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
|
||||
if (compare(from, to - 1) > 0) {
|
||||
swap(from, to - 1);
|
||||
}
|
||||
if (compare(to - 1, mid) > 0) {
|
||||
swap(to - 1, mid);
|
||||
if (compare(from, to - 1) > 0) {
|
||||
swap(from, to - 1);
|
||||
}
|
||||
}
|
||||
|
||||
setPivot(to - 1);
|
||||
|
||||
int left = from + 1;
|
||||
int right = to - 2;
|
||||
|
||||
for (;;) {
|
||||
while (comparePivot(left) > 0) {
|
||||
++left;
|
||||
}
|
||||
|
||||
while (left < right && comparePivot(right) <= 0) {
|
||||
--right;
|
||||
}
|
||||
|
||||
if (left < right) {
|
||||
swap(left, right);
|
||||
--right;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
swap(left, to - 1);
|
||||
|
||||
if (left == k) {
|
||||
return;
|
||||
} else if (left < k) {
|
||||
quickSelect(left + 1, to, k, maxDepth);
|
||||
} else {
|
||||
quickSelect(from, left, k, maxDepth);
|
||||
}
|
||||
}
|
||||
|
||||
/** Compare entries found in slots <code>i</code> and <code>j</code>.
|
||||
* The contract for the returned value is the same as
|
||||
* {@link Comparator#compare(Object, Object)}. */
|
||||
protected int compare(int i, int j) {
|
||||
setPivot(i);
|
||||
return comparePivot(j);
|
||||
}
|
||||
|
||||
/** Save the value at slot <code>i</code> so that it can later be used as a
|
||||
* pivot, see {@link #comparePivot(int)}. */
|
||||
protected abstract void setPivot(int i);
|
||||
|
||||
/** Compare the pivot with the slot at <code>j</code>, similarly to
|
||||
* {@link #compare(int, int) compare(i, j)}. */
|
||||
protected abstract int comparePivot(int j);
|
||||
}
|
|
@ -16,7 +16,6 @@
|
|||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
|
||||
/**
|
||||
* {@link Sorter} implementation based on a variant of the quicksort algorithm
|
||||
* called <a href="http://en.wikipedia.org/wiki/Introsort">introsort</a>: when
|
||||
|
@ -38,8 +37,8 @@ public abstract class IntroSorter extends Sorter {
|
|||
}
|
||||
|
||||
void quicksort(int from, int to, int maxDepth) {
|
||||
if (to - from < INSERTION_SORT_THRESHOLD) {
|
||||
insertionSort(from, to);
|
||||
if (to - from < BINARY_SORT_THRESHOLD) {
|
||||
binarySort(from, to);
|
||||
return;
|
||||
} else if (--maxDepth < 0) {
|
||||
heapSort(from, to);
|
||||
|
@ -84,11 +83,18 @@ public abstract class IntroSorter extends Sorter {
|
|||
quicksort(left + 1, to, maxDepth);
|
||||
}
|
||||
|
||||
/** Save the value at slot <code>i</code> so that it can later be used as a
|
||||
* pivot, see {@link #comparePivot(int)}. */
|
||||
// Don't rely on the slow default impl of setPivot/comparePivot since
|
||||
// quicksort relies on these methods to be fast for good performance
|
||||
|
||||
@Override
|
||||
protected abstract void setPivot(int i);
|
||||
|
||||
/** Compare the pivot with the slot at <code>j</code>, similarly to
|
||||
* {@link #compare(int, int) compare(i, j)}. */
|
||||
@Override
|
||||
protected abstract int comparePivot(int j);
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
setPivot(i);
|
||||
return comparePivot(j);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ public abstract class MSBRadixSorter extends Sorter {
|
|||
// we store one histogram per recursion level
|
||||
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
|
||||
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
|
||||
private final int[] commonPrefix;
|
||||
|
||||
private final int maxLength;
|
||||
|
||||
|
@ -47,6 +48,7 @@ public abstract class MSBRadixSorter extends Sorter {
|
|||
*/
|
||||
protected MSBRadixSorter(int maxLength) {
|
||||
this.maxLength = maxLength;
|
||||
this.commonPrefix = new int[Math.min(24, maxLength)];
|
||||
}
|
||||
|
||||
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
|
||||
|
@ -116,14 +118,14 @@ public abstract class MSBRadixSorter extends Sorter {
|
|||
@Override
|
||||
public void sort(int from, int to) {
|
||||
checkRange(from, to);
|
||||
sort(from, to, 0);
|
||||
sort(from, to, 0, 0);
|
||||
}
|
||||
|
||||
private void sort(int from, int to, int k) {
|
||||
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
|
||||
private void sort(int from, int to, int k, int l) {
|
||||
if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
|
||||
introSort(from, to, k);
|
||||
} else {
|
||||
radixSort(from, to, k);
|
||||
radixSort(from, to, k, l);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -131,28 +133,30 @@ public abstract class MSBRadixSorter extends Sorter {
|
|||
getFallbackSorter(k).sort(from, to);
|
||||
}
|
||||
|
||||
private void radixSort(int from, int to, int k) {
|
||||
int[] histogram = histograms[k];
|
||||
/**
|
||||
* @param k the character number to compare
|
||||
* @param l the level of recursion
|
||||
*/
|
||||
private void radixSort(int from, int to, int k, int l) {
|
||||
int[] histogram = histograms[l];
|
||||
if (histogram == null) {
|
||||
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
|
||||
histogram = histograms[l] = new int[HISTOGRAM_SIZE];
|
||||
} else {
|
||||
Arrays.fill(histogram, 0);
|
||||
}
|
||||
|
||||
buildHistogram(from, to, k, histogram);
|
||||
|
||||
// short-circuit: if all keys have the same byte at offset k, then recurse directly
|
||||
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
|
||||
if (histogram[i] == to - from) {
|
||||
// everything is in the same bucket, recurse
|
||||
if (i > 0) {
|
||||
sort(from, to, k + 1);
|
||||
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, k, histogram);
|
||||
if (commonPrefixLength > 0) {
|
||||
// if there are no more chars to compare or if all entries fell into the
|
||||
// first bucket (which means strings are shorter than k) then we are done
|
||||
// otherwise recurse
|
||||
if (k + commonPrefixLength < maxLength
|
||||
&& histogram[0] < to - from) {
|
||||
radixSort(from, to, k + commonPrefixLength, l);
|
||||
}
|
||||
return;
|
||||
} else if (histogram[i] != 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert assertHistogram(commonPrefixLength, histogram);
|
||||
|
||||
int[] startOffsets = histogram;
|
||||
int[] endOffsets = this.endOffsets;
|
||||
|
@ -167,24 +171,83 @@ public abstract class MSBRadixSorter extends Sorter {
|
|||
int h = endOffsets[i];
|
||||
final int bucketLen = h - prev;
|
||||
if (bucketLen > 1) {
|
||||
sort(from + prev, from + h, k + 1);
|
||||
sort(from + prev, from + h, k + 1, l + 1);
|
||||
}
|
||||
prev = h;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// only used from assert
|
||||
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
|
||||
int numberOfUniqueBytes = 0;
|
||||
for (int freq : histogram) {
|
||||
if (freq > 0) {
|
||||
numberOfUniqueBytes++;
|
||||
}
|
||||
}
|
||||
if (numberOfUniqueBytes == 1) {
|
||||
assert commonPrefixLength >= 1;
|
||||
} else {
|
||||
assert commonPrefixLength == 0 : commonPrefixLength;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
|
||||
private int getBucket(int i, int k) {
|
||||
return byteAt(i, k) + 1;
|
||||
}
|
||||
|
||||
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
|
||||
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
|
||||
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
|
||||
* and return a common prefix length for all visited values.
|
||||
* @see #buildHistogram */
|
||||
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
|
||||
final int[] commonPrefix = this.commonPrefix;
|
||||
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(from, k + j);
|
||||
commonPrefix[j] = b;
|
||||
if (b == -1) {
|
||||
commonPrefixLength = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int i;
|
||||
outer: for (i = from + 1; i < to; ++i) {
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(i, k + j);
|
||||
if (b != commonPrefix[j]) {
|
||||
commonPrefixLength = j;
|
||||
if (commonPrefixLength == 0) { // we have no common prefix
|
||||
histogram[commonPrefix[0] + 1] = i - from;
|
||||
histogram[b + 1] = 1;
|
||||
break outer;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i < to) {
|
||||
// the loop got broken because there is no common prefix
|
||||
assert commonPrefixLength == 0;
|
||||
buildHistogram(i + 1, to, k, histogram);
|
||||
} else {
|
||||
assert commonPrefixLength > 0;
|
||||
histogram[commonPrefix[0] + 1] = to - from;
|
||||
}
|
||||
|
||||
return commonPrefixLength;
|
||||
}
|
||||
|
||||
/** Build a histogram of the k-th characters of values occurring between
|
||||
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
|
||||
private void buildHistogram(int from, int to, int k, int[] histogram) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
histogram[getBucket(i, k)]++;
|
||||
}
|
||||
return histogram;
|
||||
}
|
||||
|
||||
/** Accumulate values of the histogram so that it does not store counts but
|
||||
|
|
|
@ -0,0 +1,278 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/** Radix selector.
|
||||
* <p>This implementation works similarly to a MSB radix sort except that it
|
||||
* only recurses into the sub partition that contains the desired value.
|
||||
* @lucene.internal */
|
||||
public abstract class RadixSelector extends Selector {
|
||||
|
||||
// after that many levels of recursion we fall back to introselect anyway
|
||||
// this is used as a protection against the fact that radix sort performs
|
||||
// worse when there are long common prefixes (probably because of cache
|
||||
// locality)
|
||||
private static final int LEVEL_THRESHOLD = 8;
|
||||
// size of histograms: 256 + 1 to indicate that the string is finished
|
||||
private static final int HISTOGRAM_SIZE = 257;
|
||||
// buckets below this size will be sorted with introselect
|
||||
private static final int LENGTH_THRESHOLD = 100;
|
||||
|
||||
// we store one histogram per recursion level
|
||||
private final int[] histogram = new int[HISTOGRAM_SIZE];
|
||||
private final int[] commonPrefix;
|
||||
|
||||
private final int maxLength;
|
||||
|
||||
/**
|
||||
* Sole constructor.
|
||||
* @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown.
|
||||
*/
|
||||
protected RadixSelector(int maxLength) {
|
||||
this.maxLength = maxLength;
|
||||
this.commonPrefix = new int[Math.min(24, maxLength)];
|
||||
}
|
||||
|
||||
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
|
||||
* its length is less than or equal to {@code k}. This may only be called
|
||||
* with a value of {@code i} between {@code 0} included and
|
||||
* {@code maxLength} excluded. */
|
||||
protected abstract int byteAt(int i, int k);
|
||||
|
||||
/** Get a fall-back selector which may assume that the first {@code d} bytes
|
||||
* of all compared strings are equal. This fallback selector is used when
|
||||
* the range becomes narrow or when the maximum level of recursion has
|
||||
* been exceeded. */
|
||||
protected Selector getFallbackSelector(int d) {
|
||||
return new IntroSelector() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
RadixSelector.this.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
for (int o = d; o < maxLength; ++o) {
|
||||
final int b1 = byteAt(i, o);
|
||||
final int b2 = byteAt(j, o);
|
||||
if (b1 != b2) {
|
||||
return b1 - b2;
|
||||
} else if (b1 == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot.setLength(0);
|
||||
for (int o = d; o < maxLength; ++o) {
|
||||
final int b = byteAt(i, o);
|
||||
if (b == -1) {
|
||||
break;
|
||||
}
|
||||
pivot.append((byte) b);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
for (int o = 0; o < pivot.length(); ++o) {
|
||||
final int b1 = pivot.byteAt(o) & 0xff;
|
||||
final int b2 = byteAt(j, d + o);
|
||||
if (b1 != b2) {
|
||||
return b1 - b2;
|
||||
}
|
||||
}
|
||||
if (d + pivot.length() == maxLength) {
|
||||
return 0;
|
||||
}
|
||||
return -1 - byteAt(j, d + pivot.length());
|
||||
}
|
||||
|
||||
private final BytesRefBuilder pivot = new BytesRefBuilder();
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void select(int from, int to, int k) {
|
||||
checkArgs(from, to, k);
|
||||
select(from, to, k, 0, 0);
|
||||
}
|
||||
|
||||
private void select(int from, int to, int k, int d, int l) {
|
||||
if (to - from <= LENGTH_THRESHOLD || d >= LEVEL_THRESHOLD) {
|
||||
getFallbackSelector(d).select(from, to, k);
|
||||
} else {
|
||||
radixSelect(from, to, k, d, l);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param d the character number to compare
|
||||
* @param l the level of recursion
|
||||
*/
|
||||
private void radixSelect(int from, int to, int k, int d, int l) {
|
||||
final int[] histogram = this.histogram;
|
||||
Arrays.fill(histogram, 0);
|
||||
|
||||
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, d, histogram);
|
||||
if (commonPrefixLength > 0) {
|
||||
// if there are no more chars to compare or if all entries fell into the
|
||||
// first bucket (which means strings are shorter than d) then we are done
|
||||
// otherwise recurse
|
||||
if (d + commonPrefixLength < maxLength
|
||||
&& histogram[0] < to - from) {
|
||||
radixSelect(from, to, k, d + commonPrefixLength, l);
|
||||
}
|
||||
return;
|
||||
}
|
||||
assert assertHistogram(commonPrefixLength, histogram);
|
||||
|
||||
int bucketFrom = from;
|
||||
for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) {
|
||||
final int bucketTo = bucketFrom + histogram[bucket];
|
||||
|
||||
if (bucketTo > k) {
|
||||
partition(from, to, bucket, bucketFrom, bucketTo, d);
|
||||
|
||||
if (bucket != 0 && d + 1 < maxLength) {
|
||||
// all elements in bucket 0 are equal so we only need to recurse if bucket != 0
|
||||
select(bucketFrom, bucketTo, k, d + 1, l + 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
bucketFrom = bucketTo;
|
||||
}
|
||||
throw new AssertionError("Unreachable code");
|
||||
}
|
||||
|
||||
// only used from assert
|
||||
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
|
||||
int numberOfUniqueBytes = 0;
|
||||
for (int freq : histogram) {
|
||||
if (freq > 0) {
|
||||
numberOfUniqueBytes++;
|
||||
}
|
||||
}
|
||||
if (numberOfUniqueBytes == 1) {
|
||||
assert commonPrefixLength >= 1;
|
||||
} else {
|
||||
assert commonPrefixLength == 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
|
||||
private int getBucket(int i, int k) {
|
||||
return byteAt(i, k) + 1;
|
||||
}
|
||||
|
||||
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
|
||||
* and return a common prefix length for all visited values.
|
||||
* @see #buildHistogram */
|
||||
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
|
||||
final int[] commonPrefix = this.commonPrefix;
|
||||
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(from, k + j);
|
||||
commonPrefix[j] = b;
|
||||
if (b == -1) {
|
||||
commonPrefixLength = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int i;
|
||||
outer: for (i = from + 1; i < to; ++i) {
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(i, k + j);
|
||||
if (b != commonPrefix[j]) {
|
||||
commonPrefixLength = j;
|
||||
if (commonPrefixLength == 0) { // we have no common prefix
|
||||
histogram[commonPrefix[0] + 1] = i - from;
|
||||
histogram[b + 1] = 1;
|
||||
break outer;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i < to) {
|
||||
// the loop got broken because there is no common prefix
|
||||
assert commonPrefixLength == 0;
|
||||
buildHistogram(i + 1, to, k, histogram);
|
||||
} else {
|
||||
assert commonPrefixLength > 0;
|
||||
histogram[commonPrefix[0] + 1] = to - from;
|
||||
}
|
||||
|
||||
return commonPrefixLength;
|
||||
}
|
||||
|
||||
/** Build an histogram of the k-th characters of values occurring between
|
||||
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
|
||||
private void buildHistogram(int from, int to, int k, int[] histogram) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
histogram[getBucket(i, k)]++;
|
||||
}
|
||||
}
|
||||
|
||||
/** Reorder elements so that all of them that fall into {@code bucket} are
|
||||
* between offsets {@code bucketFrom} and {@code bucketTo}. */
|
||||
private void partition(int from, int to, int bucket, int bucketFrom, int bucketTo, int d) {
|
||||
int left = from;
|
||||
int right = to - 1;
|
||||
|
||||
int slot = bucketFrom;
|
||||
|
||||
for (;;) {
|
||||
int leftBucket = getBucket(left, d);
|
||||
int rightBucket = getBucket(right, d);
|
||||
|
||||
while (leftBucket <= bucket && left < bucketFrom) {
|
||||
if (leftBucket == bucket) {
|
||||
swap(left, slot++);
|
||||
} else {
|
||||
++left;
|
||||
}
|
||||
leftBucket = getBucket(left, d);
|
||||
}
|
||||
|
||||
while (rightBucket >= bucket && right >= bucketTo) {
|
||||
if (rightBucket == bucket) {
|
||||
swap(right, slot++);
|
||||
} else {
|
||||
--right;
|
||||
}
|
||||
rightBucket = getBucket(right, d);
|
||||
}
|
||||
|
||||
if (left < bucketFrom && right >= bucketTo) {
|
||||
swap(left++, right--);
|
||||
} else {
|
||||
assert left == bucketFrom;
|
||||
assert right == bucketTo - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
/**
 * Base class for selection algorithms, i.e. algorithms that put the element
 * which would end up at a given position after a full sort at that position,
 * without necessarily sorting everything else.
 */
public abstract class Selector {

  /**
   * Reorder elements so that the element at position {@code k} is the one that
   * would be there if all elements were sorted, and so that the remaining
   * elements are partitioned around it: {@code [from, k)} only contains
   * elements that are less than or equal to the element at {@code k}, and
   * {@code (k, to)} only contains elements that are greater than or equal to
   * the element at {@code k}.
   */
  public abstract void select(int from, int to, int k);

  /**
   * Validates that {@code from <= k < to}.
   *
   * @throws IllegalArgumentException if {@code k} lies outside {@code [from, to)}
   */
  void checkArgs(int from, int to, int k) {
    final String problem;
    if (k < from) {
      problem = "k must be >= from";
    } else if (k >= to) {
      problem = "k must be < to";
    } else {
      return;
    }
    throw new IllegalArgumentException(problem);
  }

  /** Exchange the values stored at slots <code>i</code> and <code>j</code>. */
  protected abstract void swap(int i, int j);
}
|
|
@ -23,7 +23,7 @@ import java.util.Comparator;
|
|||
* @lucene.internal */
|
||||
public abstract class Sorter {
|
||||
|
||||
static final int INSERTION_SORT_THRESHOLD = 20;
|
||||
static final int BINARY_SORT_THRESHOLD = 20;
|
||||
|
||||
/** Sole constructor, used for inheritance. */
|
||||
protected Sorter() {}
|
||||
|
@ -36,6 +36,20 @@ public abstract class Sorter {
|
|||
/** Swap values at slots <code>i</code> and <code>j</code>. */
|
||||
protected abstract void swap(int i, int j);
|
||||
|
||||
private int pivotIndex;
|
||||
|
||||
/** Save the value at slot <code>i</code> so that it can later be used as a
|
||||
* pivot, see {@link #comparePivot(int)}. */
|
||||
protected void setPivot(int i) {
|
||||
pivotIndex = i;
|
||||
}
|
||||
|
||||
/** Compare the pivot with the slot at <code>j</code>, similarly to
|
||||
* {@link #compare(int, int) compare(i, j)}. */
|
||||
protected int comparePivot(int j) {
|
||||
return compare(pivotIndex, j);
|
||||
}
|
||||
|
||||
/** Sort the slice which starts at <code>from</code> (inclusive) and ends at
|
||||
* <code>to</code> (exclusive). */
|
||||
public abstract void sort(int from, int to);
|
||||
|
@ -163,54 +177,41 @@ public abstract class Sorter {
|
|||
}
|
||||
}
|
||||
|
||||
void insertionSort(int from, int to) {
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
for (int j = i; j > from; --j) {
|
||||
if (compare(j - 1, j) > 0) {
|
||||
swap(j - 1, j);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A binary sort implementation. This performs {@code O(n*log(n))} comparisons
|
||||
* and {@code O(n^2)} swaps. It is typically used by more sophisticated
|
||||
* implementations as a fall-back when the numbers of items to sort has become
|
||||
* less than {@value #BINARY_SORT_THRESHOLD}.
|
||||
*/
|
||||
void binarySort(int from, int to) {
|
||||
binarySort(from, to, from + 1);
|
||||
}
|
||||
|
||||
void binarySort(int from, int to, int i) {
|
||||
for ( ; i < to; ++i) {
|
||||
setPivot(i);
|
||||
int l = from;
|
||||
int h = i - 1;
|
||||
while (l <= h) {
|
||||
final int mid = (l + h) >>> 1;
|
||||
final int cmp = compare(i, mid);
|
||||
final int cmp = comparePivot(mid);
|
||||
if (cmp < 0) {
|
||||
h = mid - 1;
|
||||
} else {
|
||||
l = mid + 1;
|
||||
}
|
||||
}
|
||||
switch (i - l) {
|
||||
case 2:
|
||||
swap(l + 1, l + 2);
|
||||
swap(l, l + 1);
|
||||
break;
|
||||
case 1:
|
||||
swap(l, l + 1);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
for (int j = i; j > l; --j) {
|
||||
swap(j - 1, j);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Use heap sort to sort items between {@code from} inclusive and {@code to}
|
||||
* exclusive. This runs in {@code O(n*log(n))} and is used as a fall-back by
|
||||
* {@link IntroSorter}.
|
||||
*/
|
||||
void heapSort(int from, int to) {
|
||||
if (to - from <= 1) {
|
||||
return;
|
||||
|
|
|
@ -357,13 +357,13 @@ public class Automaton implements Accountable {
|
|||
}
|
||||
|
||||
private void growStates() {
|
||||
if (nextState+2 >= states.length) {
|
||||
if (nextState+2 > states.length) {
|
||||
states = ArrayUtil.grow(states, nextState+2);
|
||||
}
|
||||
}
|
||||
|
||||
private void growTransitions() {
|
||||
if (nextTransition+3 >= transitions.length) {
|
||||
if (nextTransition+3 > transitions.length) {
|
||||
transitions = ArrayUtil.grow(transitions, nextTransition+3);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.List;
|
|||
import java.util.function.IntFunction;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.PointValues.Relation;
|
||||
|
@ -111,7 +112,8 @@ public class BKDWriter implements Closeable {
|
|||
final byte[] scratchDiff;
|
||||
final byte[] scratch1;
|
||||
final byte[] scratch2;
|
||||
final BytesRef scratchBytesRef = new BytesRef();
|
||||
final BytesRef scratchBytesRef1 = new BytesRef();
|
||||
final BytesRef scratchBytesRef2 = new BytesRef();
|
||||
final int[] commonPrefixLengths;
|
||||
|
||||
protected final FixedBitSet docsSeen;
|
||||
|
@ -173,7 +175,6 @@ public class BKDWriter implements Closeable {
|
|||
packedBytesLength = numDims * bytesPerDim;
|
||||
|
||||
scratchDiff = new byte[bytesPerDim];
|
||||
scratchBytesRef.length = packedBytesLength;
|
||||
scratch1 = new byte[packedBytesLength];
|
||||
scratch2 = new byte[packedBytesLength];
|
||||
commonPrefixLengths = new int[numDims];
|
||||
|
@ -204,7 +205,7 @@ public class BKDWriter implements Closeable {
|
|||
// all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X
|
||||
// what that level would consume, so we multiply by 0.5 to convert from
|
||||
// bytes to points here. Each dimension has its own sorted partition, so
|
||||
// we must divide by numDims as well.
|
||||
// we must divide by numDims as well.
|
||||
|
||||
maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
|
||||
|
||||
|
@ -416,15 +417,25 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
|
||||
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
|
||||
* dimensional values were deleted. */
|
||||
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
|
||||
if (numDims != 1) {
|
||||
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
|
||||
/** Write a field from a {@link MutablePointsReader}. This way of writing
|
||||
* points is faster than regular writes with {@link BKDWriter#add} since
|
||||
* there is opportunity for reordering points before writing them to
|
||||
* disk. This method does not use transient disk in order to reorder points.
|
||||
*/
|
||||
public long writeField(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
|
||||
if (numDims == 1) {
|
||||
return writeField1Dim(out, fieldName, reader);
|
||||
} else {
|
||||
return writeFieldNDims(out, fieldName, reader);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* In the 2+D case, we recursively pick the split dimension, compute the
|
||||
* median value and partition other values around it. */
|
||||
private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
|
||||
if (pointCount != 0) {
|
||||
throw new IllegalStateException("cannot mix add and merge");
|
||||
throw new IllegalStateException("cannot mix add and writeField");
|
||||
}
|
||||
|
||||
// Catch user silliness:
|
||||
|
@ -435,6 +446,81 @@ public class BKDWriter implements Closeable {
|
|||
// Mark that we already finished:
|
||||
heapPointWriter = null;
|
||||
|
||||
long countPerLeaf = pointCount = reader.size(fieldName);
|
||||
long innerNodeCount = 1;
|
||||
|
||||
while (countPerLeaf > maxPointsInLeafNode) {
|
||||
countPerLeaf = (countPerLeaf+1)/2;
|
||||
innerNodeCount *= 2;
|
||||
}
|
||||
|
||||
int numLeaves = Math.toIntExact(innerNodeCount);
|
||||
|
||||
checkMaxLeafNodeCount(numLeaves);
|
||||
|
||||
final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)];
|
||||
final long[] leafBlockFPs = new long[numLeaves];
|
||||
|
||||
// compute the min/max for this slice
|
||||
Arrays.fill(minPackedValue, (byte) 0xff);
|
||||
Arrays.fill(maxPackedValue, (byte) 0);
|
||||
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
|
||||
reader.getValue(i, scratchBytesRef1);
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset) < 0) {
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
|
||||
}
|
||||
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset) > 0) {
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
|
||||
}
|
||||
}
|
||||
|
||||
docsSeen.set(reader.getDocID(i));
|
||||
}
|
||||
|
||||
build(1, numLeaves, reader, 0, Math.toIntExact(pointCount), out,
|
||||
minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
|
||||
new int[maxPointsInLeafNode]);
|
||||
|
||||
long indexFP = out.getFilePointer();
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues);
|
||||
return indexFP;
|
||||
}
|
||||
|
||||
|
||||
/* In the 1D case, we can simply sort points in ascending order and use the
|
||||
* same writing logic as we use at merge time. */
|
||||
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
|
||||
MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size(fieldName)));
|
||||
|
||||
final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
|
||||
|
||||
reader.intersect(fieldName, new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) throws IOException {
|
||||
oneDimWriter.add(packedValue, docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) throws IOException {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
});
|
||||
|
||||
return oneDimWriter.finish();
|
||||
}
|
||||
|
||||
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
|
||||
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
|
||||
* dimensional values were deleted. */
|
||||
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
|
||||
assert docMaps == null || readers.size() == docMaps.size();
|
||||
|
||||
BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());
|
||||
|
@ -453,72 +539,14 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
if (queue.size() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int leafCount = 0;
|
||||
List<Long> leafBlockFPs = new ArrayList<>();
|
||||
List<byte[]> leafBlockStartValues = new ArrayList<>();
|
||||
|
||||
// Target halfway between min and max allowed for the leaf:
|
||||
int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode);
|
||||
//System.out.println("POINTS PER: " + pointsPerLeafBlock);
|
||||
|
||||
byte[] lastPackedValue = new byte[bytesPerDim];
|
||||
byte[] firstPackedValue = new byte[bytesPerDim];
|
||||
long valueCount = 0;
|
||||
|
||||
// Buffer up each leaf block's docs and values
|
||||
int[] leafBlockDocIDs = new int[maxPointsInLeafNode];
|
||||
byte[][] leafBlockPackedValues = new byte[maxPointsInLeafNode][];
|
||||
for(int i=0;i<maxPointsInLeafNode;i++) {
|
||||
leafBlockPackedValues[i] = new byte[packedBytesLength];
|
||||
}
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
|
||||
|
||||
while (queue.size() != 0) {
|
||||
MergeReader reader = queue.top();
|
||||
// System.out.println("iter reader=" + reader);
|
||||
|
||||
// NOTE: doesn't work with subclasses (e.g. SimpleText!)
|
||||
int docID = reader.docID;
|
||||
leafBlockDocIDs[leafCount] = docID;
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
|
||||
docsSeen.set(docID);
|
||||
|
||||
if (valueCount == 0) {
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength);
|
||||
}
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue, 0);
|
||||
valueCount++;
|
||||
if (pointCount > totalPointCount) {
|
||||
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
|
||||
}
|
||||
|
||||
if (leafCount == 0) {
|
||||
if (leafBlockFPs.size() > 0) {
|
||||
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
|
||||
leafBlockStartValues.add(Arrays.copyOf(reader.state.scratchPackedValue, bytesPerDim));
|
||||
}
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, firstPackedValue, 0, bytesPerDim);
|
||||
} else {
|
||||
// Find per-dim common prefix:
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim * bytesPerDim;
|
||||
for(int j=0;j<commonPrefixLengths[dim];j++) {
|
||||
if (firstPackedValue[offset+j] != reader.state.scratchPackedValue[offset+j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
leafCount++;
|
||||
oneDimWriter.add(reader.state.scratchPackedValue, reader.docID);
|
||||
|
||||
if (reader.next()) {
|
||||
queue.updateTop();
|
||||
|
@ -526,35 +554,78 @@ public class BKDWriter implements Closeable {
|
|||
// This segment was exhausted
|
||||
queue.pop();
|
||||
}
|
||||
}
|
||||
|
||||
return oneDimWriter.finish();
|
||||
}
|
||||
|
||||
private class OneDimensionBKDWriter {
|
||||
|
||||
final IndexOutput out;
|
||||
final List<Long> leafBlockFPs = new ArrayList<>();
|
||||
final List<byte[]> leafBlockStartValues = new ArrayList<>();
|
||||
final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
|
||||
final int[] leafDocs = new int[maxPointsInLeafNode];
|
||||
long valueCount;
|
||||
int leafCount;
|
||||
|
||||
OneDimensionBKDWriter(IndexOutput out) {
|
||||
if (numDims != 1) {
|
||||
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
|
||||
}
|
||||
if (pointCount != 0) {
|
||||
throw new IllegalStateException("cannot mix add and merge");
|
||||
}
|
||||
|
||||
// Catch user silliness:
|
||||
if (heapPointWriter == null && tempInput == null) {
|
||||
throw new IllegalStateException("already finished");
|
||||
}
|
||||
|
||||
// Mark that we already finished:
|
||||
heapPointWriter = null;
|
||||
|
||||
this.out = out;
|
||||
|
||||
lastPackedValue = new byte[packedBytesLength];
|
||||
}
|
||||
|
||||
// for asserts
|
||||
final byte[] lastPackedValue;
|
||||
int lastDocID;
|
||||
|
||||
void add(byte[] packedValue, int docID) throws IOException {
|
||||
assert valueInOrder(valueCount + leafCount,
|
||||
0, lastPackedValue, packedValue, 0, docID, lastDocID);
|
||||
|
||||
System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
|
||||
leafDocs[leafCount] = docID;
|
||||
docsSeen.set(docID);
|
||||
leafCount++;
|
||||
|
||||
if (valueCount > totalPointCount) {
|
||||
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
|
||||
}
|
||||
|
||||
if (leafCount == maxPointsInLeafNode) {
|
||||
// We write a block once we hit exactly the max count ... this is different from
|
||||
// when we flush a new segment, where we write between max/2 and max per leaf block,
|
||||
// so merged segments will behave differently from newly flushed segments:
|
||||
if (leafCount == pointsPerLeafBlock || queue.size() == 0) {
|
||||
leafBlockFPs.add(out.getFilePointer());
|
||||
checkMaxLeafNodeCount(leafBlockFPs.size());
|
||||
|
||||
writeLeafBlockDocs(out, leafBlockDocIDs, 0, leafCount);
|
||||
writeCommonPrefixes(out, commonPrefixLengths, firstPackedValue);
|
||||
|
||||
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
|
||||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
{
|
||||
scratch.length = packedBytesLength;
|
||||
scratch.offset = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
scratch.bytes = leafBlockPackedValues[i];
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
|
||||
|
||||
writeLeafBlock();
|
||||
leafCount = 0;
|
||||
}
|
||||
|
||||
assert (lastDocID = docID) >= 0; // only assign when asserts are enabled
|
||||
}
|
||||
|
||||
public long finish() throws IOException {
|
||||
if (leafCount > 0) {
|
||||
writeLeafBlock();
|
||||
leafCount = 0;
|
||||
}
|
||||
|
||||
if (valueCount == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
pointCount = valueCount;
|
||||
|
@ -575,6 +646,60 @@ public class BKDWriter implements Closeable {
|
|||
return indexFP;
|
||||
}
|
||||
|
||||
private void writeLeafBlock() throws IOException {
|
||||
assert leafCount != 0;
|
||||
if (valueCount == 0) {
|
||||
System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
|
||||
}
|
||||
System.arraycopy(leafValues, (leafCount - 1) * packedBytesLength, maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
valueCount += leafCount;
|
||||
|
||||
if (leafBlockFPs.size() > 0) {
|
||||
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
|
||||
leafBlockStartValues.add(Arrays.copyOf(leafValues, packedBytesLength));
|
||||
}
|
||||
leafBlockFPs.add(out.getFilePointer());
|
||||
checkMaxLeafNodeCount(leafBlockFPs.size());
|
||||
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
// Find per-dim common prefix:
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset1 = dim * bytesPerDim;
|
||||
int offset2 = (leafCount - 1) * packedBytesLength + offset1;
|
||||
for(int j=0;j<commonPrefixLengths[dim];j++) {
|
||||
if (leafValues[offset1+j] != leafValues[offset2+j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writeLeafBlockDocs(out, leafDocs, 0, leafCount);
|
||||
writeCommonPrefixes(out, commonPrefixLengths, leafValues);
|
||||
|
||||
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
|
||||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
{
|
||||
scratch.length = packedBytesLength;
|
||||
scratch.bytes = leafValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
scratch.offset = packedBytesLength * i;
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength),
|
||||
Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
|
||||
packedValues, leafDocs, 0);
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TODO: there must be a simpler way?
|
||||
private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
|
||||
//System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length);
|
||||
|
@ -686,6 +811,7 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
private PointWriter sort(int dim) throws IOException {
|
||||
assert dim >= 0 && dim < numDims;
|
||||
|
||||
if (heapPointWriter != null) {
|
||||
|
||||
|
@ -1110,6 +1236,132 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/* Recursively reorders the provided reader and writes the bkd-tree on the fly. */
|
||||
private void build(int nodeID, int leafNodeOffset,
|
||||
MutablePointsReader reader, int from, int to,
|
||||
IndexOutput out,
|
||||
byte[] minPackedValue, byte[] maxPackedValue,
|
||||
byte[] splitPackedValues,
|
||||
long[] leafBlockFPs,
|
||||
int[] spareDocIds) throws IOException {
|
||||
|
||||
if (nodeID >= leafNodeOffset) {
|
||||
// leaf node
|
||||
final int count = to - from;
|
||||
assert count <= maxPointsInLeafNode;
|
||||
|
||||
// Compute common prefixes
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
reader.getValue(i, scratchBytesRef2);
|
||||
for (int dim=0;dim<numDims;dim++) {
|
||||
final int offset = dim * bytesPerDim;
|
||||
for(int j=0;j<commonPrefixLengths[dim];j++) {
|
||||
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[numDims];
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
if (commonPrefixLengths[dim] < bytesPerDim) {
|
||||
usedBytes[dim] = new FixedBitSet(256);
|
||||
}
|
||||
}
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
for (int dim=0;dim<numDims;dim++) {
|
||||
if (usedBytes[dim] != null) {
|
||||
byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
|
||||
usedBytes[dim].set(Byte.toUnsignedInt(b));
|
||||
}
|
||||
}
|
||||
}
|
||||
int sortedDim = 0;
|
||||
int sortedDimCardinality = Integer.MAX_VALUE;
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
if (usedBytes[dim] != null) {
|
||||
final int cardinality = usedBytes[dim].cardinality();
|
||||
if (cardinality < sortedDimCardinality) {
|
||||
sortedDim = dim;
|
||||
sortedDimCardinality = cardinality;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sort by sortedDim
|
||||
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
|
||||
reader, from, to, scratchBytesRef1, scratchBytesRef2);
|
||||
|
||||
// Save the block file pointer:
|
||||
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
|
||||
|
||||
// Write doc IDs
|
||||
int[] docIDs = spareDocIds;
|
||||
for (int i = from; i < to; ++i) {
|
||||
docIDs[i - from] = reader.getDocID(i);
|
||||
}
|
||||
writeLeafBlockDocs(out, docIDs, 0, count);
|
||||
|
||||
// Write the common prefixes:
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
|
||||
writeCommonPrefixes(out, commonPrefixLengths, scratch1);
|
||||
|
||||
// Write the full values:
|
||||
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
reader.getValue(from + i, scratchBytesRef1);
|
||||
return scratchBytesRef1;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
||||
docIDs, 0);
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
|
||||
|
||||
} else {
|
||||
// inner node
|
||||
|
||||
// compute the split dimension and partition around it
|
||||
final int splitDim = split(minPackedValue, maxPackedValue);
|
||||
final int mid = (from + to + 1) >>> 1;
|
||||
|
||||
int commonPrefixLen = bytesPerDim;
|
||||
for (int i = 0; i < bytesPerDim; ++i) {
|
||||
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
|
||||
commonPrefixLen = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
|
||||
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
|
||||
|
||||
// set the split value
|
||||
final int address = nodeID * (1+bytesPerDim);
|
||||
splitPackedValues[address] = (byte) splitDim;
|
||||
reader.getValue(mid, scratchBytesRef1);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
|
||||
|
||||
byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
|
||||
byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
|
||||
minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
|
||||
maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
|
||||
|
||||
// recurse
|
||||
build(nodeID * 2, leafNodeOffset, reader, from, mid, out,
|
||||
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||
build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out,
|
||||
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||
}
|
||||
}
|
||||
|
||||
/** The array (sized numDims) of PathSlice describe the cell we have currently recursed to. */
|
||||
private void build(int nodeID, int leafNodeOffset,
|
||||
PathSlice[] slices,
|
||||
|
@ -1217,7 +1469,8 @@ public class BKDWriter implements Closeable {
|
|||
return scratch;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(count, minPackedValue, maxPackedValue, packedValues);
|
||||
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
||||
heapSource.docIDs, Math.toIntExact(source.start));
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
|
||||
|
||||
} else {
|
||||
|
@ -1321,12 +1574,16 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
// only called from assert
|
||||
private boolean valuesInOrderAndBounds(int count, byte[] minPackedValue, byte[] maxPackedValue, IntFunction<BytesRef> values) throws IOException {
|
||||
byte[] lastPackedValue = new byte[bytesPerDim];
|
||||
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue,
|
||||
IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException {
|
||||
byte[] lastPackedValue = new byte[packedBytesLength];
|
||||
int lastDoc = -1;
|
||||
for (int i=0;i<count;i++) {
|
||||
BytesRef packedValue = values.apply(i);
|
||||
assert packedValue.length == packedBytesLength;
|
||||
assert numDims != 1 || valueInOrder(i, lastPackedValue, packedValue.bytes, packedValue.offset);
|
||||
assert valueInOrder(i, sortedDim, lastPackedValue, packedValue.bytes, packedValue.offset,
|
||||
docs[docsOffset + i], lastDoc);
|
||||
lastDoc = docs[docsOffset + i];
|
||||
|
||||
// Make sure this value does in fact fall within this leaf cell:
|
||||
assert valueInBounds(packedValue, minPackedValue, maxPackedValue);
|
||||
|
@ -1335,11 +1592,19 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
// only called from assert
|
||||
private boolean valueInOrder(long ord, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset) {
|
||||
if (ord > 0 && StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, packedValueOffset) > 0) {
|
||||
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset,
|
||||
int doc, int lastDoc) {
|
||||
int dimOffset = sortedDim * bytesPerDim;
|
||||
if (ord > 0) {
|
||||
int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, dimOffset, packedValue, packedValueOffset + dimOffset);
|
||||
if (cmp > 0) {
|
||||
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
|
||||
}
|
||||
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, bytesPerDim);
|
||||
if (cmp == 0 && doc < lastDoc) {
|
||||
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
|
||||
}
|
||||
}
|
||||
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,186 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.bkd;
|
||||
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntroSelector;
|
||||
import org.apache.lucene.util.IntroSorter;
|
||||
import org.apache.lucene.util.MSBRadixSorter;
|
||||
import org.apache.lucene.util.RadixSelector;
|
||||
import org.apache.lucene.util.Selector;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
final class MutablePointsReaderUtils {
|
||||
|
||||
MutablePointsReaderUtils() {}
|
||||
|
||||
/** Sort the given {@link MutablePointsReader} based on its packed value then doc ID. */
|
||||
static void sort(int maxDoc, int packedBytesLength,
|
||||
MutablePointsReader reader, int from, int to) {
|
||||
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
|
||||
new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
if (k < packedBytesLength) {
|
||||
return Byte.toUnsignedInt(reader.getByteAt(i, k));
|
||||
} else {
|
||||
final int shift = bitsPerDocId - ((k - packedBytesLength + 1) << 3);
|
||||
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
|
||||
return new IntroSorter() {
|
||||
|
||||
final BytesRef pivot = new BytesRef();
|
||||
final BytesRef scratch = new BytesRef();
|
||||
int pivotDoc;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
reader.getValue(i, pivot);
|
||||
pivotDoc = reader.getDocID(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
if (k < packedBytesLength) {
|
||||
reader.getValue(j, scratch);
|
||||
int cmp = StringHelper.compare(packedBytesLength - k, pivot.bytes, pivot.offset + k, scratch.bytes, scratch.offset + k);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return pivotDoc - reader.getDocID(j);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}.sort(from, to);
|
||||
}
|
||||
|
||||
/** Sort points on the given dimension. */
|
||||
static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
|
||||
MutablePointsReader reader, int from, int to,
|
||||
BytesRef scratch1, BytesRef scratch2) {
|
||||
|
||||
// No need for a fancy radix sort here, this is called on the leaves only so
|
||||
// there are not many values to sort
|
||||
final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
|
||||
final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
|
||||
new IntroSorter() {
|
||||
|
||||
final BytesRef pivot = scratch1;
|
||||
int pivotDoc = -1;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
reader.getValue(i, pivot);
|
||||
pivotDoc = reader.getDocID(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
reader.getValue(j, scratch2);
|
||||
int cmp = StringHelper.compare(numBytesToCompare, pivot.bytes, pivot.offset + offset, scratch2.bytes, scratch2.offset + offset);
|
||||
if (cmp == 0) {
|
||||
cmp = pivotDoc - reader.getDocID(j);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
}.sort(from, to);
|
||||
}
|
||||
|
||||
/** Partition points around {@code mid}. All values on the left must be less
|
||||
* than or equal to it and all values on the right must be greater than or
|
||||
* equal to it. */
|
||||
static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
|
||||
MutablePointsReader reader, int from, int to, int mid,
|
||||
BytesRef scratch1, BytesRef scratch2) {
|
||||
final int offset = splitDim * bytesPerDim + commonPrefixLen;
|
||||
final int cmpBytes = bytesPerDim - commonPrefixLen;
|
||||
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
|
||||
new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {
|
||||
|
||||
@Override
|
||||
protected Selector getFallbackSelector(int k) {
|
||||
return new IntroSelector() {
|
||||
|
||||
final BytesRef pivot = scratch1;
|
||||
int pivotDoc;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
reader.getValue(i, pivot);
|
||||
pivotDoc = reader.getDocID(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
if (k < cmpBytes) {
|
||||
reader.getValue(j, scratch2);
|
||||
int cmp = StringHelper.compare(cmpBytes - k, pivot.bytes, pivot.offset + offset + k, scratch2.bytes, scratch2.offset + offset + k);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return pivotDoc - reader.getDocID(j);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
if (k < cmpBytes) {
|
||||
return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
|
||||
} else {
|
||||
final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
|
||||
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
||||
}
|
||||
}
|
||||
}.select(from, to, mid);
|
||||
}
|
||||
}
|
|
@ -41,8 +41,9 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
|
|||
if (random().nextBoolean()) {
|
||||
// randomize parameters
|
||||
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
|
||||
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
|
||||
}
|
||||
|
||||
// sneaky impersonation!
|
||||
|
@ -52,7 +53,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
|
|||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1156,8 +1156,9 @@ public class TestPointQueries extends LuceneTestCase {
|
|||
private static Codec getCodec() {
|
||||
if (Codec.getDefault().getName().equals("Lucene62")) {
|
||||
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
|
||||
double maxMBSortInHeap = 5.0 + (3*random().nextDouble());
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
|
||||
}
|
||||
|
||||
return new FilterCodec("Lucene62", Codec.getDefault()) {
|
||||
|
@ -1166,7 +1167,7 @@ public class TestPointQueries extends LuceneTestCase {
|
|||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -45,7 +45,26 @@ public class TestByteBlockPool extends LuceneTestCase {
|
|||
for (BytesRef expected : list) {
|
||||
ref.grow(expected.length);
|
||||
ref.setLength(expected.length);
|
||||
switch (random().nextInt(3)) {
|
||||
case 0:
|
||||
// copy bytes
|
||||
pool.readBytes(position, ref.bytes(), 0, ref.length());
|
||||
break;
|
||||
case 1:
|
||||
// copy bytes one by one
|
||||
for (int i = 0; i < ref.length(); ++i) {
|
||||
ref.setByteAt(i, pool.readByte(position + i));
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
BytesRef scratch = new BytesRef();
|
||||
scratch.length = ref.length();
|
||||
pool.setRawBytesRef(scratch, position);
|
||||
System.arraycopy(scratch.bytes, scratch.offset, ref.bytes(), 0, ref.length());
|
||||
break;
|
||||
default:
|
||||
fail();
|
||||
}
|
||||
assertEquals(expected, ref.get());
|
||||
position += ref.length();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class TestIntroSelector extends LuceneTestCase {
|
||||
|
||||
public void testSelect() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSelect(false);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSlowSelect() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSelect(true);
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSelect(boolean slow) {
|
||||
final int from = random().nextInt(5);
|
||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||
final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000);
|
||||
Integer[] arr = new Integer[from + to + random().nextInt(5)];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
arr[i] = TestUtil.nextInt(random(), 0, max);
|
||||
}
|
||||
final int k = TestUtil.nextInt(random(), from, to - 1);
|
||||
|
||||
Integer[] expected = arr.clone();
|
||||
Arrays.sort(expected, from, to);
|
||||
|
||||
Integer[] actual = arr.clone();
|
||||
IntroSelector selector = new IntroSelector() {
|
||||
|
||||
Integer pivot;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(actual, i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot = actual[i];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
return pivot.compareTo(actual[j]);
|
||||
}
|
||||
};
|
||||
if (slow) {
|
||||
selector.slowSelect(from, to, k);
|
||||
} else {
|
||||
selector.select(from, to, k);
|
||||
}
|
||||
|
||||
assertEquals(expected[k], actual[k]);
|
||||
for (int i = 0; i < actual.length; ++i) {
|
||||
if (i < from || i >= to) {
|
||||
assertSame(arr[i], actual[i]);
|
||||
} else if (i <= k) {
|
||||
assertTrue(actual[i].intValue() <= actual[k].intValue());
|
||||
} else {
|
||||
assertTrue(actual[i].intValue() >= actual[k].intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -17,6 +17,8 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class TestMSBRadixSorter extends LuceneTestCase {
|
||||
|
||||
|
@ -41,9 +43,12 @@ public class TestMSBRadixSorter extends LuceneTestCase {
|
|||
break;
|
||||
}
|
||||
|
||||
final int finalMaxLength = maxLength;
|
||||
new MSBRadixSorter(maxLength) {
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
assertTrue(k < finalMaxLength);
|
||||
BytesRef ref = refs[i];
|
||||
if (ref.length <= k) {
|
||||
return -1;
|
||||
|
@ -114,4 +119,67 @@ public class TestMSBRadixSorter extends LuceneTestCase {
|
|||
testRandom(TestUtil.nextInt(random(), 1, 30), 2);
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandom2() {
|
||||
// how large our alphabet is
|
||||
int letterCount = TestUtil.nextInt(random(), 2, 10);
|
||||
|
||||
// how many substring fragments to use
|
||||
int substringCount = TestUtil.nextInt(random(), 2, 10);
|
||||
Set<BytesRef> substringsSet = new HashSet<>();
|
||||
|
||||
// how many strings to make
|
||||
int stringCount = atLeast(10000);
|
||||
|
||||
//System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + " stringCount=" + stringCount);
|
||||
while(substringsSet.size() < substringCount) {
|
||||
int length = TestUtil.nextInt(random(), 2, 10);
|
||||
byte[] bytes = new byte[length];
|
||||
for(int i=0;i<length;i++) {
|
||||
bytes[i] = (byte) random().nextInt(letterCount);
|
||||
}
|
||||
BytesRef br = new BytesRef(bytes);
|
||||
substringsSet.add(br);
|
||||
//System.out.println("add substring count=" + substringsSet.size() + ": " + br);
|
||||
}
|
||||
|
||||
BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
|
||||
double[] chance = new double[substrings.length];
|
||||
double sum = 0.0;
|
||||
for(int i=0;i<substrings.length;i++) {
|
||||
chance[i] = random().nextDouble();
|
||||
sum += chance[i];
|
||||
}
|
||||
|
||||
// give each substring a random chance of occurring:
|
||||
double accum = 0.0;
|
||||
for(int i=0;i<substrings.length;i++) {
|
||||
accum += chance[i]/sum;
|
||||
chance[i] = accum;
|
||||
}
|
||||
|
||||
Set<BytesRef> stringsSet = new HashSet<>();
|
||||
int iters = 0;
|
||||
while (stringsSet.size() < stringCount && iters < stringCount*5) {
|
||||
int count = TestUtil.nextInt(random(), 1, 5);
|
||||
BytesRefBuilder b = new BytesRefBuilder();
|
||||
for(int i=0;i<count;i++) {
|
||||
double v = random().nextDouble();
|
||||
accum = 0.0;
|
||||
for(int j=0;j<substrings.length;j++) {
|
||||
accum += chance[j];
|
||||
if (accum >= v) {
|
||||
b.append(substrings[j]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
BytesRef br = b.toBytesRef();
|
||||
stringsSet.add(br);
|
||||
//System.out.println("add string count=" + stringsSet.size() + ": " + br);
|
||||
iters++;
|
||||
}
|
||||
|
||||
test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class TestRadixSelector extends LuceneTestCase {
|
||||
|
||||
public void testSelect() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSelect();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSelect() {
|
||||
final int from = random().nextInt(5);
|
||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||
final int maxLen = TestUtil.nextInt(random(), 1, 12);
|
||||
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
|
||||
random().nextBytes(bytes);
|
||||
arr[i] = new BytesRef(bytes);
|
||||
}
|
||||
doTest(arr, from, to, maxLen);
|
||||
}
|
||||
|
||||
public void testSharedPrefixes() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSharedPrefixes();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSharedPrefixes() {
|
||||
final int from = random().nextInt(5);
|
||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||
final int maxLen = TestUtil.nextInt(random(), 1, 12);
|
||||
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
|
||||
random().nextBytes(bytes);
|
||||
arr[i] = new BytesRef(bytes);
|
||||
}
|
||||
final int sharedPrefixLength = Math.min(arr[0].length, TestUtil.nextInt(random(), 1, maxLen));
|
||||
for (int i = 1; i < arr.length; ++i) {
|
||||
System.arraycopy(arr[0].bytes, arr[0].offset, arr[i].bytes, arr[i].offset, Math.min(sharedPrefixLength, arr[i].length));
|
||||
}
|
||||
doTest(arr, from, to, maxLen);
|
||||
}
|
||||
|
||||
private void doTest(BytesRef[] arr, int from, int to, int maxLen) {
|
||||
final int k = TestUtil.nextInt(random(), from, to - 1);
|
||||
|
||||
BytesRef[] expected = arr.clone();
|
||||
Arrays.sort(expected, from, to);
|
||||
|
||||
BytesRef[] actual = arr.clone();
|
||||
final int enforcedMaxLen = random().nextBoolean() ? maxLen : Integer.MAX_VALUE;
|
||||
RadixSelector selector = new RadixSelector(enforcedMaxLen) {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(actual, i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
assertTrue(k < enforcedMaxLen);
|
||||
BytesRef b = actual[i];
|
||||
if (k >= b.length) {
|
||||
return -1;
|
||||
} else {
|
||||
return Byte.toUnsignedInt(b.bytes[b.offset + k]);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
selector.select(from, to, k);
|
||||
|
||||
assertEquals(expected[k], actual[k]);
|
||||
for (int i = 0; i < actual.length; ++i) {
|
||||
if (i < from || i >= to) {
|
||||
assertSame(arr[i], actual[i]);
|
||||
} else if (i <= k) {
|
||||
assertTrue(actual[i].compareTo(actual[k]) <= 0);
|
||||
} else {
|
||||
assertTrue(actual[i].compareTo(actual[k]) >= 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,270 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.bkd;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestMutablePointsReaderUtils extends LuceneTestCase {
|
||||
|
||||
public void testSort() {
|
||||
for (int iter = 0; iter < 5; ++iter) {
|
||||
doTestSort();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSort() {
|
||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
|
||||
Arrays.sort(points, new Comparator<Point>() {
|
||||
@Override
|
||||
public int compare(Point o1, Point o2) {
|
||||
int cmp = o1.packedValue.compareTo(o2.packedValue);
|
||||
if (cmp == 0) {
|
||||
cmp = Integer.compare(o1.doc, o2.doc);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
assertNotSame(points, reader.points);
|
||||
assertArrayEquals(points, reader.points);
|
||||
}
|
||||
|
||||
public void testSortByDim() {
|
||||
for (int iter = 0; iter < 5; ++iter) {
|
||||
doTestSortByDim();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSortByDim() {
|
||||
final int numDims = TestUtil.nextInt(random(), 1, 8);
|
||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
|
||||
int[] commonPrefixLengths = new int[numDims];
|
||||
for (int i = 0; i < commonPrefixLengths.length; ++i) {
|
||||
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
|
||||
}
|
||||
BytesRef firstValue = points[0].packedValue;
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
int offset = dim * bytesPerDim;
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
|
||||
}
|
||||
}
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
final int sortedDim = random().nextInt(numDims);
|
||||
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
|
||||
new BytesRef(), new BytesRef());
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
final int offset = sortedDim * bytesPerDim;
|
||||
BytesRef previousValue = reader.points[i-1].packedValue;
|
||||
BytesRef currentValue = reader.points[i].packedValue;
|
||||
int cmp = StringHelper.compare(bytesPerDim,
|
||||
previousValue.bytes, previousValue.offset + offset,
|
||||
currentValue.bytes, currentValue.offset + offset);
|
||||
if (cmp == 0) {
|
||||
cmp = reader.points[i - 1].doc - reader.points[i].doc;
|
||||
}
|
||||
assertTrue(cmp <= 0);
|
||||
}
|
||||
}
|
||||
|
||||
public void testPartition() {
|
||||
for (int iter = 0; iter < 5; ++iter) {
|
||||
doTestPartition();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestPartition() {
|
||||
final int numDims = TestUtil.nextInt(random(), 1, 8);
|
||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
|
||||
int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
|
||||
final int splitDim = random().nextInt(numDims);
|
||||
BytesRef firstValue = points[0].packedValue;
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
int offset = splitDim * bytesPerDim;
|
||||
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
|
||||
}
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
|
||||
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
|
||||
new BytesRef(), new BytesRef());
|
||||
BytesRef pivotValue = reader.points[pivot].packedValue;
|
||||
int offset = splitDim * bytesPerDim;
|
||||
for (int i = 0; i < points.length; ++i) {
|
||||
BytesRef value = reader.points[i].packedValue;
|
||||
int cmp = StringHelper.compare(bytesPerDim,
|
||||
value.bytes, value.offset + offset,
|
||||
pivotValue.bytes, pivotValue.offset + offset);
|
||||
if (cmp == 0) {
|
||||
cmp = reader.points[i].doc - reader.points[pivot].doc;
|
||||
}
|
||||
if (i < pivot) {
|
||||
assertTrue(cmp <= 0);
|
||||
} else if (i > pivot) {
|
||||
assertTrue(cmp >= 0);
|
||||
} else {
|
||||
assertEquals(0, cmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
|
||||
final int packedBytesLength = numDims * bytesPerDim;
|
||||
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
|
||||
Point[] points = new Point[numPoints];
|
||||
for (int i = 0; i < numPoints; ++i) {
|
||||
byte[] value = new byte[packedBytesLength];
|
||||
random().nextBytes(value);
|
||||
points[i] = new Point(value, random().nextInt(maxDoc));
|
||||
}
|
||||
return points;
|
||||
}
|
||||
|
||||
private static class Point {
|
||||
final BytesRef packedValue;
|
||||
final int doc;
|
||||
|
||||
Point(byte[] packedValue, int doc) {
|
||||
// use a non-null offset to make sure MutablePointsReaderUtils does not ignore it
|
||||
this.packedValue = new BytesRef(packedValue.length + 1);
|
||||
this.packedValue.bytes[0] = (byte) random().nextInt(256);
|
||||
this.packedValue.offset = 1;
|
||||
this.packedValue.length = packedValue.length;
|
||||
this.doc = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj == null || obj instanceof Point == false) {
|
||||
return false;
|
||||
}
|
||||
Point that = (Point) obj;
|
||||
return packedValue.equals(that.packedValue) && doc == that.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 31 * packedValue.hashCode() + doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "value=" + packedValue + " doc=" + doc;
|
||||
}
|
||||
}
|
||||
|
||||
private static class DummyPointsReader extends MutablePointsReader {
|
||||
|
||||
private final Point[] points;
|
||||
|
||||
DummyPointsReader(Point[] points) {
|
||||
this.points = points.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getValue(int i, BytesRef packedValue) {
|
||||
packedValue.bytes = points[i].packedValue.bytes;
|
||||
packedValue.offset = points[i].packedValue.offset;
|
||||
packedValue.length = points[i].packedValue.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getByteAt(int i, int k) {
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
return packedValue.bytes[packedValue.offset + k];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocID(int i) {
|
||||
return points[i].doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void swap(int i, int j) {
|
||||
ArrayUtil.swap(points, i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -106,6 +106,7 @@ io.netty.netty-all.version = 4.0.36.Final
|
|||
org.apache.curator.version = 2.8.0
|
||||
/org.apache.curator/curator-client = ${org.apache.curator.version}
|
||||
/org.apache.curator/curator-framework = ${org.apache.curator.version}
|
||||
/org.apache.curator/curator-recipes = ${org.apache.curator.version}
|
||||
|
||||
/org.apache.derby/derby = 10.9.1.0
|
||||
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.queries.function.valuesource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
||||
/**
|
||||
* Base class for comparison operators useful within an "if"/conditional.
|
||||
*/
|
||||
public abstract class ComparisonBoolFunction extends BoolFunction {
|
||||
|
||||
private final ValueSource lhs;
|
||||
private final ValueSource rhs;
|
||||
private final String name;
|
||||
|
||||
public ComparisonBoolFunction(ValueSource lhs, ValueSource rhs, String name) {
|
||||
this.lhs = lhs;
|
||||
this.rhs = rhs;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/** Perform the comparison, returning true or false */
|
||||
public abstract boolean compare(int doc, FunctionValues lhs, FunctionValues rhs);
|
||||
|
||||
/** Uniquely identify the operation (ie "gt", "lt" "gte", etc) */
|
||||
public String name() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
final FunctionValues lhsVal = this.lhs.getValues(context, readerContext);
|
||||
final FunctionValues rhsVal = this.rhs.getValues(context, readerContext);
|
||||
final String compLabel = this.name();
|
||||
|
||||
return new BoolDocValues(this) {
|
||||
@Override
|
||||
public boolean boolVal(int doc) {
|
||||
return compare(doc, lhsVal, rhsVal);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return compLabel + "(" + lhsVal.toString(doc) + "," + rhsVal.toString(doc) + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean exists(int doc) {
|
||||
return lhsVal.exists(doc) && rhsVal.exists(doc);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this.getClass() != o.getClass()) return false;
|
||||
ComparisonBoolFunction other = (ComparisonBoolFunction)o;
|
||||
return name().equals(other.name())
|
||||
&& lhs.equals(other.lhs)
|
||||
&& rhs.equals(other.rhs); }
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = this.getClass().hashCode();
|
||||
h = h * 31 + this.name().hashCode();
|
||||
h = h * 31 + lhs.hashCode();
|
||||
h = h * 31 + rhs.hashCode();
|
||||
return h;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return name() + "(" + lhs.description() + "," + rhs.description() + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
|
||||
lhs.createWeight(context, searcher);
|
||||
rhs.createWeight(context, searcher);
|
||||
}
|
||||
|
||||
}
|
|
@ -38,5 +38,10 @@ public class FileMetaData {
|
|||
this.length = length;
|
||||
this.checksum = checksum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FileMetaData(length=" + length + ")";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -118,6 +118,8 @@ class SimpleCopyJob extends CopyJob {
|
|||
return highPriority ? -1 : 1;
|
||||
} else if (ord < other.ord) {
|
||||
return -1;
|
||||
} else if (ord > other.ord) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -121,4 +121,11 @@ public interface Bounds {
|
|||
*/
|
||||
public Bounds noBottomLatitudeBound();
|
||||
|
||||
/** Signal that there is no bound whatsoever.
|
||||
* The bound is limited only by the constraints of the
|
||||
* planet.
|
||||
*@return the updated Bounds object.
|
||||
*/
|
||||
public Bounds noBound(final PlanetModel planetModel);
|
||||
|
||||
}
|
||||
|
|
|
@ -253,6 +253,11 @@ public class LatLonBounds implements Bounds {
|
|||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bounds noBound(final PlanetModel planetModel) {
|
||||
return noLongitudeBound().noTopLatitudeBound().noBottomLatitudeBound();
|
||||
}
|
||||
|
||||
// Protected methods
|
||||
|
||||
/** Update latitude bound.
|
||||
|
|
|
@ -1003,13 +1003,14 @@ public class Plane extends Vector {
|
|||
* D - MINIMUM_RESOLUTION. Both are examined and intersection points determined.
|
||||
*/
|
||||
protected void findIntersectionBounds(final PlanetModel planetModel, final Bounds boundsInfo, final Plane q, final Membership... bounds) {
|
||||
//System.out.println("Finding intersection bounds");
|
||||
// Unnormalized, unchecked...
|
||||
final double lineVectorX = y * q.z - z * q.y;
|
||||
final double lineVectorY = z * q.x - x * q.z;
|
||||
final double lineVectorZ = x * q.y - y * q.x;
|
||||
if (Math.abs(lineVectorX) < MINIMUM_RESOLUTION && Math.abs(lineVectorY) < MINIMUM_RESOLUTION && Math.abs(lineVectorZ) < MINIMUM_RESOLUTION) {
|
||||
// Degenerate case: parallel planes
|
||||
//System.err.println(" planes are parallel - no intersection");
|
||||
//System.out.println(" planes are parallel - no intersection");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1037,9 +1038,10 @@ public class Plane extends Vector {
|
|||
final double denomXZ = this.x * q.z - this.z * q.x;
|
||||
final double denomXY = this.x * q.y - this.y * q.x;
|
||||
if (Math.abs(denomYZ) >= Math.abs(denomXZ) && Math.abs(denomYZ) >= Math.abs(denomXY)) {
|
||||
//System.out.println("X biggest");
|
||||
// X is the biggest, so our point will have x0 = 0.0
|
||||
if (Math.abs(denomYZ) < MINIMUM_RESOLUTION_SQUARED) {
|
||||
//System.err.println(" Denominator is zero: no intersection");
|
||||
//System.out.println(" Denominator is zero: no intersection");
|
||||
return;
|
||||
}
|
||||
final double denom = 1.0 / denomYZ;
|
||||
|
@ -1061,9 +1063,10 @@ public class Plane extends Vector {
|
|||
0.0, (-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, (this.y * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.y) * denom,
|
||||
bounds);
|
||||
} else if (Math.abs(denomXZ) >= Math.abs(denomXY) && Math.abs(denomXZ) >= Math.abs(denomYZ)) {
|
||||
//System.out.println("Y biggest");
|
||||
// Y is the biggest, so y0 = 0.0
|
||||
if (Math.abs(denomXZ) < MINIMUM_RESOLUTION_SQUARED) {
|
||||
//System.err.println(" Denominator is zero: no intersection");
|
||||
//System.out.println(" Denominator is zero: no intersection");
|
||||
return;
|
||||
}
|
||||
final double denom = 1.0 / denomXZ;
|
||||
|
@ -1084,9 +1087,10 @@ public class Plane extends Vector {
|
|||
(-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, 0.0, (this.x * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.x) * denom,
|
||||
bounds);
|
||||
} else {
|
||||
//System.out.println("Z biggest");
|
||||
// Z is the biggest, so Z0 = 0.0
|
||||
if (Math.abs(denomXY) < MINIMUM_RESOLUTION_SQUARED) {
|
||||
//System.err.println(" Denominator is zero: no intersection");
|
||||
//System.out.println(" Denominator is zero: no intersection");
|
||||
return;
|
||||
}
|
||||
final double denom = 1.0 / denomXY;
|
||||
|
@ -1178,6 +1182,10 @@ public class Plane extends Vector {
|
|||
if (point2Valid) {
|
||||
boundsInfo.addPoint(new GeoPoint(point2X, point2Y, point2Z));
|
||||
}
|
||||
} else {
|
||||
// If we can't intersect line with world, then it's outside the world, so
|
||||
// we have to assume everything is included.
|
||||
boundsInfo.noBound(planetModel);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1351,8 +1359,6 @@ public class Plane extends Vector {
|
|||
// m * [- 2*A*ab^2*r + 2*A^2*ab^2*r*q + 2*B^2*ab^2*r*q + 2*C^2*c^2*r*q] +
|
||||
// [ab^2 - 2*A*ab^2*q + A^2*ab^2*q^2 + B^2*ab^2*q^2 + C^2*c^2*q^2] = 0
|
||||
|
||||
//System.err.println(" computing X bound");
|
||||
|
||||
// Useful subexpressions for this bound
|
||||
final double q = A*abSquared*k;
|
||||
final double qSquared = q * q;
|
||||
|
@ -1392,6 +1398,7 @@ public class Plane extends Vector {
|
|||
assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION;
|
||||
final double m2 = (-b - sqrtResult) * commonDenom;
|
||||
assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION;
|
||||
if (Math.abs(m1) >= MINIMUM_RESOLUTION || Math.abs(m2) >= MINIMUM_RESOLUTION) {
|
||||
final double l1 = r * m1 + q;
|
||||
final double l2 = r * m2 + q;
|
||||
// x = ((1 - l*A) * ab^2 ) / (2 * m)
|
||||
|
@ -1410,11 +1417,14 @@ public class Plane extends Vector {
|
|||
//assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2);
|
||||
addPoint(boundsInfo, bounds, thePoint1);
|
||||
addPoint(boundsInfo, bounds, thePoint2);
|
||||
} else {
|
||||
// This is a plane of the form A=n B=0 C=0. We can set a bound only by noting the D value.
|
||||
boundsInfo.addXValue(-D/A);
|
||||
}
|
||||
} else {
|
||||
// No solutions
|
||||
}
|
||||
} else if (Math.abs(b) > MINIMUM_RESOLUTION_SQUARED) {
|
||||
//System.err.println("Not x quadratic");
|
||||
// a = 0, so m = - c / b
|
||||
final double m = -c / b;
|
||||
final double l = r * m + q;
|
||||
|
@ -1561,6 +1571,7 @@ public class Plane extends Vector {
|
|||
assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION;
|
||||
final double m2 = (-b - sqrtResult) * commonDenom;
|
||||
assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION;
|
||||
if (Math.abs(m1) >= MINIMUM_RESOLUTION || Math.abs(m2) >= MINIMUM_RESOLUTION) {
|
||||
final double l1 = r * m1 + q;
|
||||
final double l2 = r * m2 + q;
|
||||
// x = (-l*A * ab^2 ) / (2 * m)
|
||||
|
@ -1579,6 +1590,10 @@ public class Plane extends Vector {
|
|||
//assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2);
|
||||
addPoint(boundsInfo, bounds, thePoint1);
|
||||
addPoint(boundsInfo, bounds, thePoint2);
|
||||
} else {
|
||||
// This is a plane of the form A=0 B=n C=0. We can set a bound only by noting the D value.
|
||||
boundsInfo.addYValue(-D/B);
|
||||
}
|
||||
} else {
|
||||
// No solutions
|
||||
}
|
||||
|
|
|
@ -292,6 +292,17 @@ public class XYZBounds implements Bounds {
|
|||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bounds noBound(final PlanetModel planetModel) {
|
||||
minX = planetModel.getMinimumXValue();
|
||||
maxX = planetModel.getMaximumXValue();
|
||||
minY = planetModel.getMinimumYValue();
|
||||
maxY = planetModel.getMaximumYValue();
|
||||
minZ = planetModel.getMinimumZValue();
|
||||
maxZ = planetModel.getMaximumZValue();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "XYZBounds: [xmin="+minX+" xmax="+maxX+" ymin="+minY+" ymax="+maxY+" zmin="+minZ+" zmax="+maxZ+"]";
|
||||
|
|
|
@ -87,8 +87,9 @@ public class TestGeo3DPoint extends LuceneTestCase {
|
|||
private static Codec getCodec() {
|
||||
if (Codec.getDefault().getName().equals("Lucene62")) {
|
||||
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
|
||||
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
|
||||
}
|
||||
|
||||
return new FilterCodec("Lucene62", Codec.getDefault()) {
|
||||
|
@ -97,7 +98,7 @@ public class TestGeo3DPoint extends LuceneTestCase {
|
|||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -372,4 +372,19 @@ public class GeoBBoxTest {
|
|||
assertTrue(box.isWithin(point)?solid.isWithin(point):true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailureCase2() {
|
||||
//final GeoPoint point = new GeoPoint(-0.7375647084975573, -2.3309121299774915E-10, 0.6746626163258577);
|
||||
final GeoPoint point = new GeoPoint(-0.737564708579924, -9.032562595264542E-17, 0.6746626165197899);
|
||||
final GeoBBox box = new GeoRectangle(PlanetModel.WGS84, 0.7988584710911523, 0.25383311815493353, -1.2236144735575564E-12, 7.356011300929654E-49);
|
||||
final XYZBounds bounds = new XYZBounds();
|
||||
box.getBounds(bounds);
|
||||
final XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
|
||||
|
||||
//System.out.println("Is within Y value? "+(point.y >= bounds.getMinimumY() && point.y <= bounds.getMaximumY()));
|
||||
//System.out.println("Shape = "+box+" is within? "+box.isWithin(point));
|
||||
//System.out.println("XYZBounds = "+bounds+" is within? "+solid.isWithin(point)+" solid="+solid);
|
||||
assertTrue(box.isWithin(point) == solid.isWithin(point));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -405,4 +405,18 @@ public class GeoCircleTest extends LuceneTestCase {
|
|||
assertTrue(solid.isWithin(gp));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBoundsFailureCase2() {
|
||||
final GeoCircle gc = GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84, -2.7574435614238194E-13, 0.0, 1.5887859182593391);
|
||||
final GeoPoint gp = new GeoPoint(PlanetModel.WGS84, 0.7980359504429014, 1.5964981068121482);
|
||||
final XYZBounds bounds = new XYZBounds();
|
||||
gc.getBounds(bounds);
|
||||
System.out.println("Bounds = "+bounds);
|
||||
System.out.println("Point = "+gp);
|
||||
final XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
|
||||
|
||||
assert gc.isWithin(gp)?solid.isWithin(gp):true;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -126,6 +126,7 @@ public final class AssertingPointsFormat extends PointsFormat {
|
|||
assert false: "point values are out of order";
|
||||
}
|
||||
System.arraycopy(packedValue, 0, lastDocValue, 0, bytesPerDim);
|
||||
lastDocID = docID;
|
||||
}
|
||||
in.visit(docID, packedValue);
|
||||
}
|
||||
|
@ -254,11 +255,11 @@ public final class AssertingPointsFormat extends PointsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
if (fieldInfo.getPointDimensionCount() == 0) {
|
||||
throw new IllegalArgumentException("writing field=\"" + fieldInfo.name + "\" but pointDimensionalCount is 0");
|
||||
}
|
||||
in.writeField(fieldInfo, values, maxMBSortInHeap);
|
||||
in.writeField(fieldInfo, values);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -56,11 +56,11 @@ class CrankyPointsFormat extends PointsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
if (random.nextInt(100) == 0) {
|
||||
throw new IOException("Fake IOException");
|
||||
}
|
||||
delegate.writeField(fieldInfo, values, maxMBSortInHeap);
|
||||
delegate.writeField(fieldInfo, values);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -67,6 +67,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.SloppyMath;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.bkd.BKDWriter;
|
||||
|
||||
/**
|
||||
* Abstract class to do basic tests for a geospatial impl (high level
|
||||
|
@ -1247,7 +1248,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
|||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||
return new Lucene60PointsWriter(writeState, pointsInLeaf);
|
||||
return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -92,6 +92,7 @@ public class RandomCodec extends AssertingCodec {
|
|||
// which is less effective for testing.
|
||||
// TODO: improve how we randomize this...
|
||||
private final int maxPointsInLeafNode;
|
||||
private final double maxMBSortInHeap;
|
||||
private final int bkdSplitRandomSeed;
|
||||
|
||||
@Override
|
||||
|
@ -102,9 +103,9 @@ public class RandomCodec extends AssertingCodec {
|
|||
|
||||
// Randomize how BKDWriter chooses its splits:
|
||||
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode) {
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
|
||||
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
|
||||
|
||||
|
@ -184,6 +185,7 @@ public class RandomCodec extends AssertingCodec {
|
|||
int lowFreqCutoff = TestUtil.nextInt(random, 2, 100);
|
||||
|
||||
maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
|
||||
maxMBSortInHeap = 5.0 + (3*random.nextDouble());
|
||||
bkdSplitRandomSeed = random.nextInt();
|
||||
|
||||
add(avoidCodecs,
|
||||
|
@ -251,7 +253,8 @@ public class RandomCodec extends AssertingCodec {
|
|||
public String toString() {
|
||||
return super.toString() + ": " + previousMappings.toString() +
|
||||
", docValues:" + previousDVMappings.toString() +
|
||||
", maxPointsInLeafNode=" + maxPointsInLeafNode;
|
||||
", maxPointsInLeafNode=" + maxPointsInLeafNode +
|
||||
", maxMBSortInHeap=" + maxMBSortInHeap;
|
||||
}
|
||||
|
||||
/** Just like {@link BKDWriter} except it evilly picks random ways to split cells on
|
||||
|
|
|
@ -771,7 +771,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
|
|||
}
|
||||
ii = new SlowOpeningMockIndexInputWrapper(this, name, delegateInput);
|
||||
} else {
|
||||
ii = new MockIndexInputWrapper(this, name, delegateInput);
|
||||
ii = new MockIndexInputWrapper(this, name, delegateInput, null);
|
||||
}
|
||||
addFileHandle(ii, name, Handle.Input);
|
||||
return ii;
|
||||
|
|
|
@ -30,12 +30,19 @@ public class MockIndexInputWrapper extends IndexInput {
|
|||
private MockDirectoryWrapper dir;
|
||||
final String name;
|
||||
private IndexInput delegate;
|
||||
private boolean isClone;
|
||||
private boolean closed;
|
||||
private volatile boolean closed;
|
||||
|
||||
/** Construct an empty output buffer. */
|
||||
public MockIndexInputWrapper(MockDirectoryWrapper dir, String name, IndexInput delegate) {
|
||||
// Which MockIndexInputWrapper we were cloned from, or null if we are not a clone:
|
||||
private final MockIndexInputWrapper parent;
|
||||
|
||||
/** Sole constructor */
|
||||
public MockIndexInputWrapper(MockDirectoryWrapper dir, String name, IndexInput delegate, MockIndexInputWrapper parent) {
|
||||
super("MockIndexInputWrapper(name=" + name + " delegate=" + delegate + ")");
|
||||
|
||||
// If we are a clone then our parent better not be a clone!
|
||||
assert parent == null || parent.parent == null;
|
||||
|
||||
this.parent = parent;
|
||||
this.name = name;
|
||||
this.dir = dir;
|
||||
this.delegate = delegate;
|
||||
|
@ -54,7 +61,7 @@ public class MockIndexInputWrapper extends IndexInput {
|
|||
// remove the conditional check so we also track that
|
||||
// all clones get closed:
|
||||
assert delegate != null;
|
||||
if (!isClone) {
|
||||
if (parent == null) {
|
||||
dir.removeIndexInput(this, name);
|
||||
}
|
||||
dir.maybeThrowDeterministicException();
|
||||
|
@ -62,9 +69,13 @@ public class MockIndexInputWrapper extends IndexInput {
|
|||
}
|
||||
|
||||
private void ensureOpen() {
|
||||
// TODO: not great this is a volatile read (closed) ... we should deploy heavy JVM voodoo like SwitchPoint to avoid this
|
||||
if (closed) {
|
||||
throw new RuntimeException("Abusing closed IndexInput!");
|
||||
}
|
||||
if (parent != null && parent.closed) {
|
||||
throw new RuntimeException("Abusing clone of a closed IndexInput!");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -75,8 +86,7 @@ public class MockIndexInputWrapper extends IndexInput {
|
|||
}
|
||||
dir.inputCloneCount.incrementAndGet();
|
||||
IndexInput iiclone = delegate.clone();
|
||||
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone);
|
||||
clone.isClone = true;
|
||||
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone, parent != null ? parent : this);
|
||||
// Pending resolution on LUCENE-686 we may want to
|
||||
// uncomment this code so that we also track that all
|
||||
// clones get closed:
|
||||
|
@ -102,8 +112,7 @@ public class MockIndexInputWrapper extends IndexInput {
|
|||
}
|
||||
dir.inputCloneCount.incrementAndGet();
|
||||
IndexInput slice = delegate.slice(sliceDescription, offset, length);
|
||||
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, sliceDescription, slice);
|
||||
clone.isClone = true;
|
||||
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, sliceDescription, slice, parent != null ? parent : this);
|
||||
return clone;
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ class SlowClosingMockIndexInputWrapper extends MockIndexInputWrapper {
|
|||
|
||||
public SlowClosingMockIndexInputWrapper(MockDirectoryWrapper dir,
|
||||
String name, IndexInput delegate) {
|
||||
super(dir, name, delegate);
|
||||
super(dir, name, delegate, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -28,7 +28,7 @@ class SlowOpeningMockIndexInputWrapper extends MockIndexInputWrapper {
|
|||
|
||||
public SlowOpeningMockIndexInputWrapper(MockDirectoryWrapper dir,
|
||||
String name, IndexInput delegate) throws IOException {
|
||||
super(dir, name, delegate);
|
||||
super(dir, name, delegate, null);
|
||||
try {
|
||||
Thread.sleep(50);
|
||||
} catch (InterruptedException ie) {
|
||||
|
|
|
@ -171,4 +171,40 @@ public class TestMockDirectoryWrapper extends BaseDirectoryTestCase {
|
|||
|
||||
assertTrue("MockDirectoryWrapper on dir=" + dir + " failed to corrupt an unsync'd file", changed);
|
||||
}
|
||||
|
||||
public void testAbuseClosedIndexInput() throws Exception {
|
||||
MockDirectoryWrapper dir = newMockDirectory();
|
||||
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
|
||||
out.writeByte((byte) 42);
|
||||
out.close();
|
||||
final IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
|
||||
in.close();
|
||||
expectThrows(RuntimeException.class, in::readByte);
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testAbuseCloneAfterParentClosed() throws Exception {
|
||||
MockDirectoryWrapper dir = newMockDirectory();
|
||||
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
|
||||
out.writeByte((byte) 42);
|
||||
out.close();
|
||||
IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
|
||||
final IndexInput clone = in.clone();
|
||||
in.close();
|
||||
expectThrows(RuntimeException.class, clone::readByte);
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testAbuseCloneOfCloneAfterParentClosed() throws Exception {
|
||||
MockDirectoryWrapper dir = newMockDirectory();
|
||||
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
|
||||
out.writeByte((byte) 42);
|
||||
out.close();
|
||||
IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
|
||||
IndexInput clone1 = in.clone();
|
||||
IndexInput clone2 = clone1.clone();
|
||||
in.close();
|
||||
expectThrows(RuntimeException.class, clone2::readByte);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -100,6 +100,25 @@ New Features
|
|||
* SOLR-9275: XML QueryParser support (defType=xmlparser) now extensible via configuration.
|
||||
(Christine Poerschke)
|
||||
|
||||
* SOLR-9200: Add Delegation Token Support to Solr.
|
||||
(Gregory Chanan)
|
||||
|
||||
* SOLR-9038: Solr core snapshots: The current commit can be snapshotted which retains the commit and associates it with
|
||||
a name. The core admin API can create snapshots, list them, and delete them. Snapshot names can be referenced in
|
||||
doing a core backup, and in replication. Snapshot metadata is stored in a new snapshot_metadata/ dir.
|
||||
(Hrishikesh Gadre via David Smiley)
|
||||
|
||||
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
|
||||
(Doug Turnbull, David Smiley)
|
||||
|
||||
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
|
||||
(Gregory Chanan)
|
||||
|
||||
* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)
|
||||
|
||||
* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.
|
||||
(Noble Paul, Renaud Delbru, shalin)
|
||||
|
||||
* SOLR-9320: A REPLACENODE command to decommission an existing node with another new node
|
||||
(noble, Nitin Sharma, Varun Thacker)
|
||||
|
||||
|
@ -170,6 +189,19 @@ Bug Fixes
|
|||
|
||||
* SOLR-9339: NPE in CloudSolrClient when the response is null (noble)
|
||||
|
||||
* SOLR-8596: Web UI doesn't correctly generate queries which include local parameters (Alexandre Rafalovitch, janhoy)
|
||||
|
||||
* SOLR-8645: managed-schema is now syntax highlighted in cloud->Tree view (Alexandre Rafalovitch via janhoy)
|
||||
|
||||
* SOLR-8379: UI Cloud->Tree view now shows .txt files correctly (Alexandre Rafalovitch via janhoy)
|
||||
|
||||
* SOLR-9003: New Admin UI's Dataimport screen now correctly displays DIH Debug output (Alexandre Rafalovitch)
|
||||
|
||||
* SOLR-9308: Fix distributed RTG to forward request params, fixes fq and non-default fl params (hossman)
|
||||
|
||||
* SOLR-9179: NPE in IndexSchema using IBM JDK (noble, Colvin Cowie)
|
||||
|
||||
* SOLR-9397: Config API does not support adding caches (noble)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
@ -179,6 +211,13 @@ Optimizations
|
|||
* SOLR-9264: Optimize ZkController.publishAndWaitForDownStates to not read all collection states and
|
||||
watch relevant collections instead. (Hrishikesh Gadre, shalin)
|
||||
|
||||
* SOLR-9335: Solr cache/search/update stats counters now use LongAdder which are supposed to have higher throughput
|
||||
under high contention. (Varun Thacker)
|
||||
|
||||
* SOLR-9350: JSON Facets: method="stream" will no longer always use & populate the filter cache, likely
|
||||
flushing it. 'cacheDf' can be configured to set a doc frequency threshold, now defaulting to 1/16th doc count.
|
||||
Using -1 disables use of the cache. (David Smiley, yonik)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -202,6 +241,25 @@ Other Changes
|
|||
* SOLR-9163: Sync up basic_configs and data_driven_schema_configs, removing almost all differences
|
||||
except what is required for schemaless. (yonik)
|
||||
|
||||
* SOLR-9340: Change ZooKeeper disconnect and session expiry related logging from INFO to WARN to
|
||||
make debugging easier (Varun Thacker)
|
||||
|
||||
* SOLR-9358: [AngularUI] In Cloud->Tree file view area, collapse metadata by default (janhoy)
|
||||
|
||||
* SOLR-9256: asserting hasNext() contract in JdbcDataSource in DataImportHandler (Kristine Jetzke via Mikhail Khludnev)
|
||||
|
||||
* SOLR-9209: extracting JdbcDataSource.createResultSetIterator() for extension (Kristine Jetzke via Mikhail Khludnev)
|
||||
|
||||
* SOLR-9353: Factor out ReRankQParserPlugin.ReRankQueryRescorer private class. (Christine Poerschke)
|
||||
|
||||
* SOLR-9392: Fixed CDCR Test failures which were due to leaked resources. (shalin)
|
||||
|
||||
* SOLR-9385: Add QParser.getParser(String,SolrQueryRequest) variant. (Christine Poerschke)
|
||||
|
||||
* SOLR-9367: Improved TestInjection's randomization logic to use LuceneTestCase.random() (hossman)
|
||||
|
||||
* SOLR-9331: Remove ReRankQuery's length constructor argument and member. (Christine Poerschke)
|
||||
|
||||
================== 6.1.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||
|
|
|
@ -604,7 +604,7 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
|
|||
QueryFacetAccumulator qAcc = new QueryFacetAccumulator(this,qfr.getName(),query);
|
||||
final Query q;
|
||||
try {
|
||||
q = QParser.getParser(query, null, queryRequest).getQuery();
|
||||
q = QParser.getParser(query, queryRequest).getQuery();
|
||||
} catch( SyntaxError e ){
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST,"Invalid query '"+query+"'",e);
|
||||
}
|
||||
|
|
|
@ -280,10 +280,14 @@ public class JdbcDataSource extends
|
|||
resultSetIterator.close();
|
||||
resultSetIterator = null;
|
||||
}
|
||||
resultSetIterator = new ResultSetIterator(query);
|
||||
resultSetIterator = createResultSetIterator(query);
|
||||
return resultSetIterator.getIterator();
|
||||
}
|
||||
|
||||
protected ResultSetIterator createResultSetIterator(String query) {
|
||||
return new ResultSetIterator(query);
|
||||
}
|
||||
|
||||
private void logError(String msg, Exception e) {
|
||||
LOG.warn(msg, e);
|
||||
}
|
||||
|
|
|
@ -510,6 +510,45 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
|
|||
DriverManager.deregisterDriver(driver);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmptyResultSet() throws Exception {
|
||||
MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource);
|
||||
|
||||
props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB");
|
||||
EasyMock.expect(dataSource.getConnection()).andReturn(connection);
|
||||
|
||||
jdbcDataSource.init(context, props);
|
||||
|
||||
connection.setAutoCommit(false);
|
||||
|
||||
Statement statement = mockControl.createMock(Statement.class);
|
||||
EasyMock.expect(connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY))
|
||||
.andReturn(statement);
|
||||
statement.setFetchSize(500);
|
||||
statement.setMaxRows(0);
|
||||
EasyMock.expect(statement.execute("query")).andReturn(true);
|
||||
ResultSet resultSet = mockControl.createMock(ResultSet.class);
|
||||
EasyMock.expect(statement.getResultSet()).andReturn(resultSet);
|
||||
ResultSetMetaData metaData = mockControl.createMock(ResultSetMetaData.class);
|
||||
EasyMock.expect(resultSet.getMetaData()).andReturn(metaData);
|
||||
EasyMock.expect(metaData.getColumnCount()).andReturn(0);
|
||||
EasyMock.expect(resultSet.next()).andReturn(false);
|
||||
resultSet.close();
|
||||
EasyMock.expect(statement.getMoreResults()).andReturn(false);
|
||||
EasyMock.expect(statement.getUpdateCount()).andReturn(-1);
|
||||
statement.close();
|
||||
|
||||
mockControl.replay();
|
||||
|
||||
Iterator<Map<String,Object>> resultSetIterator = jdbcDataSource.getData("query");
|
||||
resultSetIterator.hasNext();
|
||||
resultSetIterator.hasNext();
|
||||
|
||||
mockControl.verify();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock database server to work")
|
||||
public void testBasic() throws Exception {
|
||||
|
|
|
@ -16,6 +16,15 @@
|
|||
*/
|
||||
package org.apache.solr.hadoop;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.common.io.Files;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
|
@ -35,15 +44,6 @@ import org.apache.zookeeper.KeeperException;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Extracts SolrCloud information from ZooKeeper.
|
||||
*/
|
||||
|
@ -78,8 +78,7 @@ final class ZooKeeperInspector {
|
|||
}
|
||||
SolrZkClient zkClient = getZkClient(zkHost);
|
||||
|
||||
try {
|
||||
ZkStateReader zkStateReader = new ZkStateReader(zkClient);
|
||||
try (ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
|
||||
try {
|
||||
// first check for alias
|
||||
collection = checkForAlias(zkClient, collection);
|
||||
|
|
|
@ -134,6 +134,10 @@
|
|||
<dependency org="antlr" name="antlr" rev="${/antlr/antlr}" conf="test.MiniKdc"/>
|
||||
<dependency org="net.sf.ehcache" name="ehcache-core" rev="${/net.sf.ehcache/ehcache-core}" conf="test.MiniKdc"/>
|
||||
|
||||
<dependency org="org.apache.curator" name="curator-framework" rev="${/org.apache.curator/curator-framework}" conf="compile"/>
|
||||
<dependency org="org.apache.curator" name="curator-client" rev="${/org.apache.curator/curator-client}" conf="compile"/>
|
||||
<dependency org="org.apache.curator" name="curator-recipes" rev="${/org.apache.curator/curator-recipes}" conf="compile"/>
|
||||
|
||||
<!-- StatsComponents percentiles Dependencies-->
|
||||
<dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/>
|
||||
<!-- SQL Parser -->
|
||||
|
|
|
@ -15,21 +15,26 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.update.SolrIndexWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* A wrapper for an IndexDeletionPolicy instance.
|
||||
* <p>
|
||||
|
@ -52,9 +57,11 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
|
|||
private final Map<Long, Long> reserves = new ConcurrentHashMap<>();
|
||||
private volatile IndexCommit latestCommit;
|
||||
private final ConcurrentHashMap<Long, AtomicInteger> savedCommits = new ConcurrentHashMap<>();
|
||||
private final SolrSnapshotMetaDataManager snapshotMgr;
|
||||
|
||||
/**
 * Creates a wrapper around the given deletion policy.
 *
 * @param deletionPolicy the wrapped {@link IndexDeletionPolicy}
 * @param snapshotMgr the snapshot meta-data manager kept by this wrapper; it is consulted
 *                    (via {@code isSnapshotted}) before a commit is deleted
 */
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy, SolrSnapshotMetaDataManager snapshotMgr) {
  this.deletionPolicy = deletionPolicy;
  this.snapshotMgr = snapshotMgr;
}
|
||||
|
||||
/**
|
||||
|
@ -134,7 +141,6 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Internal use for Lucene... do not explicitly call.
|
||||
*/
|
||||
|
@ -185,7 +191,8 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
|
|||
Long gen = delegate.getGeneration();
|
||||
Long reserve = reserves.get(gen);
|
||||
if (reserve != null && System.nanoTime() < reserve) return;
|
||||
if(savedCommits.containsKey(gen)) return;
|
||||
if (savedCommits.containsKey(gen)) return;
|
||||
if (snapshotMgr.isSnapshotted(gen)) return;
|
||||
delegate.delete();
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,17 @@ import java.net.URL;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -49,6 +59,7 @@ import org.apache.solr.schema.IndexSchemaFactory;
|
|||
import org.apache.solr.search.CacheConfig;
|
||||
import org.apache.solr.search.FastLRUCache;
|
||||
import org.apache.solr.search.QParserPlugin;
|
||||
import org.apache.solr.search.SolrCache;
|
||||
import org.apache.solr.search.ValueSourceParser;
|
||||
import org.apache.solr.search.stats.StatsCache;
|
||||
import org.apache.solr.servlet.SolrRequestParsers;
|
||||
|
@ -91,7 +102,7 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
public static final String DEFAULT_CONF_FILE = "solrconfig.xml";
|
||||
private RequestParams requestParams;
|
||||
|
||||
public static enum PluginOpts {
|
||||
public enum PluginOpts {
|
||||
MULTI_OK,
|
||||
REQUIRE_NAME,
|
||||
REQUIRE_NAME_IN_OVERLAY,
|
||||
|
@ -254,7 +265,6 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
dataDir = get("dataDir", null);
|
||||
if (dataDir != null && dataDir.length() == 0) dataDir = null;
|
||||
|
||||
userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
|
||||
|
||||
org.apache.solr.search.SolrIndexSearcher.initRegenerators(this);
|
||||
|
||||
|
@ -276,6 +286,16 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
maxWarmingSearchers = getInt("query/maxWarmingSearchers", Integer.MAX_VALUE);
|
||||
slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1);
|
||||
for (SolrPluginInfo plugin : plugins) loadPluginInfo(plugin);
|
||||
|
||||
Map<String, CacheConfig> userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
|
||||
List<PluginInfo> caches = getPluginInfos(SolrCache.class.getName());
|
||||
if (!caches.isEmpty()) {
|
||||
for (PluginInfo c : caches) {
|
||||
userCacheConfigs.put(c.name, CacheConfig.getConfig(this, "cache", c.attributes, null));
|
||||
}
|
||||
}
|
||||
this.userCacheConfigs = Collections.unmodifiableMap(userCacheConfigs);
|
||||
|
||||
updateHandlerInfo = loadUpdatehandlerInfo();
|
||||
|
||||
multipartUploadLimitKB = getInt(
|
||||
|
@ -317,6 +337,7 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
.add(new SolrPluginInfo(TransformerFactory.class, "transformer", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
|
||||
.add(new SolrPluginInfo(SearchComponent.class, "searchComponent", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
|
||||
.add(new SolrPluginInfo(UpdateRequestProcessorFactory.class, "updateProcessor", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
|
||||
.add(new SolrPluginInfo(SolrCache.class, "cache", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
|
||||
// TODO: WTF is up with queryConverter???
|
||||
// it apparently *only* works as a singleton? - SOLR-4304
|
||||
// and even then -- only if there is a single SpellCheckComponent
|
||||
|
@ -457,7 +478,7 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
public final CacheConfig queryResultCacheConfig;
|
||||
public final CacheConfig documentCacheConfig;
|
||||
public final CacheConfig fieldValueCacheConfig;
|
||||
public final CacheConfig[] userCacheConfigs;
|
||||
public final Map<String, CacheConfig> userCacheConfigs;
|
||||
// SolrIndexSearcher - more...
|
||||
public final boolean useFilterForSortedQuery;
|
||||
public final int queryResultWindowSize;
|
||||
|
|
|
@ -81,6 +81,7 @@ import org.apache.solr.common.util.ObjectReleaseTracker;
|
|||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.handler.IndexFetcher;
|
||||
import org.apache.solr.handler.ReplicationHandler;
|
||||
import org.apache.solr.handler.RequestHandlerBase;
|
||||
|
@ -184,6 +185,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
private final Map<String,UpdateRequestProcessorChain> updateProcessorChains;
|
||||
private final Map<String, SolrInfoMBean> infoRegistry;
|
||||
private final IndexDeletionPolicyWrapper solrDelPolicy;
|
||||
private final SolrSnapshotMetaDataManager snapshotMgr;
|
||||
private final DirectoryFactory directoryFactory;
|
||||
private IndexReaderFactory indexReaderFactory;
|
||||
private final Codec codec;
|
||||
|
@ -414,7 +416,19 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
} else {
|
||||
delPolicy = new SolrDeletionPolicy();
|
||||
}
|
||||
return new IndexDeletionPolicyWrapper(delPolicy);
|
||||
|
||||
return new IndexDeletionPolicyWrapper(delPolicy, snapshotMgr);
|
||||
}
|
||||
|
||||
private SolrSnapshotMetaDataManager initSnapshotMetaDataManager() {
|
||||
try {
|
||||
String dirName = getDataDir() + SolrSnapshotMetaDataManager.SNAPSHOT_METADATA_DIR + "/";
|
||||
Directory snapshotDir = directoryFactory.get(dirName, DirContext.DEFAULT,
|
||||
getSolrConfig().indexConfig.lockType);
|
||||
return new SolrSnapshotMetaDataManager(this, snapshotDir);
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void initListeners() {
|
||||
|
@ -739,6 +753,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
|
||||
initListeners();
|
||||
|
||||
this.snapshotMgr = initSnapshotMetaDataManager();
|
||||
this.solrDelPolicy = initDeletionPolicy(delPolicy);
|
||||
|
||||
this.codec = initCodec(solrConfig, this.schema);
|
||||
|
@ -1242,6 +1257,17 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
// Close the snapshots meta-data directory.
|
||||
Directory snapshotsDir = snapshotMgr.getSnapshotsDir();
|
||||
try {
|
||||
this.directoryFactory.release(snapshotsDir);
|
||||
} catch (Throwable e) {
|
||||
SolrException.log(log,e);
|
||||
if (e instanceof Error) {
|
||||
throw (Error) e;
|
||||
}
|
||||
}
|
||||
|
||||
if (coreStateClosed) {
|
||||
|
||||
try {
|
||||
|
@ -2343,6 +2369,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
return solrDelPolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A reference of {@linkplain SolrSnapshotMetaDataManager}
|
||||
* managing the persistent snapshots for this Solr core.
|
||||
*/
|
||||
public SolrSnapshotMetaDataManager getSnapshotMetaDataManager() {
|
||||
return snapshotMgr;
|
||||
}
|
||||
|
||||
/**
 * @return the {@code ruleExpiryLock} held by this core.
 */
public ReentrantLock getRuleExpiryLock() {
|
||||
return ruleExpiryLock;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.store.IOContext;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.NoLockFactory;
|
||||
import org.apache.lucene.store.SimpleFSDirectory;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
|
||||
|
@ -59,10 +60,22 @@ public class LocalFileSystemRepository implements BackupRepository {
|
|||
@Override
|
||||
public URI createURI(String... pathComponents) {
|
||||
Preconditions.checkArgument(pathComponents.length > 0);
|
||||
Path result = Paths.get(pathComponents[0]);
|
||||
|
||||
String basePath = Preconditions.checkNotNull(pathComponents[0]);
|
||||
// Note the URI.getPath() invocation on Windows platform generates an invalid URI.
|
||||
// Refer to http://stackoverflow.com/questions/9834776/java-nio-file-path-issue
|
||||
// Since the caller may have used this method to generate the string representation
|
||||
// for the pathComponents, we implement a work-around specifically for Windows platform
|
||||
// to remove the leading '/' character.
|
||||
if (Constants.WINDOWS) {
|
||||
basePath = basePath.replaceFirst("^/(.:/)", "$1");
|
||||
}
|
||||
|
||||
Path result = Paths.get(basePath);
|
||||
for (int i = 1; i < pathComponents.length; i++) {
|
||||
result = result.resolve(pathComponents[i]);
|
||||
}
|
||||
|
||||
return result.toUri();
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.core.snapshots;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This class provides functionality required to handle the data files corresponding to Solr snapshots.
|
||||
*/
|
||||
public class SolrSnapshotManager {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
/**
|
||||
* This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
|
||||
*
|
||||
* @param dir The index directory storing the snapshot.
|
||||
* @param gen The generation number for the {@linkplain IndexCommit}
|
||||
* @throws IOException in case of I/O errors.
|
||||
*/
|
||||
public static void deleteIndexFiles ( Directory dir, Collection<SnapshotMetaData> snapshots, long gen ) throws IOException {
|
||||
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
|
||||
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
|
||||
for (IndexCommit ic : commits) {
|
||||
if (ic.getGeneration() == gen) {
|
||||
deleteIndexFiles(dir,refCounts, ic);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method deletes all files not corresponding to a configured snapshot in the specified index directory.
|
||||
*
|
||||
* @param dir The index directory to search for.
|
||||
* @throws IOException in case of I/O errors.
|
||||
*/
|
||||
public static void deleteNonSnapshotIndexFiles (Directory dir, Collection<SnapshotMetaData> snapshots) throws IOException {
|
||||
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
|
||||
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
|
||||
Set<Long> snapshotGenNumbers = snapshots.stream()
|
||||
.map(SnapshotMetaData::getGenerationNumber)
|
||||
.collect(Collectors.toSet());
|
||||
for (IndexCommit ic : commits) {
|
||||
if (!snapshotGenNumbers.contains(ic.getGeneration())) {
|
||||
deleteIndexFiles(dir,refCounts, ic);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method computes reference count for the index files by taking into consideration
|
||||
* (a) configured snapshots and (b) files sharing between two or more {@linkplain IndexCommit} instances.
|
||||
*
|
||||
* @param snapshots A collection of user configured snapshots
|
||||
* @param commits A list of {@linkplain IndexCommit} instances
|
||||
* @return A map containing reference count for each index file referred in one of the {@linkplain IndexCommit} instances.
|
||||
* @throws IOException in case of I/O error.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static Map<String, Integer> buildRefCounts (Collection<SnapshotMetaData> snapshots, List<IndexCommit> commits) throws IOException {
|
||||
Map<String, Integer> result = new HashMap<>();
|
||||
Map<Long, IndexCommit> commitsByGen = commits.stream().collect(
|
||||
Collectors.toMap(IndexCommit::getGeneration, Function.identity()));
|
||||
|
||||
for(SnapshotMetaData md : snapshots) {
|
||||
IndexCommit ic = commitsByGen.get(md.getGenerationNumber());
|
||||
if (ic != null) {
|
||||
Collection<String> fileNames = ic.getFileNames();
|
||||
for(String fileName : fileNames) {
|
||||
int refCount = result.getOrDefault(fileName, 0);
|
||||
result.put(fileName, refCount+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method deletes the index files associated with specified <code>indexCommit</code> provided they
|
||||
* are not referred by some other {@linkplain IndexCommit}.
|
||||
*
|
||||
* @param dir The index directory containing the {@linkplain IndexCommit} to be deleted.
|
||||
* @param refCounts A map containing reference counts for each file associated with every {@linkplain IndexCommit}
|
||||
* in the specified directory.
|
||||
* @param indexCommit The {@linkplain IndexCommit} whose files need to be deleted.
|
||||
* @throws IOException in case of I/O errors.
|
||||
*/
|
||||
private static void deleteIndexFiles ( Directory dir, Map<String, Integer> refCounts, IndexCommit indexCommit ) throws IOException {
|
||||
log.info("Deleting index files for index commit with generation {} in directory {}", indexCommit.getGeneration(), dir);
|
||||
for (String fileName : indexCommit.getFileNames()) {
|
||||
try {
|
||||
// Ensure that a file being deleted is not referred by some other commit.
|
||||
int ref = refCounts.getOrDefault(fileName, 0);
|
||||
log.debug("Reference count for file {} is {}", fileName, ref);
|
||||
if (ref == 0) {
|
||||
dir.deleteFile(fileName);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.warn("Unable to delete file {} in directory {} due to exception {}", fileName, dir, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,416 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.core.snapshots;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This class is responsible to manage the persistent snapshots meta-data for the Solr indexes. The
|
||||
* persistent snapshots are implemented by relying on Lucene {@linkplain IndexDeletionPolicy}
|
||||
* abstraction to configure a specific {@linkplain IndexCommit} to be retained. The
|
||||
* {@linkplain IndexDeletionPolicyWrapper} in Solr uses this class to create/delete the Solr index
|
||||
* snapshots.
|
||||
*/
|
||||
public class SolrSnapshotMetaDataManager {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
public static final String SNAPSHOT_METADATA_DIR = "snapshot_metadata";
|
||||
|
||||
/**
|
||||
* A class defining the meta-data for a specific snapshot.
|
||||
*/
|
||||
public static class SnapshotMetaData {
|
||||
private String name;
|
||||
private String indexDirPath;
|
||||
private long generationNumber;
|
||||
|
||||
public SnapshotMetaData(String name, String indexDirPath, long generationNumber) {
|
||||
super();
|
||||
this.name = name;
|
||||
this.indexDirPath = indexDirPath;
|
||||
this.generationNumber = generationNumber;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getIndexDirPath() {
|
||||
return indexDirPath;
|
||||
}
|
||||
|
||||
public long getGenerationNumber() {
|
||||
return generationNumber;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("SnapshotMetaData[name=");
|
||||
builder.append(name);
|
||||
builder.append(", indexDirPath=");
|
||||
builder.append(indexDirPath);
|
||||
builder.append(", generation=");
|
||||
builder.append(generationNumber);
|
||||
builder.append("]");
|
||||
return builder.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/** Prefix used for the save file. */
|
||||
public static final String SNAPSHOTS_PREFIX = "snapshots_";
|
||||
private static final int VERSION_START = 0;
|
||||
private static final int VERSION_CURRENT = VERSION_START;
|
||||
private static final String CODEC_NAME = "solr-snapshots";
|
||||
|
||||
// The index writer which maintains the snapshots metadata
|
||||
private long nextWriteGen;
|
||||
|
||||
private final Directory dir;
|
||||
|
||||
/** Used to map snapshot name to snapshot meta-data. */
|
||||
protected final Map<String,SnapshotMetaData> nameToDetailsMapping = new LinkedHashMap<>();
|
||||
/** Used to figure out the *current* index data directory path */
|
||||
private final SolrCore solrCore;
|
||||
|
||||
/**
|
||||
* A constructor.
|
||||
*
|
||||
* @param dir The directory where the snapshot meta-data should be stored. Enables updating
|
||||
* the existing meta-data.
|
||||
* @throws IOException in case of errors.
|
||||
*/
|
||||
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir) throws IOException {
|
||||
this(solrCore, dir, OpenMode.CREATE_OR_APPEND);
|
||||
}
|
||||
|
||||
/**
|
||||
* A constructor.
|
||||
*
|
||||
* @param dir The directory where the snapshot meta-data is stored.
|
||||
* @param mode CREATE If previous meta-data should be erased.
|
||||
* APPEND If previous meta-data should be read and updated.
|
||||
* CREATE_OR_APPEND Creates a new meta-data structure if one does not exist
|
||||
* Updates the existing structure if one exists.
|
||||
* @throws IOException in case of errors.
|
||||
*/
|
||||
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir, OpenMode mode) throws IOException {
|
||||
this.solrCore = solrCore;
|
||||
this.dir = dir;
|
||||
|
||||
if (mode == OpenMode.CREATE) {
|
||||
deleteSnapshotMetadataFiles();
|
||||
}
|
||||
|
||||
loadFromSnapshotMetadataFile();
|
||||
|
||||
if (mode == OpenMode.APPEND && nextWriteGen == 0) {
|
||||
throw new IllegalStateException("no snapshots stored in this directory");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The snapshot meta-data directory
|
||||
*/
|
||||
public Directory getSnapshotsDir() {
|
||||
return dir;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method creates a new snapshot meta-data entry.
|
||||
*
|
||||
* @param name The name of the snapshot.
|
||||
* @param indexDirPath The directory path where the index files are stored.
|
||||
* @param gen The generation number for the {@linkplain IndexCommit} being snapshotted.
|
||||
* @throws IOException in case of I/O errors.
|
||||
*/
|
||||
public synchronized void snapshot(String name, String indexDirPath, long gen) throws IOException {
|
||||
Preconditions.checkNotNull(name);
|
||||
|
||||
log.info("Creating the snapshot named {} for core {} associated with index commit with generation {} in directory {}"
|
||||
, name, solrCore.getName(), gen, indexDirPath);
|
||||
|
||||
if(nameToDetailsMapping.containsKey(name)) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "A snapshot with name " + name + " already exists");
|
||||
}
|
||||
|
||||
SnapshotMetaData d = new SnapshotMetaData(name, indexDirPath, gen);
|
||||
nameToDetailsMapping.put(name, d);
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
persist();
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
try {
|
||||
release(name);
|
||||
} catch (Exception e) {
|
||||
// Suppress so we keep throwing original exception
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method deletes a previously created snapshot (if any).
|
||||
*
|
||||
* @param name The name of the snapshot to be deleted.
|
||||
* @return The snapshot meta-data if the snapshot with the snapshot name exists.
|
||||
* @throws IOException in case of I/O error
|
||||
*/
|
||||
public synchronized Optional<SnapshotMetaData> release(String name) throws IOException {
|
||||
log.info("Deleting the snapshot named {} for core {}", name, solrCore.getName());
|
||||
SnapshotMetaData result = nameToDetailsMapping.remove(Preconditions.checkNotNull(name));
|
||||
if(result != null) {
|
||||
boolean success = false;
|
||||
try {
|
||||
persist();
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
nameToDetailsMapping.put(name, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Optional.ofNullable(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns if snapshot is created for the specified generation number in
|
||||
* the *current* index directory.
|
||||
*
|
||||
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
|
||||
* @return true if the snapshot is created.
|
||||
* false otherwise.
|
||||
*/
|
||||
public synchronized boolean isSnapshotted(long genNumber) {
|
||||
return !nameToDetailsMapping.isEmpty() && isSnapshotted(solrCore.getIndexDir(), genNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns if snapshot is created for the specified generation number in
|
||||
* the specified index directory.
|
||||
*
|
||||
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
|
||||
* @return true if the snapshot is created.
|
||||
* false otherwise.
|
||||
*/
|
||||
public synchronized boolean isSnapshotted(String indexDirPath, long genNumber) {
|
||||
return !nameToDetailsMapping.isEmpty()
|
||||
&& nameToDetailsMapping.values().stream()
|
||||
.anyMatch(entry -> entry.getIndexDirPath().equals(indexDirPath) && entry.getGenerationNumber() == genNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the snapshot meta-data for the specified name (if it exists).
|
||||
*
|
||||
* @param name The name of the snapshot
|
||||
* @return The snapshot meta-data if exists.
|
||||
*/
|
||||
public synchronized Optional<SnapshotMetaData> getSnapshotMetaData(String name) {
|
||||
return Optional.ofNullable(nameToDetailsMapping.get(name));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A list of snapshots created so far.
|
||||
*/
|
||||
public synchronized List<String> listSnapshots() {
|
||||
// We create a copy for thread safety.
|
||||
return new ArrayList<>(nameToDetailsMapping.keySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns a list of snapshots created in a specified index directory.
|
||||
*
|
||||
* @param indexDirPath The index directory path.
|
||||
* @return a list snapshots stored in the specified directory.
|
||||
*/
|
||||
public synchronized Collection<SnapshotMetaData> listSnapshotsInIndexDir(String indexDirPath) {
|
||||
return nameToDetailsMapping.values().stream()
|
||||
.filter(entry -> indexDirPath.equals(entry.getIndexDirPath()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the {@linkplain IndexCommit} associated with the specified
|
||||
* <code>commitName</code>. A snapshot with specified <code>commitName</code> must
|
||||
* be created before invoking this method.
|
||||
*
|
||||
* @param commitName The name of persisted commit
|
||||
* @return the {@linkplain IndexCommit}
|
||||
* @throws IOException in case of I/O error.
|
||||
*/
|
||||
public Optional<IndexCommit> getIndexCommitByName(String commitName) throws IOException {
|
||||
Optional<IndexCommit> result = Optional.empty();
|
||||
Optional<SnapshotMetaData> metaData = getSnapshotMetaData(commitName);
|
||||
if (metaData.isPresent()) {
|
||||
String indexDirPath = metaData.get().getIndexDirPath();
|
||||
long gen = metaData.get().getGenerationNumber();
|
||||
|
||||
Directory d = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
|
||||
try {
|
||||
result = DirectoryReader.listCommits(d)
|
||||
.stream()
|
||||
.filter(ic -> ic.getGeneration() == gen)
|
||||
.findAny();
|
||||
|
||||
if (!result.isPresent()) {
|
||||
log.warn("Unable to find commit with generation {} in the directory {}", gen, indexDirPath);
|
||||
}
|
||||
|
||||
} finally {
|
||||
solrCore.getDirectoryFactory().release(d);
|
||||
}
|
||||
} else {
|
||||
log.warn("Commit with name {} is not persisted for core {}", commitName, solrCore.getName());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private synchronized void persist() throws IOException {
|
||||
String fileName = SNAPSHOTS_PREFIX + nextWriteGen;
|
||||
IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT);
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
out.writeVInt(nameToDetailsMapping.size());
|
||||
for(Entry<String,SnapshotMetaData> ent : nameToDetailsMapping.entrySet()) {
|
||||
out.writeString(ent.getKey());
|
||||
out.writeString(ent.getValue().getIndexDirPath());
|
||||
out.writeVLong(ent.getValue().getGenerationNumber());
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(out);
|
||||
IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
|
||||
} else {
|
||||
IOUtils.close(out);
|
||||
}
|
||||
}
|
||||
|
||||
dir.sync(Collections.singletonList(fileName));
|
||||
|
||||
if (nextWriteGen > 0) {
|
||||
String lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen-1);
|
||||
// exception OK: likely it didn't exist
|
||||
IOUtils.deleteFilesIgnoringExceptions(dir, lastSaveFile);
|
||||
}
|
||||
|
||||
nextWriteGen++;
|
||||
}
|
||||
|
||||
private synchronized void deleteSnapshotMetadataFiles() throws IOException {
|
||||
for(String file : dir.listAll()) {
|
||||
if (file.startsWith(SNAPSHOTS_PREFIX)) {
|
||||
dir.deleteFile(file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the snapshot meta-data information from the given {@link Directory}.
|
||||
*/
|
||||
private synchronized void loadFromSnapshotMetadataFile() throws IOException {
|
||||
log.info("Loading from snapshot metadata file...");
|
||||
long genLoaded = -1;
|
||||
IOException ioe = null;
|
||||
List<String> snapshotFiles = new ArrayList<>();
|
||||
for(String file : dir.listAll()) {
|
||||
if (file.startsWith(SNAPSHOTS_PREFIX)) {
|
||||
long gen = Long.parseLong(file.substring(SNAPSHOTS_PREFIX.length()));
|
||||
if (genLoaded == -1 || gen > genLoaded) {
|
||||
snapshotFiles.add(file);
|
||||
Map<String, SnapshotMetaData> snapshotMetaDataMapping = new HashMap<>();
|
||||
IndexInput in = dir.openInput(file, IOContext.DEFAULT);
|
||||
try {
|
||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
int count = in.readVInt();
|
||||
for(int i=0;i<count;i++) {
|
||||
String name = in.readString();
|
||||
String indexDirPath = in.readString();
|
||||
long commitGen = in.readVLong();
|
||||
snapshotMetaDataMapping.put(name, new SnapshotMetaData(name, indexDirPath, commitGen));
|
||||
}
|
||||
} catch (IOException ioe2) {
|
||||
// Save first exception & throw in the end
|
||||
if (ioe == null) {
|
||||
ioe = ioe2;
|
||||
}
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
|
||||
genLoaded = gen;
|
||||
nameToDetailsMapping.clear();
|
||||
nameToDetailsMapping.putAll(snapshotMetaDataMapping);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (genLoaded == -1) {
|
||||
// Nothing was loaded...
|
||||
if (ioe != null) {
|
||||
// ... not for lack of trying:
|
||||
throw ioe;
|
||||
}
|
||||
} else {
|
||||
if (snapshotFiles.size() > 1) {
|
||||
// Remove any broken / old snapshot files:
|
||||
String curFileName = SNAPSHOTS_PREFIX + genLoaded;
|
||||
for(String file : snapshotFiles) {
|
||||
if (!curFileName.equals(file)) {
|
||||
IOUtils.deleteFilesIgnoringExceptions(dir, file);
|
||||
}
|
||||
}
|
||||
}
|
||||
nextWriteGen = 1+genLoaded;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Core classes for Solr's persistent snapshots functionality
|
||||
*/
|
||||
package org.apache.solr.core.snapshots;
|
|
@ -160,7 +160,7 @@ public class BlobHandler extends RequestHandlerBase implements PluginInfoInitial
|
|||
} else {
|
||||
String q = "blobName:{0}";
|
||||
if (version != -1) q = "id:{0}/{1}";
|
||||
QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), "lucene", req);
|
||||
QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
|
||||
final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
|
||||
if (docs.totalHits > 0) {
|
||||
rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {
|
||||
|
|
|
@ -121,6 +121,11 @@ public class CdcrParams {
|
|||
*/
|
||||
public final static String COUNTER_DELETES = "deletes";
|
||||
|
||||
/**
|
||||
* Counter for Bootstrap operations *
|
||||
*/
|
||||
public final static String COUNTER_BOOTSTRAP = "bootstraps";
|
||||
|
||||
/**
|
||||
* A list of errors per target collection *
|
||||
*/
|
||||
|
@ -165,7 +170,10 @@ public class CdcrParams {
|
|||
LASTPROCESSEDVERSION,
|
||||
QUEUES,
|
||||
OPS,
|
||||
ERRORS;
|
||||
ERRORS,
|
||||
BOOTSTRAP,
|
||||
BOOTSTRAP_STATUS,
|
||||
CANCEL_BOOTSTRAP;
|
||||
|
||||
public static CdcrAction get(String p) {
|
||||
if (p != null) {
|
||||
|
|
|
@ -119,7 +119,7 @@ public class CdcrReplicator implements Runnable {
|
|||
// we might have read a single commit operation and reached the end of the update logs
|
||||
logReader.forwardSeek(subReader);
|
||||
|
||||
log.debug("Forwarded {} updates to target {}", counter, state.getTargetCollection());
|
||||
log.info("Forwarded {} updates to target {}", counter, state.getTargetCollection());
|
||||
} catch (Exception e) {
|
||||
// report error and update error stats
|
||||
this.handleException(e);
|
||||
|
@ -150,13 +150,13 @@ public class CdcrReplicator implements Runnable {
|
|||
if (e instanceof CdcrReplicatorException) {
|
||||
UpdateRequest req = ((CdcrReplicatorException) e).req;
|
||||
UpdateResponse rsp = ((CdcrReplicatorException) e).rsp;
|
||||
log.warn("Failed to forward update request {}. Got response {}", req, rsp);
|
||||
log.warn("Failed to forward update request {} to target: {}. Got response {}", req, state.getTargetCollection(), rsp);
|
||||
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
|
||||
} else if (e instanceof CloudSolrClient.RouteException) {
|
||||
log.warn("Failed to forward update request", e);
|
||||
log.warn("Failed to forward update request to target: " + state.getTargetCollection(), e);
|
||||
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
|
||||
} else {
|
||||
log.warn("Failed to forward update request", e);
|
||||
log.warn("Failed to forward update request to target: " + state.getTargetCollection(), e);
|
||||
state.reportError(CdcrReplicatorState.ErrorType.INTERNAL);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,29 +16,49 @@
|
|||
*/
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.ExecutorUtil;
|
||||
import org.apache.solr.common.util.IOUtils;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SolrjNamedThreadFactory;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.update.CdcrUpdateLog;
|
||||
import org.apache.solr.util.TimeOut;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_STATUS;
|
||||
|
||||
class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
||||
|
||||
private static final int MAX_BOOTSTRAP_ATTEMPTS = 5;
|
||||
private static final int BOOTSTRAP_RETRY_DELAY_MS = 2000;
|
||||
// 6 hours is hopefully long enough for most indexes
|
||||
private static final long BOOTSTRAP_TIMEOUT_SECONDS = 6L * 3600L * 3600L;
|
||||
|
||||
private List<CdcrReplicatorState> replicatorStates;
|
||||
|
||||
private final CdcrReplicatorScheduler scheduler;
|
||||
|
@ -48,6 +68,9 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
|||
private SolrCore core;
|
||||
private String path;
|
||||
|
||||
private ExecutorService bootstrapExecutor;
|
||||
private volatile BootstrapStatusRunnable bootstrapStatusRunnable;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
CdcrReplicatorManager(final SolrCore core, String path,
|
||||
|
@ -104,12 +127,20 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
|||
@Override
|
||||
public synchronized void stateUpdate() {
|
||||
if (leaderStateManager.amILeader() && processStateManager.getState().equals(CdcrParams.ProcessState.STARTED)) {
|
||||
if (replicatorStates.size() > 0) {
|
||||
this.bootstrapExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(replicatorStates.size(),
|
||||
new SolrjNamedThreadFactory("cdcr-bootstrap-status"));
|
||||
}
|
||||
this.initLogReaders();
|
||||
this.scheduler.start();
|
||||
return;
|
||||
}
|
||||
|
||||
this.scheduler.shutdown();
|
||||
if (bootstrapExecutor != null) {
|
||||
IOUtils.closeQuietly(bootstrapStatusRunnable);
|
||||
ExecutorUtil.shutdownAndAwaitTermination(bootstrapExecutor);
|
||||
}
|
||||
this.closeLogReaders();
|
||||
}
|
||||
|
||||
|
@ -117,7 +148,7 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
|||
return replicatorStates;
|
||||
}
|
||||
|
||||
void initLogReaders() {
|
||||
private void initLogReaders() {
|
||||
String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
|
||||
String shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
|
||||
CdcrUpdateLog ulog = (CdcrUpdateLog) core.getUpdateHandler().getUpdateLog();
|
||||
|
@ -129,8 +160,23 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
|||
log.info("Create new update log reader for target {} with checkpoint {} @ {}:{}", state.getTargetCollection(),
|
||||
checkpoint, collectionName, shard);
|
||||
CdcrUpdateLog.CdcrLogReader reader = ulog.newLogReader();
|
||||
reader.seek(checkpoint);
|
||||
boolean seek = reader.seek(checkpoint);
|
||||
state.init(reader);
|
||||
if (!seek) {
|
||||
// targetVersion is lower than the oldest known entry.
|
||||
// In this scenario, it probably means that there is a gap in the updates log.
|
||||
// the best we can do here is to bootstrap the target leader by replicating the full index
|
||||
final String targetCollection = state.getTargetCollection();
|
||||
state.setBootstrapInProgress(true);
|
||||
log.info("Attempting to bootstrap target collection: {}, shard: {}", targetCollection, shard);
|
||||
bootstrapStatusRunnable = new BootstrapStatusRunnable(core, state);
|
||||
log.info("Submitting bootstrap task to executor");
|
||||
try {
|
||||
bootstrapExecutor.submit(bootstrapStatusRunnable);
|
||||
} catch (Exception e) {
|
||||
log.error("Unable to submit bootstrap call to executor", e);
|
||||
}
|
||||
}
|
||||
} catch (IOException | SolrServerException | SolrException e) {
|
||||
log.warn("Unable to instantiate the log reader for target collection " + state.getTargetCollection(), e);
|
||||
} catch (InterruptedException e) {
|
||||
|
@ -164,11 +210,203 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
|||
*/
|
||||
void shutdown() {
|
||||
this.scheduler.shutdown();
|
||||
if (bootstrapExecutor != null) {
|
||||
IOUtils.closeQuietly(bootstrapStatusRunnable);
|
||||
ExecutorUtil.shutdownAndAwaitTermination(bootstrapExecutor);
|
||||
}
|
||||
for (CdcrReplicatorState state : replicatorStates) {
|
||||
state.shutdown();
|
||||
}
|
||||
replicatorStates.clear();
|
||||
}
|
||||
|
||||
private class BootstrapStatusRunnable implements Runnable, Closeable {
|
||||
private final CdcrReplicatorState state;
|
||||
private final String targetCollection;
|
||||
private final String shard;
|
||||
private final String collectionName;
|
||||
private final CdcrUpdateLog ulog;
|
||||
private final String myCoreUrl;
|
||||
|
||||
private volatile boolean closed = false;
|
||||
|
||||
BootstrapStatusRunnable(SolrCore core, CdcrReplicatorState state) {
|
||||
this.collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
|
||||
this.shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
|
||||
this.ulog = (CdcrUpdateLog) core.getUpdateHandler().getUpdateLog();
|
||||
this.state = state;
|
||||
this.targetCollection = state.getTargetCollection();
|
||||
String baseUrl = core.getCoreDescriptor().getCoreContainer().getZkController().getBaseUrl();
|
||||
this.myCoreUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, core.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
closed = true;
|
||||
try {
|
||||
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
|
||||
String leaderCoreUrl = leader.getCoreUrl();
|
||||
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
|
||||
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
|
||||
sendCdcrCommand(client, CdcrParams.CdcrAction.CANCEL_BOOTSTRAP);
|
||||
} catch (SolrServerException e) {
|
||||
log.error("Error sending cancel bootstrap message to target collection: {} shard: {} leader: {}",
|
||||
targetCollection, shard, leaderCoreUrl);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
log.error("Interrupted while closing BootstrapStatusRunnable", e);
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
int retries = 1;
|
||||
boolean success = false;
|
||||
try {
|
||||
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
|
||||
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
|
||||
}
|
||||
TimeOut timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS);
|
||||
while (!timeOut.hasTimedOut()) {
|
||||
if (closed) {
|
||||
log.warn("Cancelling waiting for bootstrap on target: {} shard: {} to complete", targetCollection, shard);
|
||||
state.setBootstrapInProgress(false);
|
||||
break;
|
||||
}
|
||||
BootstrapStatus status = getBoostrapStatus();
|
||||
if (status == BootstrapStatus.RUNNING) {
|
||||
try {
|
||||
log.info("CDCR bootstrap running for {} seconds, sleeping for {} ms",
|
||||
BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS), BOOTSTRAP_RETRY_DELAY_MS);
|
||||
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
} else if (status == BootstrapStatus.COMPLETED) {
|
||||
log.info("CDCR bootstrap successful in {} seconds", BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
|
||||
long checkpoint = CdcrReplicatorManager.this.getCheckpoint(state);
|
||||
log.info("Create new update log reader for target {} with checkpoint {} @ {}:{}", state.getTargetCollection(),
|
||||
checkpoint, collectionName, shard);
|
||||
CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader();
|
||||
reader1.seek(checkpoint);
|
||||
success = true;
|
||||
break;
|
||||
} else if (status == BootstrapStatus.FAILED) {
|
||||
log.warn("CDCR bootstrap failed in {} seconds", BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
|
||||
// let's retry a fixed number of times before giving up
|
||||
if (retries >= MAX_BOOTSTRAP_ATTEMPTS) {
|
||||
log.error("Unable to bootstrap the target collection: {}, shard: {} even after {} retries", targetCollection, shard, retries);
|
||||
break;
|
||||
} else {
|
||||
log.info("Retry: {} - Attempting to bootstrap target collection: {} shard: {}", retries, targetCollection, shard);
|
||||
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
|
||||
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
|
||||
}
|
||||
timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS); // reset the timer
|
||||
retries++;
|
||||
}
|
||||
} else if (status == BootstrapStatus.NOTFOUND) {
|
||||
// the leader of the target shard may have changed and therefore there is no record of the
|
||||
// bootstrap process so we must retry the operation
|
||||
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
|
||||
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
|
||||
}
|
||||
retries = 1;
|
||||
timeOut = new TimeOut(6L * 3600L * 3600L, TimeUnit.SECONDS); // reset the timer
|
||||
} else if (status == BootstrapStatus.UNKNOWN) {
|
||||
// we were not able to query the status on the remote end
|
||||
// so just sleep for a bit and try again
|
||||
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
|
||||
}
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
log.info("Bootstrap thread interrupted");
|
||||
state.reportError(CdcrReplicatorState.ErrorType.INTERNAL);
|
||||
Thread.currentThread().interrupt();
|
||||
} catch (IOException | SolrServerException | SolrException e) {
|
||||
log.error("Unable to bootstrap the target collection " + targetCollection + " shard: " + shard, e);
|
||||
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
|
||||
} finally {
|
||||
if (success) {
|
||||
log.info("Bootstrap successful, giving the go-ahead to replicator");
|
||||
state.setBootstrapInProgress(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private BootstrapStatus sendBootstrapCommand() throws InterruptedException {
|
||||
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
|
||||
String leaderCoreUrl = leader.getCoreUrl();
|
||||
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
|
||||
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
|
||||
log.info("Attempting to bootstrap target collection: {} shard: {} leader: {}", targetCollection, shard, leaderCoreUrl);
|
||||
try {
|
||||
NamedList response = sendCdcrCommand(client, CdcrParams.CdcrAction.BOOTSTRAP, ReplicationHandler.MASTER_URL, myCoreUrl);
|
||||
log.debug("CDCR Bootstrap response: {}", response);
|
||||
String status = response.get(RESPONSE_STATUS).toString();
|
||||
return BootstrapStatus.valueOf(status.toUpperCase(Locale.ROOT));
|
||||
} catch (Exception e) {
|
||||
log.error("Exception submitting bootstrap request", e);
|
||||
return BootstrapStatus.UNKNOWN;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("There shouldn't be an IOException while closing but there was!", e);
|
||||
}
|
||||
return BootstrapStatus.UNKNOWN;
|
||||
}
|
||||
|
||||
private BootstrapStatus getBoostrapStatus() throws InterruptedException {
|
||||
try {
|
||||
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
|
||||
String leaderCoreUrl = leader.getCoreUrl();
|
||||
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
|
||||
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
|
||||
NamedList response = sendCdcrCommand(client, CdcrParams.CdcrAction.BOOTSTRAP_STATUS);
|
||||
String status = (String) response.get(RESPONSE_STATUS);
|
||||
BootstrapStatus bootstrapStatus = BootstrapStatus.valueOf(status.toUpperCase(Locale.ROOT));
|
||||
if (bootstrapStatus == BootstrapStatus.RUNNING) {
|
||||
return BootstrapStatus.RUNNING;
|
||||
} else if (bootstrapStatus == BootstrapStatus.COMPLETED) {
|
||||
return BootstrapStatus.COMPLETED;
|
||||
} else if (bootstrapStatus == BootstrapStatus.FAILED) {
|
||||
return BootstrapStatus.FAILED;
|
||||
} else if (bootstrapStatus == BootstrapStatus.NOTFOUND) {
|
||||
log.warn("Bootstrap process was not found on target collection: {} shard: {}, leader: {}", targetCollection, shard, leaderCoreUrl);
|
||||
return BootstrapStatus.NOTFOUND;
|
||||
} else if (bootstrapStatus == BootstrapStatus.CANCELLED) {
|
||||
return BootstrapStatus.CANCELLED;
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Unknown status: " + status + " returned by BOOTSTRAP_STATUS command");
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Exception during bootstrap status request", e);
|
||||
return BootstrapStatus.UNKNOWN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private NamedList sendCdcrCommand(SolrClient client, CdcrParams.CdcrAction action, String... params) throws SolrServerException, IOException {
|
||||
ModifiableSolrParams solrParams = new ModifiableSolrParams();
|
||||
solrParams.set(CommonParams.QT, "/cdcr");
|
||||
solrParams.set(CommonParams.ACTION, action.toString());
|
||||
for (int i = 0; i < params.length - 1; i+=2) {
|
||||
solrParams.set(params[i], params[i + 1]);
|
||||
}
|
||||
SolrRequest request = new QueryRequest(solrParams);
|
||||
return client.request(request);
|
||||
}
|
||||
|
||||
private enum BootstrapStatus {
|
||||
SUBMITTED,
|
||||
RUNNING,
|
||||
COMPLETED,
|
||||
FAILED,
|
||||
NOTFOUND,
|
||||
CANCELLED,
|
||||
UNKNOWN
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -77,7 +77,11 @@ class CdcrReplicatorScheduler {
|
|||
CdcrReplicatorState state = statesQueue.poll();
|
||||
assert state != null; // Should never happen
|
||||
try {
|
||||
if (!state.isBootstrapInProgress()) {
|
||||
new CdcrReplicator(state, batchSize).run();
|
||||
} else {
|
||||
log.debug("Replicator state is bootstrapping, skipping replication for target collection {}", state.getTargetCollection());
|
||||
}
|
||||
} finally {
|
||||
statesQueue.offer(state);
|
||||
}
|
||||
|
|
|
@ -27,6 +27,8 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.update.CdcrUpdateLog;
|
||||
|
@ -53,6 +55,9 @@ class CdcrReplicatorState {
|
|||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private final AtomicBoolean bootstrapInProgress = new AtomicBoolean(false);
|
||||
private final AtomicInteger numBootstraps = new AtomicInteger();
|
||||
|
||||
CdcrReplicatorState(final String targetCollection, final String zkHost, final CloudSolrClient targetClient) {
|
||||
this.targetCollection = targetCollection;
|
||||
this.targetClient = targetClient;
|
||||
|
@ -164,6 +169,24 @@ class CdcrReplicatorState {
|
|||
return this.benchmarkTimer;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if a bootstrap operation is in progress, false otherwise
|
||||
*/
|
||||
boolean isBootstrapInProgress() {
|
||||
return bootstrapInProgress.get();
|
||||
}
|
||||
|
||||
void setBootstrapInProgress(boolean inProgress) {
|
||||
if (bootstrapInProgress.compareAndSet(true, false)) {
|
||||
numBootstraps.incrementAndGet();
|
||||
}
|
||||
bootstrapInProgress.set(inProgress);
|
||||
}
|
||||
|
||||
public int getNumBootstraps() {
|
||||
return numBootstraps.get();
|
||||
}
|
||||
|
||||
enum ErrorType {
|
||||
INTERNAL,
|
||||
BAD_REQUEST;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
|
@ -24,14 +25,20 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CancellationException;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
|
||||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
|
@ -41,21 +48,33 @@ import org.apache.solr.common.cloud.ZkNodeProps;
|
|||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ExecutorUtil;
|
||||
import org.apache.solr.common.util.IOUtils;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.CloseHook;
|
||||
import org.apache.solr.core.PluginBag;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.CdcrUpdateLog;
|
||||
import org.apache.solr.update.UpdateLog;
|
||||
import org.apache.solr.update.VersionInfo;
|
||||
import org.apache.solr.update.processor.DistributedUpdateProcessor;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.handler.admin.CoreAdminHandler.COMPLETED;
|
||||
import static org.apache.solr.handler.admin.CoreAdminHandler.FAILED;
|
||||
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE;
|
||||
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_MESSAGE;
|
||||
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_STATUS;
|
||||
import static org.apache.solr.handler.admin.CoreAdminHandler.RUNNING;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This request handler implements the CDCR API and is responsible of the execution of the
|
||||
|
@ -199,6 +218,18 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
this.handleErrorsAction(req, rsp);
|
||||
break;
|
||||
}
|
||||
case BOOTSTRAP: {
|
||||
this.handleBootstrapAction(req, rsp);
|
||||
break;
|
||||
}
|
||||
case BOOTSTRAP_STATUS: {
|
||||
this.handleBootstrapStatus(req, rsp);
|
||||
break;
|
||||
}
|
||||
case CANCEL_BOOTSTRAP: {
|
||||
this.handleCancelBootstrap(req, rsp);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
throw new RuntimeException("Unknown action: " + action);
|
||||
}
|
||||
|
@ -409,10 +440,20 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
}
|
||||
|
||||
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
|
||||
VersionInfo versionInfo = ulog.getVersionInfo();
|
||||
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
|
||||
List<Long> versions = recentUpdates.getVersions(1);
|
||||
long lastVersion = versions.isEmpty() ? -1 : Math.abs(versions.get(0));
|
||||
rsp.add(CdcrParams.CHECKPOINT, lastVersion);
|
||||
long maxVersionFromRecent = recentUpdates.getMaxRecentVersion();
|
||||
long maxVersionFromIndex = versionInfo.getMaxVersionFromIndex(req.getSearcher());
|
||||
log.info("Found maxVersionFromRecent {} maxVersionFromIndex {}", maxVersionFromRecent, maxVersionFromIndex);
|
||||
// there is no race with ongoing bootstrap because we don't expect any updates to come from the source
|
||||
long maxVersion = Math.max(maxVersionFromIndex, maxVersionFromRecent);
|
||||
if (maxVersion == 0L) {
|
||||
maxVersion = -1;
|
||||
}
|
||||
rsp.add(CdcrParams.CHECKPOINT, maxVersion);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Action '" + CdcrParams.CdcrAction.SHARDCHECKPOINT +
|
||||
"' could not read max version");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -574,6 +615,192 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
rsp.add(CdcrParams.ERRORS, hosts);
|
||||
}
|
||||
|
||||
private AtomicBoolean running = new AtomicBoolean();
|
||||
private volatile Future<Boolean> bootstrapFuture;
|
||||
private volatile BootstrapCallable bootstrapCallable;
|
||||
|
||||
private void handleBootstrapAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
|
||||
String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
|
||||
String shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
|
||||
if (!leaderStateManager.amILeader()) {
|
||||
log.warn("Action {} sent to non-leader replica @ {}:{}", CdcrParams.CdcrAction.BOOTSTRAP, collectionName, shard);
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Action " + CdcrParams.CdcrAction.BOOTSTRAP +
|
||||
" sent to non-leader replica");
|
||||
}
|
||||
|
||||
Runnable runnable = () -> {
|
||||
Lock recoveryLock = req.getCore().getSolrCoreState().getRecoveryLock();
|
||||
boolean locked = recoveryLock.tryLock();
|
||||
try {
|
||||
if (!locked) {
|
||||
handleCancelBootstrap(req, rsp);
|
||||
} else if (leaderStateManager.amILeader()) {
|
||||
running.set(true);
|
||||
String masterUrl = req.getParams().get(ReplicationHandler.MASTER_URL);
|
||||
bootstrapCallable = new BootstrapCallable(masterUrl, core);
|
||||
bootstrapFuture = core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getRecoveryExecutor().submit(bootstrapCallable);
|
||||
try {
|
||||
bootstrapFuture.get();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
log.warn("Bootstrap was interrupted", e);
|
||||
} catch (ExecutionException e) {
|
||||
log.error("Bootstrap operation failed", e);
|
||||
}
|
||||
} else {
|
||||
log.error("Action {} sent to non-leader replica @ {}:{}. Aborting bootstrap.", CdcrParams.CdcrAction.BOOTSTRAP, collectionName, shard);
|
||||
}
|
||||
} finally {
|
||||
if (locked) {
|
||||
running.set(false);
|
||||
recoveryLock.unlock();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getUpdateExecutor().submit(runnable);
|
||||
rsp.add(RESPONSE_STATUS, "submitted");
|
||||
} catch (RejectedExecutionException ree) {
|
||||
// no problem, we're probably shutting down
|
||||
rsp.add(RESPONSE_STATUS, "failed");
|
||||
}
|
||||
}
|
||||
|
||||
private void handleCancelBootstrap(SolrQueryRequest req, SolrQueryResponse rsp) {
|
||||
BootstrapCallable callable = this.bootstrapCallable;
|
||||
IOUtils.closeQuietly(callable);
|
||||
rsp.add(RESPONSE_STATUS, "cancelled");
|
||||
}
|
||||
|
||||
private void handleBootstrapStatus(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
|
||||
if (running.get()) {
|
||||
rsp.add(RESPONSE_STATUS, RUNNING);
|
||||
return;
|
||||
}
|
||||
|
||||
Future<Boolean> future = bootstrapFuture;
|
||||
BootstrapCallable callable = this.bootstrapCallable;
|
||||
if (future == null) {
|
||||
rsp.add(RESPONSE_STATUS, "notfound");
|
||||
rsp.add(RESPONSE_MESSAGE, "No bootstrap found in running, completed or failed states");
|
||||
} else if (future.isCancelled() || callable.isClosed()) {
|
||||
rsp.add(RESPONSE_STATUS, "cancelled");
|
||||
} else if (future.isDone()) {
|
||||
// could be a normal termination or an exception
|
||||
try {
|
||||
Boolean result = future.get();
|
||||
if (result) {
|
||||
rsp.add(RESPONSE_STATUS, COMPLETED);
|
||||
} else {
|
||||
rsp.add(RESPONSE_STATUS, FAILED);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
// should not happen?
|
||||
} catch (ExecutionException e) {
|
||||
rsp.add(RESPONSE_STATUS, FAILED);
|
||||
rsp.add(RESPONSE, e);
|
||||
} catch (CancellationException ce) {
|
||||
rsp.add(RESPONSE_STATUS, FAILED);
|
||||
rsp.add(RESPONSE_MESSAGE, "Bootstrap was cancelled");
|
||||
}
|
||||
} else {
|
||||
rsp.add(RESPONSE_STATUS, RUNNING);
|
||||
}
|
||||
}
|
||||
|
||||
private static class BootstrapCallable implements Callable<Boolean>, Closeable {
|
||||
private final String masterUrl;
|
||||
private final SolrCore core;
|
||||
private volatile boolean closed = false;
|
||||
|
||||
/**
 * @param masterUrl URL of the core to fetch the index from
 * @param core      the local core to bootstrap
 */
BootstrapCallable(String masterUrl, SolrCore core) {
  this.core = core;
  this.masterUrl = masterUrl;
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
closed = true;
|
||||
SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
|
||||
ReplicationHandler replicationHandler = (ReplicationHandler) handler;
|
||||
replicationHandler.abortFetch();
|
||||
}
|
||||
|
||||
public boolean isClosed() {
|
||||
return closed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean call() throws Exception {
|
||||
boolean success = false;
|
||||
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
|
||||
// we start buffering updates as a safeguard however we do not expect
|
||||
// to receive any updates from the source during bootstrap
|
||||
ulog.bufferUpdates();
|
||||
try {
|
||||
commitOnLeader(masterUrl);
|
||||
// use rep handler directly, so we can do this sync rather than async
|
||||
SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
|
||||
ReplicationHandler replicationHandler = (ReplicationHandler) handler;
|
||||
|
||||
if (replicationHandler == null) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
|
||||
"Skipping recovery, no " + ReplicationHandler.PATH + " handler found");
|
||||
}
|
||||
|
||||
ModifiableSolrParams solrParams = new ModifiableSolrParams();
|
||||
solrParams.set(ReplicationHandler.MASTER_URL, masterUrl);
|
||||
// we do not want the raw tlog files from the source
|
||||
solrParams.set(ReplicationHandler.TLOG_FILES, false);
|
||||
|
||||
success = replicationHandler.doFetch(solrParams, false);
|
||||
|
||||
// this is required because this callable can race with HttpSolrCall#destroy
|
||||
// which clears the request info.
|
||||
// Applying buffered updates fails without the following line because LogReplayer
|
||||
// also tries to set request info and fails with AssertionError
|
||||
SolrRequestInfo.clearRequestInfo();
|
||||
|
||||
Future<UpdateLog.RecoveryInfo> future = ulog.applyBufferedUpdates();
|
||||
if (future == null) {
|
||||
// no replay needed
|
||||
log.info("No replay needed.");
|
||||
} else {
|
||||
log.info("Replaying buffered documents.");
|
||||
// wait for replay
|
||||
UpdateLog.RecoveryInfo report = future.get();
|
||||
if (report.failed) {
|
||||
SolrException.log(log, "Replay failed");
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Replay failed");
|
||||
}
|
||||
}
|
||||
return success;
|
||||
} finally {
|
||||
if (closed || !success) {
|
||||
// we cannot apply the buffer in this case because it will introduce newer versions in the
|
||||
// update log and then the source cluster will get those versions via collectioncheckpoint
|
||||
// causing the versions in between to be completely missed
|
||||
boolean dropped = ulog.dropBufferedUpdates();
|
||||
assert dropped;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void commitOnLeader(String leaderUrl) throws SolrServerException,
|
||||
IOException {
|
||||
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderUrl).build()) {
|
||||
client.setConnectionTimeout(30000);
|
||||
UpdateRequest ureq = new UpdateRequest();
|
||||
ureq.setParams(new ModifiableSolrParams());
|
||||
ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
|
||||
ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
|
||||
ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(
|
||||
client);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Manage Cross Data Center Replication";
|
||||
|
|
|
@ -82,6 +82,9 @@ import org.apache.solr.core.DirectoryFactory;
|
|||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.apache.solr.handler.ReplicationHandler.*;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
@ -468,9 +471,18 @@ public class IndexFetcher {
|
|||
// let the system know we are changing dir's and the old one
|
||||
// may be closed
|
||||
if (indexDir != null) {
|
||||
LOG.info("removing old index directory " + indexDir);
|
||||
solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
|
||||
|
||||
SolrSnapshotMetaDataManager snapshotsMgr = solrCore.getSnapshotMetaDataManager();
|
||||
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
|
||||
|
||||
// Delete the old index directory only if no snapshot exists in that directory.
|
||||
if(snapshots.isEmpty()) {
|
||||
LOG.info("removing old index directory " + indexDir);
|
||||
solrCore.getDirectoryFactory().remove(indexDir);
|
||||
} else {
|
||||
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -738,14 +750,14 @@ public class IndexFetcher {
|
|||
}
|
||||
|
||||
private void openNewSearcherAndUpdateCommitPoint() throws IOException {
|
||||
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
|
||||
new ModifiableSolrParams());
|
||||
|
||||
RefCounted<SolrIndexSearcher> searcher = null;
|
||||
IndexCommit commitPoint;
|
||||
// must get the latest solrCore object because the one we have might be closed because of a reload
|
||||
// todo stop keeping solrCore around
|
||||
SolrCore core = solrCore.getCoreDescriptor().getCoreContainer().getCore(solrCore.getName());
|
||||
try {
|
||||
Future[] waitSearcher = new Future[1];
|
||||
searcher = solrCore.getSearcher(true, true, waitSearcher, true);
|
||||
searcher = core.getSearcher(true, true, waitSearcher, true);
|
||||
if (waitSearcher[0] != null) {
|
||||
try {
|
||||
waitSearcher[0].get();
|
||||
|
@ -755,10 +767,10 @@ public class IndexFetcher {
|
|||
}
|
||||
commitPoint = searcher.get().getIndexReader().getIndexCommit();
|
||||
} finally {
|
||||
req.close();
|
||||
if (searcher != null) {
|
||||
searcher.decref();
|
||||
}
|
||||
core.close();
|
||||
}
|
||||
|
||||
// update the commit point in replication handler
|
||||
|
|
|
@ -125,7 +125,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
filters = new ArrayList<>();
|
||||
for (String fq : fqs) {
|
||||
if (fq != null && fq.trim().length() != 0) {
|
||||
QParser fqp = QParser.getParser(fq, null, req);
|
||||
QParser fqp = QParser.getParser(fq, req);
|
||||
filters.add(fqp.getQuery());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,6 +87,7 @@ import org.apache.solr.core.SolrDeletionPolicy;
|
|||
import org.apache.solr.core.SolrEventListener;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
@ -299,9 +300,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
rsp.add("message","No slave configured");
|
||||
}
|
||||
} else if (command.equalsIgnoreCase(CMD_ABORT_FETCH)) {
|
||||
IndexFetcher fetcher = currentIndexFetcher;
|
||||
if (fetcher != null){
|
||||
fetcher.abortFetch();
|
||||
if (abortFetch()){
|
||||
rsp.add(STATUS, OK_STATUS);
|
||||
} else {
|
||||
rsp.add(STATUS,ERR_STATUS);
|
||||
|
@ -320,6 +319,16 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
}
|
||||
}
|
||||
|
||||
public boolean abortFetch() {
|
||||
IndexFetcher fetcher = currentIndexFetcher;
|
||||
if (fetcher != null){
|
||||
fetcher.abortFetch();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteSnapshot(ModifiableSolrParams params) {
|
||||
String name = params.get(NAME);
|
||||
if(name == null) {
|
||||
|
@ -512,12 +521,25 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
numberToKeep = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
IndexCommit indexCommit = null;
|
||||
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
|
||||
if (commitName != null) {
|
||||
SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
|
||||
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
|
||||
if(commit.isPresent()) {
|
||||
indexCommit = commit.get();
|
||||
} else {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to find an index commit with name " + commitName +
|
||||
" for core " + core.getName());
|
||||
}
|
||||
} else {
|
||||
IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
|
||||
IndexCommit indexCommit = delPolicy.getLatestCommit();
|
||||
indexCommit = delPolicy.getLatestCommit();
|
||||
|
||||
if (indexCommit == null) {
|
||||
indexCommit = req.getSearcher().getIndexReader().getIndexCommit();
|
||||
}
|
||||
}
|
||||
|
||||
String location = params.get(CoreAdminParams.BACKUP_LOCATION);
|
||||
String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
|
||||
|
@ -539,7 +561,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
}
|
||||
|
||||
// small race here before the commit point is saved
|
||||
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME));
|
||||
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME), commitName);
|
||||
snapShooter.validateCreateSnapshot();
|
||||
snapShooter.createSnapAsync(indexCommit, numberToKeep, (nl) -> snapShootDetails = nl);
|
||||
|
||||
|
@ -644,7 +666,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
rsp.add(CMD_GET_FILE_LIST, result);
|
||||
|
||||
// fetch list of tlog files only if cdcr is activated
|
||||
if (core.getUpdateHandler().getUpdateLog() != null && core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
|
||||
if (solrParams.getBool(TLOG_FILES, true) && core.getUpdateHandler().getUpdateLog() != null
|
||||
&& core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
|
||||
try {
|
||||
List<Map<String, Object>> tlogfiles = getTlogFileList(commit);
|
||||
LOG.info("Adding tlog files to list: " + tlogfiles);
|
||||
|
|
|
@ -16,13 +16,17 @@
|
|||
*/
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.PluginBag;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
|
@ -35,10 +39,6 @@ import org.apache.solr.util.stats.TimerContext;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static org.apache.solr.core.RequestParams.USEPARAM;
|
||||
|
||||
/**
|
||||
|
@ -53,10 +53,10 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
protected boolean httpCaching = true;
|
||||
|
||||
// Statistics
|
||||
private final AtomicLong numRequests = new AtomicLong();
|
||||
private final AtomicLong numServerErrors = new AtomicLong();
|
||||
private final AtomicLong numClientErrors = new AtomicLong();
|
||||
private final AtomicLong numTimeouts = new AtomicLong();
|
||||
private final LongAdder numRequests = new LongAdder();
|
||||
private final LongAdder numServerErrors = new LongAdder();
|
||||
private final LongAdder numClientErrors = new LongAdder();
|
||||
private final LongAdder numTimeouts = new LongAdder();
|
||||
private final Timer requestTimes = new Timer();
|
||||
|
||||
private final long handlerStart;
|
||||
|
@ -144,7 +144,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
|
||||
@Override
|
||||
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
|
||||
numRequests.incrementAndGet();
|
||||
numRequests.increment();
|
||||
TimerContext timer = requestTimes.time();
|
||||
try {
|
||||
if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM));
|
||||
|
@ -158,7 +158,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
Object partialResults = header.get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY);
|
||||
boolean timedOut = partialResults == null ? false : (Boolean)partialResults;
|
||||
if( timedOut ) {
|
||||
numTimeouts.incrementAndGet();
|
||||
numTimeouts.increment();
|
||||
rsp.setHttpCaching(false);
|
||||
}
|
||||
}
|
||||
|
@ -185,9 +185,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
SolrException.log(log, e);
|
||||
|
||||
if (isServerError) {
|
||||
numServerErrors.incrementAndGet();
|
||||
numServerErrors.increment();
|
||||
} else {
|
||||
numClientErrors.incrementAndGet();
|
||||
numClientErrors.increment();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.handler;
|
|||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URI;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
import java.util.concurrent.Callable;
|
||||
|
@ -32,6 +33,9 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -63,6 +67,7 @@ public class RestoreCore implements Callable<Boolean> {
|
|||
String restoreIndexName = "restore." + dateFormat.format(new Date());
|
||||
String restoreIndexPath = core.getDataDir() + restoreIndexName;
|
||||
|
||||
String indexDirPath = core.getIndexDir();
|
||||
Directory restoreIndexDir = null;
|
||||
Directory indexDir = null;
|
||||
try {
|
||||
|
@ -71,7 +76,7 @@ public class RestoreCore implements Callable<Boolean> {
|
|||
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
|
||||
//Prefer local copy.
|
||||
indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
|
||||
indexDir = core.getDirectoryFactory().get(indexDirPath,
|
||||
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
|
||||
//Move all files from backupDir to restoreIndexDir
|
||||
|
@ -130,7 +135,16 @@ public class RestoreCore implements Callable<Boolean> {
|
|||
}
|
||||
if (success) {
|
||||
core.getDirectoryFactory().doneWithDirectory(indexDir);
|
||||
|
||||
SolrSnapshotMetaDataManager snapshotsMgr = core.getSnapshotMetaDataManager();
|
||||
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
|
||||
|
||||
// Delete the old index directory only if no snapshot exists in that directory.
|
||||
if (snapshots.isEmpty()) {
|
||||
core.getDirectoryFactory().remove(indexDir);
|
||||
} else {
|
||||
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -185,7 +185,7 @@ public class SchemaHandler extends RequestHandlerBase implements SolrCoreAware,
|
|||
if (parts.get(0).isEmpty()) parts.remove(0);
|
||||
if (parts.size() > 1 && level2.containsKey(parts.get(1))) {
|
||||
String realName = parts.get(1);
|
||||
String fieldName = IndexSchema.SchemaProps.nameMapping.get(realName);
|
||||
String fieldName = IndexSchema.nameMapping.get(realName);
|
||||
|
||||
String pathParam = level2.get(realName);
|
||||
if (parts.size() > 2) {
|
||||
|
|
|
@ -26,12 +26,14 @@ import java.util.Collections;
|
|||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
|
@ -39,6 +41,7 @@ import org.apache.solr.core.SolrCore;
|
|||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository.PathType;
|
||||
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -59,6 +62,7 @@ public class SnapShooter {
|
|||
private URI baseSnapDirPath = null;
|
||||
private URI snapshotDirPath = null;
|
||||
private BackupRepository backupRepo = null;
|
||||
private String commitName; // can be null
|
||||
|
||||
@Deprecated
|
||||
public SnapShooter(SolrCore core, String location, String snapshotName) {
|
||||
|
@ -71,14 +75,14 @@ public class SnapShooter {
|
|||
} else {
|
||||
snapDirStr = core.getCoreDescriptor().getInstanceDir().resolve(location).normalize().toString();
|
||||
}
|
||||
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName);
|
||||
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName, null);
|
||||
}
|
||||
|
||||
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
|
||||
initialize(backupRepo, core, location, snapshotName);
|
||||
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
|
||||
initialize(backupRepo, core, location, snapshotName, commitName);
|
||||
}
|
||||
|
||||
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
|
||||
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
|
||||
this.solrCore = Preconditions.checkNotNull(core);
|
||||
this.backupRepo = Preconditions.checkNotNull(backupRepo);
|
||||
this.baseSnapDirPath = backupRepo.createURI(Preconditions.checkNotNull(location)).normalize();
|
||||
|
@ -90,6 +94,7 @@ public class SnapShooter {
|
|||
directoryName = "snapshot." + fmt.format(new Date());
|
||||
}
|
||||
this.snapshotDirPath = backupRepo.createURI(location, directoryName);
|
||||
this.commitName = commitName;
|
||||
}
|
||||
|
||||
public BackupRepository getBackupRepository() {
|
||||
|
@ -145,10 +150,19 @@ public class SnapShooter {
|
|||
}
|
||||
|
||||
public NamedList createSnapshot() throws Exception {
|
||||
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
|
||||
RefCounted<SolrIndexSearcher> searcher = solrCore.getSearcher();
|
||||
try {
|
||||
if (commitName != null) {
|
||||
SolrSnapshotMetaDataManager snapshotMgr = solrCore.getSnapshotMetaDataManager();
|
||||
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
|
||||
if(commit.isPresent()) {
|
||||
return createSnapshot(commit.get());
|
||||
}
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to find an index commit with name " + commitName +
|
||||
" for core " + solrCore.getName());
|
||||
} else {
|
||||
//TODO should we try solrCore.getDeletionPolicy().getLatestCommit() first?
|
||||
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
|
||||
IndexCommit indexCommit = searcher.get().getIndexReader().getIndexCommit();
|
||||
deletionPolicy.saveCommitPoint(indexCommit.getGeneration());
|
||||
try {
|
||||
|
@ -156,6 +170,7 @@ public class SnapShooter {
|
|||
} finally {
|
||||
deletionPolicy.releaseCommitPoint(indexCommit.getGeneration());
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
searcher.decref();
|
||||
}
|
||||
|
|
|
@ -122,6 +122,8 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
|
|||
.withFunctionName("intersect", IntersectStream.class)
|
||||
.withFunctionName("complement", ComplementStream.class)
|
||||
.withFunctionName("sort", SortStream.class)
|
||||
.withFunctionName("train", TextLogitStream.class)
|
||||
.withFunctionName("features", FeaturesSelectionStream.class)
|
||||
.withFunctionName("daemon", DaemonStream.class)
|
||||
.withFunctionName("shortestPath", ShortestPathStream.class)
|
||||
.withFunctionName("gatherNodes", GatherNodesStream.class)
|
||||
|
|
|
@ -714,10 +714,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
|||
String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
|
||||
if (location == null) {
|
||||
//Refresh the cluster property file to make sure the value set for location is the latest
|
||||
h.coreContainer.getZkController().getZkStateReader().forceUpdateClusterProperties();
|
||||
|
||||
// Check if the location is specified in the cluster property.
|
||||
location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
|
||||
location = new ClusterProperties(h.coreContainer.getZkController().getZkClient()).getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
|
||||
if (location == null) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
|
||||
+ " parameter or as a default repository property or as a cluster property.");
|
||||
|
@ -755,10 +753,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
|||
String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
|
||||
if (location == null) {
|
||||
//Refresh the cluster property file to make sure the value set for location is the latest
|
||||
h.coreContainer.getZkController().getZkStateReader().forceUpdateClusterProperties();
|
||||
|
||||
// Check if the location is specified in the cluster property.
|
||||
location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", null);
|
||||
location = new ClusterProperties(h.coreContainer.getZkController().getZkClient()).getClusterProperty("location", null);
|
||||
if (location == null) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
|
||||
+ " parameter or as a default repository property or as a cluster property.");
|
||||
|
|
|
@ -34,6 +34,7 @@ import java.util.concurrent.Future;
|
|||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -59,9 +60,13 @@ import org.apache.solr.core.CachingDirectoryFactory;
|
|||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.CoreDescriptor;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.apache.solr.handler.RestoreCore;
|
||||
import org.apache.solr.handler.SnapShooter;
|
||||
import org.apache.solr.handler.admin.CoreAdminHandler.CoreAdminOp;
|
||||
|
@ -794,20 +799,24 @@ enum CoreAdminOperation implements CoreAdminOp {
|
|||
+ " parameter or as a default repository property");
|
||||
}
|
||||
|
||||
// An optional parameter to describe the snapshot to be backed-up. If this
|
||||
// parameter is not supplied, the latest index commit is backed-up.
|
||||
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
|
||||
|
||||
try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
|
||||
SnapShooter snapShooter = new SnapShooter(repository, core, location, name);
|
||||
SnapShooter snapShooter = new SnapShooter(repository, core, location, name, commitName);
|
||||
// validateCreateSnapshot will create parent dirs instead of throw; that choice is dubious.
|
||||
// But we want to throw. One reason is that
|
||||
// this dir really should, in fact must, already exist here if triggered via a collection backup on a shared
|
||||
// file system. Otherwise, perhaps the FS location isn't shared -- we want an error.
|
||||
if (!snapShooter.getBackupRepository().exists(snapShooter.getLocation())) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST,
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Directory to contain snapshots doesn't exist: " + snapShooter.getLocation());
|
||||
}
|
||||
snapShooter.validateCreateSnapshot();
|
||||
snapShooter.createSnapshot();
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Failed to backup core=" + cname + " because " + e, e);
|
||||
}
|
||||
}),
|
||||
|
@ -845,6 +854,92 @@ enum CoreAdminOperation implements CoreAdminOp {
|
|||
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to restore core=" + core.getName());
|
||||
}
|
||||
}
|
||||
}),
|
||||
// Registers a named snapshot for the latest index commit of the requested core and
// reports the core name, commit name, index directory and generation in the response.
CREATESNAPSHOT_OP(CREATESNAPSHOT, it -> {
  CoreContainer container = it.handler.getCoreContainer();
  final SolrParams requestParams = it.req.getParams();

  // Both parameters are required; the commit name is read before the core name.
  String commitName = requestParams.required().get(CoreAdminParams.COMMIT_NAME);
  String cname = requestParams.required().get(CoreAdminParams.CORE);
  try (SolrCore core = container.getCore(cname)) {
    if (core == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
    }

    String indexDirPath = core.getIndexDir();
    IndexCommit latestCommit = core.getDeletionPolicy().getLatestCommit();
    if (latestCommit == null) {
      // The deletion policy has no commit to offer: use the commit the current searcher reads.
      RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
      try {
        latestCommit = searcherRef.get().getIndexReader().getIndexCommit();
      } finally {
        searcherRef.decref();
      }
    }

    SolrSnapshotMetaDataManager snapshotManager = core.getSnapshotMetaDataManager();
    final long generation = latestCommit.getGeneration();
    snapshotManager.snapshot(commitName, indexDirPath, generation);

    it.rsp.add("core", core.getName());
    it.rsp.add("commitName", commitName);
    it.rsp.add("indexDirPath", indexDirPath);
    it.rsp.add("generation", generation);
  }
}),
|
||||
// Releases the named snapshot of the requested core and, when the snapshot lives outside
// the core's current index directory, deletes the index files belonging to it.
DELETESNAPSHOT_OP(DELETESNAPSHOT, it -> {
  CoreContainer container = it.handler.getCoreContainer();
  final SolrParams requestParams = it.req.getParams();

  // Both parameters are required; the commit name is read before the core name.
  String commitName = requestParams.required().get(CoreAdminParams.COMMIT_NAME);
  String cname = requestParams.required().get(CoreAdminParams.CORE);
  try (SolrCore core = container.getCore(cname)) {
    if (core == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
    }

    SolrSnapshotMetaDataManager snapshotManager = core.getSnapshotMetaDataManager();
    Optional<SnapshotMetaData> released = snapshotManager.release(commitName);
    if (!released.isPresent()) {
      // Nothing was released, so there are no files to clean up.
      return;
    }

    SnapshotMetaData snapshot = released.get();
    long gen = snapshot.getGenerationNumber();
    String indexDirPath = snapshot.getIndexDirPath();

    // If the directory storing the snapshot is not the same as the *current* core
    // index directory, then delete the files corresponding to this snapshot.
    // Otherwise we leave the index files related to snapshot as is (assuming the
    // underlying Solr IndexDeletionPolicy will clean them up appropriately).
    boolean storedInCurrentIndexDir = indexDirPath.equals(core.getIndexDir());
    if (!storedInCurrentIndexDir) {
      Directory snapshotDir =
          core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
      try {
        SolrSnapshotManager.deleteIndexFiles(
            snapshotDir, snapshotManager.listSnapshotsInIndexDir(indexDirPath), gen);
      } finally {
        // Always hand the directory back to the factory, even if the deletion failed.
        core.getDirectoryFactory().release(snapshotDir);
      }
    }
  }
}),
|
||||
// Lists the snapshots known to the requested core. The response gets a "snapshots" entry
// mapping each snapshot name to its "generation" and "indexDirPath" (both as strings).
LISTSNAPSHOTS_OP(LISTSNAPSHOTS, it -> {
  CoreContainer cc = it.handler.getCoreContainer();
  final SolrParams params = it.req.getParams();

  String cname = params.required().get(CoreAdminParams.CORE);
  try ( SolrCore core = cc.getCore(cname) ) {
    if (core == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
    }

    SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
    // Parameterised instead of a raw NamedList so the element type is checked at compile time.
    NamedList<NamedList<String>> result = new NamedList<>();
    for (String name : mgr.listSnapshots()) {
      Optional<SnapshotMetaData> metadata = mgr.getSnapshotMetaData(name);
      // Names without metadata are left out of the listing.
      if ( metadata.isPresent() ) {
        NamedList<String> props = new NamedList<>();
        props.add("generation", String.valueOf(metadata.get().getGenerationNumber()));
        props.add("indexDirPath", metadata.get().getIndexDirPath());
        result.add(name, props);
      }
    }
    it.rsp.add("snapshots", result);
  }
});
|
||||
|
||||
final CoreAdminParams.CoreAdminAction action;
|
||||
|
|
|
@ -176,7 +176,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
query = rb.getQuery();
|
||||
} else {
|
||||
try {
|
||||
QParser parser = QParser.getParser(qs, null, req);
|
||||
QParser parser = QParser.getParser(qs, req);
|
||||
query = parser.getQuery();
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
|
@ -198,7 +198,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
try {
|
||||
for (String fq : fqs) {
|
||||
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
|
||||
QParser fqp = QParser.getParser(fq, null, req);
|
||||
QParser fqp = QParser.getParser(fq, req);
|
||||
newFilters.add(fqp.getQuery());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -202,7 +202,7 @@ public class QueryComponent extends SearchComponent
|
|||
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
|
||||
for (String fq : fqs) {
|
||||
if (fq != null && fq.trim().length()!=0) {
|
||||
QParser fqp = QParser.getParser(fq, null, req);
|
||||
QParser fqp = QParser.getParser(fq, req);
|
||||
filters.add(fqp.getQuery());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -158,7 +158,7 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
|
||||
for (String fq : fqs) {
|
||||
if (fq != null && fq.trim().length()!=0) {
|
||||
QParser fqp = QParser.getParser(fq, null, req);
|
||||
QParser fqp = QParser.getParser(fq, req);
|
||||
filters.add(fqp.getQuery());
|
||||
}
|
||||
}
|
||||
|
@ -249,7 +249,8 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
docid = segid + ctx.docBase;
|
||||
|
||||
if (rb.getFilters() != null) {
|
||||
for (Query q : rb.getFilters()) {
|
||||
for (Query raw : rb.getFilters()) {
|
||||
Query q = raw.rewrite(searcher.getIndexReader());
|
||||
Scorer scorer = searcher.createWeight(q, false, 1f).scorer(ctx);
|
||||
if (scorer == null || segid != scorer.iterator().advance(segid)) {
|
||||
// filter doesn't match.
|
||||
|
@ -448,7 +449,7 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
ZkController zkController = rb.req.getCore().getCoreDescriptor().getCoreContainer().getZkController();
|
||||
|
||||
// if shards=... then use that
|
||||
if (zkController != null && params.get("shards") == null) {
|
||||
if (zkController != null && params.get(ShardParams.SHARDS) == null) {
|
||||
CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
|
||||
|
||||
String collection = cloudDescriptor.getCollectionName();
|
||||
|
@ -470,32 +471,18 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
|
||||
for (Map.Entry<String,List<String>> entry : sliceToId.entrySet()) {
|
||||
String shard = entry.getKey();
|
||||
String shardIdList = StrUtils.join(entry.getValue(), ',');
|
||||
|
||||
ShardRequest sreq = new ShardRequest();
|
||||
|
||||
sreq.purpose = 1;
|
||||
ShardRequest sreq = createShardRequest(rb, entry.getValue());
|
||||
// sreq.shards = new String[]{shard}; // TODO: would be nice if this would work...
|
||||
sreq.shards = sliceToShards(rb, collection, shard);
|
||||
sreq.actualShards = sreq.shards;
|
||||
sreq.params = new ModifiableSolrParams();
|
||||
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
|
||||
sreq.params.set("distrib",false);
|
||||
sreq.params.set("ids", shardIdList);
|
||||
|
||||
rb.addRequest(this, sreq);
|
||||
}
|
||||
} else {
|
||||
String shardIdList = StrUtils.join(reqIds.allIds, ',');
|
||||
ShardRequest sreq = new ShardRequest();
|
||||
|
||||
sreq.purpose = 1;
|
||||
ShardRequest sreq = createShardRequest(rb, reqIds.allIds);
|
||||
sreq.shards = null; // ALL
|
||||
sreq.actualShards = sreq.shards;
|
||||
sreq.params = new ModifiableSolrParams();
|
||||
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
|
||||
sreq.params.set("distrib",false);
|
||||
sreq.params.set("ids", shardIdList);
|
||||
|
||||
rb.addRequest(this, sreq);
|
||||
}
|
||||
|
@ -503,6 +490,28 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
return ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for creating a new ShardRequest for the specified ids, based on the params
|
||||
* specified for the current request. The new ShardRequest does not yet know anything about
|
||||
* which shard/slice it will be sent to.
|
||||
*/
|
||||
private ShardRequest createShardRequest(final ResponseBuilder rb, final List<String> ids) {
|
||||
final ShardRequest sreq = new ShardRequest();
|
||||
sreq.purpose = 1;
|
||||
sreq.params = new ModifiableSolrParams(rb.req.getParams());
|
||||
|
||||
// TODO: how to avoid hardcoding this and hit the same handler?
|
||||
sreq.params.set(ShardParams.SHARDS_QT,"/get");
|
||||
sreq.params.set("distrib",false);
|
||||
|
||||
sreq.params.remove(ShardParams.SHARDS);
|
||||
sreq.params.remove("id");
|
||||
sreq.params.remove("ids");
|
||||
sreq.params.set("ids", StrUtils.join(ids, ','));
|
||||
|
||||
return sreq;
|
||||
}
|
||||
|
||||
private String[] sliceToShards(ResponseBuilder rb, String collection, String slice) {
|
||||
String lookup = collection + '_' + slice; // seems either form may be filled in rb.slices?
|
||||
|
||||
|
|
|
@ -60,7 +60,6 @@ import org.apache.solr.schema.FieldType;
|
|||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QParserPlugin;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
||||
|
@ -242,7 +241,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
try {
|
||||
if (maxResultsFilterQueryString != null) {
|
||||
// Get the default Lucene query parser
|
||||
QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE, rb.req);
|
||||
QParser parser = QParser.getParser(maxResultsFilterQueryString, rb.req);
|
||||
DocSet s = searcher.getDocSet(parser.getQuery());
|
||||
maxResultsByFilters = s.size();
|
||||
} else {
|
||||
|
|
|
@ -321,7 +321,7 @@ public class SimpleFacets {
|
|||
public void getFacetQueryCount(ParsedParams parsed, NamedList<Integer> res) throws SyntaxError, IOException {
|
||||
// TODO: slight optimization would prevent double-parsing of any localParams
|
||||
// TODO: SOLR-7753
|
||||
Query qobj = QParser.getParser(parsed.facetValue, null, req).getQuery();
|
||||
Query qobj = QParser.getParser(parsed.facetValue, req).getQuery();
|
||||
|
||||
if (qobj == null) {
|
||||
res.add(parsed.key, 0);
|
||||
|
|
|
@ -77,7 +77,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
|
|||
|
||||
BitSetProducer parentsFilter = null;
|
||||
try {
|
||||
Query parentFilterQuery = QParser.getParser( parentFilter, null, req).getQuery();
|
||||
Query parentFilterQuery = QParser.getParser( parentFilter, req).getQuery();
|
||||
parentsFilter = new QueryBitSetProducer(new QueryWrapperFilter(parentFilterQuery));
|
||||
} catch (SyntaxError syntaxError) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct parent filter query" );
|
||||
|
@ -86,7 +86,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
|
|||
Query childFilterQuery = null;
|
||||
if(childFilter != null) {
|
||||
try {
|
||||
childFilterQuery = QParser.getParser( childFilter, null, req).getQuery();
|
||||
childFilterQuery = QParser.getParser( childFilter, req).getQuery();
|
||||
} catch (SyntaxError syntaxError) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct child filter query" );
|
||||
}
|
||||
|
|
|
@ -76,6 +76,15 @@ import org.apache.solr.search.TermsQParserPlugin;
|
|||
* its native parameters like <code>collection, shards</code> for subquery, eg<br>
|
||||
* <code>q=*:*&fl=*,foo:[subquery]&foo.q=cloud&foo.collection=departments</code>
|
||||
*
|
||||
* <h3>When used in Real Time Get</h3>
|
||||
* <p>
|
||||
* When used in the context of a Real Time Get, the <i>values</i> from each document that are used
|
||||
* in the subquery are the "real time" values (possibly from the transaction log), but the query
|
||||
* itself is still executed against the currently open searcher. Note that this means if a
|
||||
* document is updated but not yet committed, an RTG request for that document that uses
|
||||
* <code>[subquery]</code> could include the older (committed) version of that document,
|
||||
* with different field values, in the subquery results.
|
||||
* </p>
|
||||
*/
|
||||
public class SubQueryAugmenterFactory extends TransformerFactory{
|
||||
|
||||
|
@ -304,6 +313,14 @@ class SubQueryAugmenter extends DocTransformer {
|
|||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns false -- this transformer does use an IndexSearcher, but it does not (necessarily) need
|
||||
* the searcher from the ResultContext of the document being returned. Instead we use the current
|
||||
* "live" searcher for the specified core.
|
||||
*/
|
||||
@Override
|
||||
public boolean needsSolrIndexSearcher() { return false; }
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
|
||||
|
|
|
@ -1500,10 +1500,12 @@ public class IndexSchema {
|
|||
(v1, v2) -> v2,
|
||||
LinkedHashMap::new));
|
||||
}
|
||||
public static Map<String,String> nameMapping = Collections.unmodifiableMap(Stream.of(Handler.values())
|
||||
.collect(Collectors.toMap(Handler::getNameLower , Handler::getRealName)));
|
||||
}
|
||||
|
||||
public static Map<String,String> nameMapping = Collections.unmodifiableMap(Stream.of(SchemaProps.Handler.values())
|
||||
.collect(Collectors.toMap(SchemaProps.Handler::getNameLower , SchemaProps.Handler::getRealName)));
|
||||
|
||||
|
||||
public Map<String, Object> getNamedPropertyValues(String name, SolrParams params) {
|
||||
return new SchemaProps(name, params, this).toMap();
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue