Merge remote-tracking branch 'origin/master'

Conflicts:
	solr/CHANGES.txt
Noble Paul 2016-08-11 12:13:38 +05:30
commit 92b5a76b54
216 changed files with 11609 additions and 1492 deletions

View File

@@ -65,6 +65,9 @@ New Features
   Polygon instances from a standard GeoJSON string (Robert Muir, Mike
   McCandless)
+ * SOLR-9279: Queries module: new ComparisonBoolFunction base class
+   (Doug Turnbull via David Smiley)
Bug Fixes
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
@@ -135,10 +138,9 @@ Improvements
* LUCENE-7385: Improve/fix assert messages in SpanScorer. (David Smiley)
- * LUCENE-7390: Improve performance of indexing points by allowing the
-   codec to use transient heap in proportion to IndexWriter's RAM
-   buffer, instead of a fixed 16.0 MB. A custom codec can still
-   override the buffer size itself. (Mike McCandless)
+ * LUCENE-7393: Add ICUTokenizer option to parse Myanmar text as syllables instead of words,
+   because the ICU word-breaking algorithm has some issues. This allows for the previous
+   tokenization used before Lucene 5. (AM, Robert Muir)
Optimizations
@@ -154,6 +156,12 @@ Optimizations
* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
  (Adrien Grand)
+ * LUCENE-7396, LUCENE-7399: Faster flush of points.
+   (Adrien Grand, Mike McCandless)
+ * LUCENE-7406: Automaton and PrefixQuery tweaks (fewer object (re)allocations).
+   (Christine Poerschke)
Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien

View File

@@ -402,6 +402,7 @@ public class MinHashFilter extends TokenFilter {
  }
  /** Returns the MurmurHash3_x64_128 hash, placing the result in "out". */
+ @SuppressWarnings("fallthrough") // the huge switch is designed to use fall through into cases!
  static void murmurhash3_x64_128(byte[] key, int offset, int len, int seed, LongPair out) {
    // The original algorithm does have a 32 bit unsigned seed.
    // We have to mask to match the behavior of the unsigned types and prevent sign extension.

View File

@@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Parses Myanmar text, with syllable as token.
#
$Cons = [[:Other_Letter:]&[:Myanmar:]];
$Virama = [\u1039];
$Asat = [\u103A];
$WordJoin = [:Line_Break=Word_Joiner:];
#
# default numerical definitions
#
$Extend = [\p{Word_Break = Extend}];
$Format = [\p{Word_Break = Format}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
$MidNumEx = $MidNum ($Extend | $Format)*;
$NumericEx = $Numeric ($Extend | $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
$ConsEx = $Cons ($Extend | $Format)*;
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;
!!forward;
$MyanmarJoinedSyllableEx {200};
# default numeric rules
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};
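For reference, rules in this format are the source for the compiled MyanmarSyllable.brk that DefaultICUTokenizerConfig loads below, and they can also be compiled and exercised directly with ICU4J's RuleBasedBreakIterator. A minimal sketch, not part of this commit; the rule-file path is illustrative and the sample string is the one used in TestMyanmarSyllable later in this diff:

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class SyllableRulesDemo {
  public static void main(String[] args) throws Exception {
    // Compile the .rbbi rule text at runtime (path is hypothetical).
    String rules = new String(Files.readAllBytes(Paths.get("MyanmarSyllable.rbbi")), StandardCharsets.UTF_8);
    BreakIterator breaker = new RuleBasedBreakIterator(rules);
    String text = "သက်ဝင်လှုပ်ရှားစေပြီး"; // sample from TestMyanmarSyllable in this commit
    breaker.setText(text);
    for (int start = breaker.first(), end = breaker.next(); end != BreakIterator.DONE; start = end, end = breaker.next()) {
      System.out.println(text.substring(start, end)); // prints one syllable per line
    }
  }
}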

View File

@@ -63,9 +63,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
  // the same as ROOT, except no dictionary segmentation for cjk
  private static final BreakIterator defaultBreakIterator =
    readBreakIterator("Default.brk");
+ private static final BreakIterator myanmarSyllableIterator =
+   readBreakIterator("MyanmarSyllable.brk");
  // TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
  private final boolean cjkAsWords;
+ private final boolean myanmarAsWords;
  /**
   * Creates a new config. This object is lightweight, but the first
@@ -74,9 +77,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
   * otherwise text will be segmented according to UAX#29 defaults.
   * If this is true, all Han+Hiragana+Katakana words will be tagged as
   * IDEOGRAPHIC.
+  * @param myanmarAsWords true if Myanmar text should undergo dictionary-based segmentation,
+  *                       otherwise it will be tokenized as syllables.
   */
- public DefaultICUTokenizerConfig(boolean cjkAsWords) {
+ public DefaultICUTokenizerConfig(boolean cjkAsWords, boolean myanmarAsWords) {
    this.cjkAsWords = cjkAsWords;
+   this.myanmarAsWords = myanmarAsWords;
  }
  @Override
@@ -88,6 +94,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
  public BreakIterator getBreakIterator(int script) {
    switch(script) {
      case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone();
+     case UScript.MYANMAR:
+       if (myanmarAsWords) {
+         return (BreakIterator)defaultBreakIterator.clone();
+       } else {
+         return (BreakIterator)myanmarSyllableIterator.clone();
+       }
      default: return (BreakIterator)defaultBreakIterator.clone();
    }
  }

View File

@@ -68,7 +68,7 @@ public final class ICUTokenizer extends Tokenizer {
   * @see DefaultICUTokenizerConfig
   */
  public ICUTokenizer() {
-   this(new DefaultICUTokenizerConfig(true));
+   this(new DefaultICUTokenizerConfig(true, true));
  }
  /**

View File

@@ -79,6 +79,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
  private final Map<Integer,String> tailored;
  private ICUTokenizerConfig config;
  private final boolean cjkAsWords;
+ private final boolean myanmarAsWords;
  /** Creates a new ICUTokenizerFactory */
  public ICUTokenizerFactory(Map<String,String> args) {
@@ -95,6 +96,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
      }
    }
    cjkAsWords = getBoolean(args, "cjkAsWords", true);
+   myanmarAsWords = getBoolean(args, "myanmarAsWords", true);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@@ -104,7 +106,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
  public void inform(ResourceLoader loader) throws IOException {
    assert tailored != null : "init must be called first!";
    if (tailored.isEmpty()) {
-     config = new DefaultICUTokenizerConfig(cjkAsWords);
+     config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
    } else {
      final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
      for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
@@ -112,7 +114,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
        String resourcePath = entry.getValue();
        breakers[code] = parseRules(resourcePath, loader);
      }
-     config = new DefaultICUTokenizerConfig(cjkAsWords) {
+     config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {
        @Override
        public BreakIterator getBreakIterator(int script) {
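A minimal sketch of how the new option can be exercised from plain Java (not part of the diff; the sample text is the one used in TestMyanmarSyllable below). Passing myanmarAsWords=false selects the syllable iterator; the equivalent factory setting would presumably be myanmarAsWords="false" on ICUTokenizerFactory, per the change above:

import java.io.StringReader;
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MyanmarSyllableTokenizeDemo {
  public static void main(String[] args) throws Exception {
    // cjkAsWords=true keeps the default CJK behavior; myanmarAsWords=false enables syllable tokenization
    ICUTokenizer tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(true, false));
    tokenizer.setReader(new StringReader("သက်ဝင်လှုပ်ရှားစေပြီး"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term.toString()); // expected: သက် ဝင် လှုပ် ရှား စေ ပြီး
    }
    tokenizer.end();
    tokenizer.close();
  }
}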

View File

@@ -42,7 +42,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
-   ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
+   ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
    tokenizer.setReader(new StringReader(input));
    assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
  }
@@ -53,7 +53,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
      sb.append('a');
    }
    String input = sb.toString();
-   ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
+   ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
    tokenizer.setReader(new StringReader(input));
    char token[] = new char[4096];
    Arrays.fill(token, 'a');
@@ -75,7 +75,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
    a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
-       Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
+       Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
        TokenFilter filter = new ICUNormalizer2Filter(tokenizer);
        return new TokenStreamComponents(tokenizer, filter);
      }

View File

@@ -34,7 +34,7 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
    a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
-       return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true)));
+       return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true, true)));
      }
    };
  }

View File

@@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.icu.segmentation;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
/** Test tokenizing Myanmar text into syllables */
public class TestMyanmarSyllable extends BaseTokenStreamTestCase {
Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, false));
return new TokenStreamComponents(tokenizer);
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
/** as opposed to dictionary break of သက်ဝင်|လှုပ်ရှား|စေ|ပြီး */
public void testBasics() throws Exception {
assertAnalyzesTo(a, "သက်ဝင်လှုပ်ရှားစေပြီး", new String[] { "သက်", "ဝင်", "လှုပ်", "ရှား", "စေ", "ပြီး" });
}
// simple tests from "A Rule-based Syllable Segmentation of Myanmar Text"
// * http://www.aclweb.org/anthology/I08-3010
// (see also the presentation: http://gii2.nagaokaut.ac.jp/gii/media/share/20080901-ZMM%20Presentation.pdf)
// The words are fake, we just test the categories.
// note that currently our algorithm is not sophisticated enough to handle some of the special cases!
/** consonant */
public void testC() throws Exception {
assertAnalyzesTo(a, "ကက", new String[] { "က", "က" });
}
/** consonant + sign */
public void testCF() throws Exception {
assertAnalyzesTo(a, "ကံကံ", new String[] { "ကံ", "ကံ" });
}
/** consonant + consonant + asat */
public void testCCA() throws Exception {
assertAnalyzesTo(a, "ကင်ကင်", new String[] { "ကင်", "ကင်" });
}
/** consonant + consonant + asat + sign */
public void testCCAF() throws Exception {
assertAnalyzesTo(a, "ကင်းကင်း", new String[] { "ကင်း", "ကင်း" });
}
/** consonant + vowel */
public void testCV() throws Exception {
assertAnalyzesTo(a, "ကာကာ", new String[] { "ကာ", "ကာ" });
}
/** consonant + vowel + sign */
public void testCVF() throws Exception {
assertAnalyzesTo(a, "ကားကား", new String[] { "ကား", "ကား" });
}
/** consonant + vowel + vowel + asat */
public void testCVVA() throws Exception {
assertAnalyzesTo(a, "ကော်ကော်", new String[] { "ကော်", "ကော်" });
}
/** consonant + vowel + vowel + consonant + asat */
public void testCVVCA() throws Exception {
assertAnalyzesTo(a, "ကောင်ကောင်", new String[] { "ကောင်", "ကောင်" });
}
/** consonant + vowel + vowel + consonant + asat + sign */
public void testCVVCAF() throws Exception {
assertAnalyzesTo(a, "ကောင်းကောင်း", new String[] { "ကောင်း", "ကောင်း" });
}
/** consonant + medial */
public void testCM() throws Exception {
assertAnalyzesTo(a, "ကျကျ", new String[] { "ကျ", "ကျ" });
}
/** consonant + medial + sign */
public void testCMF() throws Exception {
assertAnalyzesTo(a, "ကျံကျံ", new String[] { "ကျံ", "ကျံ" });
}
/** consonant + medial + consonant + asat */
public void testCMCA() throws Exception {
assertAnalyzesTo(a, "ကျင်ကျင်", new String[] { "ကျင်", "ကျင်" });
}
/** consonant + medial + consonant + asat + sign */
public void testCMCAF() throws Exception {
assertAnalyzesTo(a, "ကျင်းကျင်း", new String[] { "ကျင်း", "ကျင်း" });
}
/** consonant + medial + vowel */
public void testCMV() throws Exception {
assertAnalyzesTo(a, "ကျာကျာ", new String[] { "ကျာ", "ကျာ" });
}
/** consonant + medial + vowel + sign */
public void testCMVF() throws Exception {
assertAnalyzesTo(a, "ကျားကျား", new String[] { "ကျား", "ကျား" });
}
/** consonant + medial + vowel + vowel + asat */
public void testCMVVA() throws Exception {
assertAnalyzesTo(a, "ကျော်ကျော်", new String[] { "ကျော်", "ကျော်" });
}
/** consonant + medial + vowel + vowel + consonant + asat */
public void testCMVVCA() throws Exception {
assertAnalyzesTo(a, "ကြောင်ကြောင်", new String[] { "ကြောင်", "ကြောင်"});
}
/** consonant + medial + vowel + vowel + consonant + asat + sign */
public void testCMVVCAF() throws Exception {
assertAnalyzesTo(a, "ကြောင်းကြောင်း", new String[] { "ကြောင်း", "ကြောင်း"});
}
/** independent vowel */
public void testI() throws Exception {
assertAnalyzesTo(a, "ဪဪ", new String[] { "ဪ", "ဪ" });
}
/** independent vowel */
public void testE() throws Exception {
assertAnalyzesTo(a, "ဣဣ", new String[] { "ဣ", "ဣ" });
}
}

View File

@@ -46,7 +46,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
    analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
-       Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
+       Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
        TokenStream result = new CJKBigramFilter(source);
        return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET));
      }
@@ -60,7 +60,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
    analyzer2 = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
-       Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
+       Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
        // we put this before the CJKBigramFilter, because the normalization might combine
        // some halfwidth katakana forms, which will affect the bigramming.
        TokenStream result = new ICUNormalizer2Filter(source);

View File

@@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
+ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
@@ -82,11 +83,20 @@ public class DatasetSplitter {
    // get the exact no. of existing classes
    int noOfClasses = 0;
    for (LeafReaderContext leave : originalIndex.leaves()) {
+     long valueCount = 0;
      SortedDocValues classValues = leave.reader().getSortedDocValues(classFieldName);
-     if (classValues == null) {
-       throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values");
-     }
-     noOfClasses += classValues.getValueCount();
+     if (classValues != null) {
+       valueCount = classValues.getValueCount();
+     } else {
+       SortedSetDocValues sortedSetDocValues = leave.reader().getSortedSetDocValues(classFieldName);
+       if (sortedSetDocValues != null) {
+         valueCount = sortedSetDocValues.getValueCount();
+       }
+     }
+     if (classValues == null) {
+       throw new IllegalStateException("field \"" + classFieldName + "\" must have sorted (set) doc values");
+     }
+     noOfClasses += valueCount;
    }
    try {
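The practical consequence for callers is that the class field handed to DatasetSplitter can now be backed by either single-valued or multi-valued sorted doc values. A minimal sketch of the indexing side (field and label names are illustrative, not part of the commit):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.util.BytesRef;

class ClassFieldExamples {
  /** Single-valued class label, readable through getSortedDocValues(). */
  static Document singleLabel(String label) {
    Document doc = new Document();
    doc.add(new SortedDocValuesField("class", new BytesRef(label)));
    return doc;
  }

  /** Multi-valued labels, readable through getSortedSetDocValues(), now also accepted. */
  static Document multiLabel(String... labels) {
    Document doc = new Document();
    for (String label : labels) {
      doc.add(new SortedSetDocValuesField("class", new BytesRef(label)));
    }
    return doc;
  }
}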

View File

@@ -68,7 +68,7 @@ class SimpleTextPointsWriter extends PointsWriter {
  }
  @Override
- public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
+ public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
    boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@@ -79,7 +79,7 @@ class SimpleTextPointsWriter extends PointsWriter {
      fieldInfo.getPointDimensionCount(),
      fieldInfo.getPointNumBytes(),
      BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
-     maxMBSortInHeap,
+     BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
      values.size(fieldInfo.name),
      singleValuePerDoc) {

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;
import org.apache.lucene.util.BytesRef;
/** {@link PointsReader} whose order of points can be changed.
* This class is useful for codecs to optimize flush.
* @lucene.internal */
public abstract class MutablePointsReader extends PointsReader {
/** Sole constructor. */
protected MutablePointsReader() {}
/** Set {@code packedValue} with a reference to the packed bytes of the i-th value. */
public abstract void getValue(int i, BytesRef packedValue);
/** Get the k-th byte of the i-th value. */
public abstract byte getByteAt(int i, int k);
/** Return the doc ID of the i-th value. */
public abstract int getDocID(int i);
/** Swap the i-th and j-th values. */
public abstract void swap(int i, int j);
}
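To illustrate the access pattern this API enables, here is a small hypothetical helper (not part of the commit) that reorders the first size points of a MutablePointsReader by doc ID using only getDocID and swap:

import org.apache.lucene.codecs.MutablePointsReader;

final class MutablePointsUtil {
  /** Insertion-sort points [0, size) by doc ID, purely to demonstrate getDocID/swap. */
  static void sortByDocID(MutablePointsReader reader, int size) {
    for (int i = 1; i < size; ++i) {
      for (int j = i; j > 0 && reader.getDocID(j) < reader.getDocID(j - 1); --j) {
        reader.swap(j, j - 1); // reorder points in place
      }
    }
  }
}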

View File

@@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
- import org.apache.lucene.util.bkd.BKDWriter;
/** Abstract API to write points
 *
@@ -35,9 +34,8 @@ public abstract class PointsWriter implements Closeable {
  protected PointsWriter() {
  }
- /** Write all values contained in the provided reader. {@code maxMBSortInHeap} is the maximum
-  *  transient heap that can be used to sort values, before spilling to disk for offline sorting */
- public abstract void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException;
+ /** Write all values contained in the provided reader */
+ public abstract void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException;
  /** Default naive merge implementation for one field: it just re-indexes all the values
   * from the incoming segment. The default codec overrides this for 1D fields and uses
@@ -147,10 +145,7 @@ public abstract class PointsWriter implements Closeable {
      public int getDocCount(String fieldName) {
        return finalDocCount;
      }
-   },
-   // TODO: also let merging of > 1D fields tap into IW's indexing buffer size, somehow (1D fields do an optimized merge sort
-   // and don't need heap)
-   BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
+   });
  }
  /** Default merge implementation to merge incoming points readers by visiting all their points and

View File

@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
+ import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.FieldInfo;
@@ -39,9 +40,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
- /** Writes dimensional values
-  *
-  * @lucene.experimental */
+ /** Writes dimensional values */
public class Lucene60PointsWriter extends PointsWriter implements Closeable {
  /** Output used to write the BKD tree data file */
@@ -52,13 +51,15 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
  final SegmentWriteState writeState;
  final int maxPointsInLeafNode;
+ final double maxMBSortInHeap;
  private boolean finished;
  /** Full constructor */
- public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode) throws IOException {
+ public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
    assert writeState.fieldInfos.hasPointValues();
    this.writeState = writeState;
    this.maxPointsInLeafNode = maxPointsInLeafNode;
+   this.maxMBSortInHeap = maxMBSortInHeap;
    String dataFileName = IndexFileNames.segmentFileName(writeState.segmentInfo.name,
                                                         writeState.segmentSuffix,
                                                         Lucene60PointsFormat.DATA_EXTENSION);
@@ -80,11 +81,11 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
  /** Uses the defaults values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap} (16.0) */
  public Lucene60PointsWriter(SegmentWriteState writeState) throws IOException {
-   this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
+   this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
  }
  @Override
- public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
+ public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
    boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@@ -98,6 +99,14 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
                                          values.size(fieldInfo.name),
                                          singleValuePerDoc)) {
+     if (values instanceof MutablePointsReader) {
+       final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointsReader) values);
+       if (fp != -1) {
+         indexFPs.put(fieldInfo.name, fp);
+       }
+       return;
+     }
      values.intersect(fieldInfo.name, new IntersectVisitor() {
        @Override
        public void visit(int docID) {
@@ -173,8 +182,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
                                            fieldInfo.getPointDimensionCount(),
                                            fieldInfo.getPointNumBytes(),
                                            maxPointsInLeafNode,
-                                           // NOTE: not used, since BKDWriter.merge does a merge sort:
-                                           BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
+                                           maxMBSortInHeap,
                                            totMaxSize,
                                            singleValuePerDoc)) {
      List<BKDReader> bkdReaders = new ArrayList<>();

View File

@@ -257,7 +257,7 @@ public class Field implements IndexableField {
  /**
   * The value of the field as a String, or null. If null, the Reader value or
   * binary value is used. Exactly one of stringValue(), readerValue(), and
-  * getBinaryValue() must be set.
+  * binaryValue() must be set.
   */
  @Override
  public String stringValue() {
@@ -271,7 +271,7 @@ public class Field implements IndexableField {
  /**
   * The value of the field as a Reader, or null. If null, the String value or
   * binary value is used. Exactly one of stringValue(), readerValue(), and
-  * getBinaryValue() must be set.
+  * binaryValue() must be set.
   */
  @Override
  public Reader readerValue() {
@@ -420,7 +420,7 @@ public class Field implements IndexableField {
  /**
   * Expert: sets the token stream to be used for indexing and causes
   * isIndexed() and isTokenized() to return true. May be combined with stored
-  * values from stringValue() or getBinaryValue()
+  * values from stringValue() or binaryValue()
   */
  public void setTokenStream(TokenStream tokenStream) {
    if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {

View File

@@ -153,7 +153,7 @@ class DocumentsWriterPerThread {
  final Allocator byteBlockAllocator;
  final IntBlockPool.Allocator intBlockAllocator;
  private final AtomicLong pendingNumDocs;
- final LiveIndexWriterConfig indexWriterConfig;
+ private final LiveIndexWriterConfig indexWriterConfig;
  private final boolean enableTestPoints;
  private final IndexWriter indexWriter;

View File

@@ -762,7 +762,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
   * {@link #getConfig()}.
   *
   * <p>
-  * <b>NOTE:</b> after this writer is created, the given configuration instance
+  * <b>NOTE:</b> after ths writer is created, the given configuration instance
   * cannot be passed to another writer.
   *
   * @param d

View File

@@ -168,14 +168,9 @@ public class LiveIndexWriterConfig {
  /**
   * Determines the amount of RAM that may be used for buffering added documents
-  * and deletions before beginning to flush them to the Directory. For
-  * faster indexing performance it's best to use as large a RAM buffer as you can.
-  * <p>
-  * Note that this setting is not a hard limit on memory usage during indexing, as
-  * transient and non-trivial memory well beyond this buffer size may be used,
-  * for example due to segment merges or writing points to new segments.
-  * For application stability the available memory in the JVM
-  * should be significantly larger than the RAM buffer used for indexing.
+  * and deletions before they are flushed to the Directory. Generally for
+  * faster indexing performance it's best to flush by RAM usage instead of
+  * document count and use as large a RAM buffer as you can.
   * <p>
   * When this is set, the writer will flush whenever buffered documents and
   * deletions use this much RAM. Pass in
@@ -183,6 +178,14 @@ public class LiveIndexWriterConfig {
   * due to RAM usage. Note that if flushing by document count is also enabled,
   * then the flush will be triggered by whichever comes first.
   * <p>
+  * The maximum RAM limit is inherently determined by the JVMs available
+  * memory. Yet, an {@link IndexWriter} session can consume a significantly
+  * larger amount of memory than the given RAM limit since this limit is just
+  * an indicator when to flush memory resident documents to the Directory.
+  * Flushes are likely happen concurrently while other threads adding documents
+  * to the writer. For application stability the available memory in the JVM
+  * should be significantly larger than the RAM buffer used for indexing.
+  * <p>
   * <b>NOTE</b>: the account of RAM usage for pending deletions is only
   * approximate. Specifically, if you delete by Query, Lucene currently has no
   * way to measure the RAM usage of individual Queries so the accounting will
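As a usage reminder (not part of this diff), flushing by RAM rather than by document count is configured on IndexWriterConfig; a minimal sketch with an illustrative 256 MB buffer and index path:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class RamBufferConfigDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(Paths.get("/tmp/index")); // illustrative path
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setRAMBufferSizeMB(256.0); // flush once ~256 MB of buffered docs/deletes accumulate
    iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't also flush by doc count
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      // add documents here; keep the JVM heap well above the RAM buffer, per the javadoc above
    }
  }
}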

View File

@@ -18,13 +18,13 @@ package org.apache.lucene.index;
import java.io.IOException;
+ import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
- import org.apache.lucene.util.bkd.BKDWriter;
/** Buffers up pending byte[][] value(s) per doc, then flushes when segment flushes. */
class PointValuesWriter {
@@ -35,8 +35,7 @@ class PointValuesWriter {
  private int numPoints;
  private int numDocs;
  private int lastDocID = -1;
- private final byte[] packedValue;
- private final LiveIndexWriterConfig indexWriterConfig;
+ private final int packedBytesLength;
  public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
    this.fieldInfo = fieldInfo;
@@ -44,8 +43,7 @@ class PointValuesWriter {
    this.bytes = new ByteBlockPool(docWriter.byteBlockAllocator);
    docIDs = new int[16];
    iwBytesUsed.addAndGet(16 * Integer.BYTES);
-   packedValue = new byte[fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()];
-   indexWriterConfig = docWriter.indexWriterConfig;
+   packedBytesLength = fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes();
  }
  // TODO: if exactly the same value is added to exactly the same doc, should we dedup?
@@ -53,9 +51,10 @@ class PointValuesWriter {
    if (value == null) {
      throw new IllegalArgumentException("field=" + fieldInfo.name + ": point value must not be null");
    }
-   if (value.length != fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()) {
+   if (value.length != packedBytesLength) {
      throw new IllegalArgumentException("field=" + fieldInfo.name + ": this field's value has length=" + value.length + " but should be " + (fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()));
    }
    if (docIDs.length == numPoints) {
      docIDs = ArrayUtil.grow(docIDs, numPoints+1);
      iwBytesUsed.addAndGet((docIDs.length - numPoints) * Integer.BYTES);
@@ -66,21 +65,32 @@ class PointValuesWriter {
      numDocs++;
      lastDocID = docID;
    }
    numPoints++;
  }
  public void flush(SegmentWriteState state, PointsWriter writer) throws IOException {
-   writer.writeField(fieldInfo,
-     new PointsReader() {
+   PointsReader reader = new MutablePointsReader() {
+     final int[] ords = new int[numPoints];
+     {
+       for (int i = 0; i < numPoints; ++i) {
+         ords[i] = i;
+       }
+     }
      @Override
      public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
        if (fieldName.equals(fieldInfo.name) == false) {
          throw new IllegalArgumentException("fieldName must be the same");
        }
+       final BytesRef scratch = new BytesRef();
+       final byte[] packedValue = new byte[packedBytesLength];
        for(int i=0;i<numPoints;i++) {
-         bytes.readBytes(packedValue.length * i, packedValue, 0, packedValue.length);
-         visitor.visit(docIDs[i], packedValue);
+         getValue(i, scratch);
+         assert scratch.length == packedValue.length;
+         System.arraycopy(scratch.bytes, scratch.offset, packedValue, 0, packedBytesLength);
+         visitor.visit(getDocID(i), packedValue);
        }
      }
@@ -120,14 +130,46 @@ class PointValuesWriter {
      @Override
      public long size(String fieldName) {
+       if (fieldName.equals(fieldInfo.name) == false) {
+         throw new IllegalArgumentException("fieldName must be the same");
+       }
        return numPoints;
      }
      @Override
      public int getDocCount(String fieldName) {
+       if (fieldName.equals(fieldInfo.name) == false) {
+         throw new IllegalArgumentException("fieldName must be the same");
+       }
        return numDocs;
      }
-   },
-   Math.max(indexWriterConfig.getRAMBufferSizeMB()/8.0, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP));
+     @Override
+     public void swap(int i, int j) {
+       int tmp = ords[i];
+       ords[i] = ords[j];
+       ords[j] = tmp;
+     }
+     @Override
+     public int getDocID(int i) {
+       return docIDs[ords[i]];
+     }
+     @Override
+     public void getValue(int i, BytesRef packedValue) {
+       final long offset = (long) packedBytesLength * ords[i];
+       packedValue.length = packedBytesLength;
+       bytes.setRawBytesRef(packedValue, offset);
+     }
+     @Override
+     public byte getByteAt(int i, int k) {
+       final long offset = (long) packedBytesLength * ords[i] + k;
+       return bytes.readByte(offset);
+     }
+   };
+   writer.writeField(fieldInfo, reader);
  }
}

View File

@@ -405,6 +405,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
    lock.lock();
    try {
      cache.clear();
+     // Note that this also clears the uniqueQueries map since mostRecentlyUsedQueries is the uniqueQueries.keySet view:
      mostRecentlyUsedQueries.clear();
      onClear();
    } finally {

View File

@@ -41,7 +41,8 @@ public class PrefixQuery extends AutomatonQuery {
  /** Build an automaton accepting all terms with the specified prefix. */
  public static Automaton toAutomaton(BytesRef prefix) {
-   Automaton automaton = new Automaton();
+   final int numStatesAndTransitions = prefix.length+1;
+   final Automaton automaton = new Automaton(numStatesAndTransitions, numStatesAndTransitions);
    int lastState = automaton.createState();
    for(int i=0;i<prefix.length;i++) {
      int state = automaton.createState();
@@ -66,7 +67,7 @@ public class PrefixQuery extends AutomatonQuery {
    StringBuilder buffer = new StringBuilder();
    if (!getField().equals(field)) {
      buffer.append(getField());
-     buffer.append(":");
+     buffer.append(':');
    }
    buffer.append(term.text());
    buffer.append('*');

View File

@@ -459,69 +459,26 @@ public final class ArrayUtil {
   *  greater than or equal to it.
   *  This runs in linear time on average and in {@code n log(n)} time in the
   *  worst case.*/
- public static <T> void select(T[] arr, int from, int to, int k, Comparator<T> comparator) {
-   if (k < from) {
-     throw new IllegalArgumentException("k must be >= from");
-   }
-   if (k >= to) {
-     throw new IllegalArgumentException("k must be < to");
-   }
-   final int maxDepth = 2 * MathUtil.log(to - from, 2);
-   quickSelect(arr, from, to, k, comparator, maxDepth);
- }
- private static <T> void quickSelect(T[] arr, int from, int to, int k, Comparator<T> comparator, int maxDepth) {
-   assert from <= k;
-   assert k < to;
-   if (to - from == 1) {
-     return;
-   }
-   if (--maxDepth < 0) {
-     Arrays.sort(arr, from, to, comparator);
-     return;
-   }
-   final int mid = (from + to) >>> 1;
-   // heuristic: we use the median of the values at from, to-1 and mid as a pivot
-   if (comparator.compare(arr[from], arr[to - 1]) > 0) {
-     swap(arr, from, to - 1);
-   }
-   if (comparator.compare(arr[to - 1], arr[mid]) > 0) {
-     swap(arr, to - 1, mid);
-     if (comparator.compare(arr[from], arr[to - 1]) > 0) {
-       swap(arr, from, to - 1);
-     }
-   }
-   T pivot = arr[to - 1];
-   int left = from + 1;
-   int right = to - 2;
-   for (;;) {
-     while (comparator.compare(pivot, arr[left]) > 0) {
-       ++left;
-     }
-     while (left < right && comparator.compare(pivot, arr[right]) <= 0) {
-       --right;
-     }
-     if (left < right) {
-       swap(arr, left, right);
-       --right;
-     } else {
-       break;
-     }
-   }
-   swap(arr, left, to - 1);
-   if (left == k) {
-     return;
-   } else if (left < k) {
-     quickSelect(arr, left + 1, to, k, comparator, maxDepth);
-   } else {
-     quickSelect(arr, from, left, k, comparator, maxDepth);
-   }
- }
+ public static <T> void select(T[] arr, int from, int to, int k, Comparator<? super T> comparator) {
+   new IntroSelector() {
+     T pivot;
+     @Override
+     protected void swap(int i, int j) {
+       ArrayUtil.swap(arr, i, j);
+     }
+     @Override
+     protected void setPivot(int i) {
+       pivot = arr[i];
+     }
+     @Override
+     protected int comparePivot(int j) {
+       return comparator.compare(pivot, arr[j]);
+     }
+   }.select(from, to, k);
+ }
}
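The caller-visible contract of select() is unchanged by this rewrite: afterwards arr[k] holds the value that would sit at index k if the range were fully sorted, with smaller-or-equal elements before it and greater-or-equal elements after it. A minimal usage sketch (values are illustrative):

import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;

public class SelectDemo {
  public static void main(String[] args) {
    Integer[] values = { 9, 4, 7, 1, 8, 3 };
    int k = 2; // find the 3rd smallest value
    ArrayUtil.select(values, 0, values.length, k, Comparator.naturalOrder());
    System.out.println(values[k]); // prints 4; the array is partitioned around index k
  }
}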

View File

@@ -378,5 +378,34 @@ public final class ByteBlockPool {
      }
    } while (true);
  }
+ /**
+  * Set the given {@link BytesRef} so that its content is equal to the
+  * {@code ref.length} bytes starting at {@code offset}. Most of the time this
+  * method will set pointers to internal data-structures. However, in case a
+  * value crosses a boundary, a fresh copy will be returned.
+  * On the contrary to {@link #setBytesRef(BytesRef, int)}, this does not
+  * expect the length to be encoded with the data.
+  */
+ public void setRawBytesRef(BytesRef ref, final long offset) {
+   int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
+   int pos = (int) (offset & BYTE_BLOCK_MASK);
+   if (pos + ref.length <= BYTE_BLOCK_SIZE) {
+     ref.bytes = buffers[bufferIndex];
+     ref.offset = pos;
+   } else {
+     ref.bytes = new byte[ref.length];
+     ref.offset = 0;
+     readBytes(offset, ref.bytes, 0, ref.length);
+   }
+ }
+ /** Read a single byte at the given {@code offset}. */
+ public byte readByte(long offset) {
+   int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
+   int pos = (int) (offset & BYTE_BLOCK_MASK);
+   byte[] buffer = buffers[bufferIndex];
+   return buffer[pos];
+ }
}
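A small sketch of how the two new methods are meant to be used together, mirroring PointValuesWriter earlier in this commit: fixed-length records appended to a fresh pool start at global offset recordLength * i, so they can usually be read back without copying. The record length and allocator choice are illustrative:

import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;

public class RawBytesRefDemo {
  public static void main(String[] args) {
    ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
    int recordLength = 8;
    for (int i = 0; i < 3; i++) {
      byte[] record = new byte[recordLength];
      record[0] = (byte) i; // mark each record so we can tell them apart
      pool.append(new BytesRef(record));
    }
    BytesRef scratch = new BytesRef();
    scratch.length = recordLength;                          // setRawBytesRef reads exactly ref.length bytes
    pool.setRawBytesRef(scratch, (long) recordLength * 2);  // view of record 2, usually without copying
    System.out.println(scratch.bytes[scratch.offset]);      // prints 2
    System.out.println(pool.readByte((long) recordLength)); // first byte of record 1, prints 1
  }
}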

View File

@@ -33,8 +33,8 @@ public abstract class InPlaceMergeSorter extends Sorter {
  }
  void mergeSort(int from, int to) {
-   if (to - from < INSERTION_SORT_THRESHOLD) {
-     insertionSort(from, to);
+   if (to - from < BINARY_SORT_THRESHOLD) {
+     binarySort(from, to);
    } else {
      final int mid = (from + to) >>> 1;
      mergeSort(from, mid);

View File

@@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Comparator;
/** Implementation of the quick select algorithm.
* <p>It uses the median of the first, middle and last values as a pivot and
* falls back to a heap sort when the number of recursion levels exceeds
* {@code 2 lg(n)}, as a consequence it runs in linear time on average and in
* {@code n log(n)} time in the worst case.</p>
* @lucene.internal */
public abstract class IntroSelector extends Selector {
@Override
public final void select(int from, int to, int k) {
checkArgs(from, to, k);
final int maxDepth = 2 * MathUtil.log(to - from, 2);
quickSelect(from, to, k, maxDepth);
}
// heap sort
// TODO: use median of median instead to have linear worst-case rather than
// n*log(n)
void slowSelect(int from, int to, int k) {
new Sorter() {
@Override
protected void swap(int i, int j) {
IntroSelector.this.swap(i, j);
}
@Override
protected int compare(int i, int j) {
return IntroSelector.this.compare(i, j);
}
public void sort(int from, int to) {
heapSort(from, to);
}
}.sort(from, to);
}
private void quickSelect(int from, int to, int k, int maxDepth) {
assert from <= k;
assert k < to;
if (to - from == 1) {
return;
}
if (--maxDepth < 0) {
slowSelect(from, to, k);
return;
}
final int mid = (from + to) >>> 1;
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
if (compare(from, to - 1) > 0) {
swap(from, to - 1);
}
if (compare(to - 1, mid) > 0) {
swap(to - 1, mid);
if (compare(from, to - 1) > 0) {
swap(from, to - 1);
}
}
setPivot(to - 1);
int left = from + 1;
int right = to - 2;
for (;;) {
while (comparePivot(left) > 0) {
++left;
}
while (left < right && comparePivot(right) <= 0) {
--right;
}
if (left < right) {
swap(left, right);
--right;
} else {
break;
}
}
swap(left, to - 1);
if (left == k) {
return;
} else if (left < k) {
quickSelect(left + 1, to, k, maxDepth);
} else {
quickSelect(from, left, k, maxDepth);
}
}
/** Compare entries found in slots <code>i</code> and <code>j</code>.
* The contract for the returned value is the same as
* {@link Comparator#compare(Object, Object)}. */
protected int compare(int i, int j) {
setPivot(i);
return comparePivot(j);
}
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
protected abstract void setPivot(int i);
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
protected abstract int comparePivot(int j);
}
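A concrete subclass only has to provide swap, setPivot and comparePivot; select(from, to, k) then moves the k-th smallest entry to index k. A minimal sketch over an int[] (values are illustrative):

import org.apache.lucene.util.IntroSelector;

public class IntroSelectorDemo {
  public static void main(String[] args) {
    int[] values = { 9, 4, 7, 1, 8, 3 };
    new IntroSelector() {
      int pivot;

      @Override
      protected void swap(int i, int j) {
        int tmp = values[i];
        values[i] = values[j];
        values[j] = tmp;
      }

      @Override
      protected void setPivot(int i) {
        pivot = values[i];
      }

      @Override
      protected int comparePivot(int j) {
        return Integer.compare(pivot, values[j]);
      }
    }.select(0, values.length, 2);
    System.out.println(values[2]); // prints 4, the 3rd smallest value
  }
}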

View File

@@ -16,7 +16,6 @@
 */
package org.apache.lucene.util;
/**
 * {@link Sorter} implementation based on a variant of the quicksort algorithm
 * called <a href="http://en.wikipedia.org/wiki/Introsort">introsort</a>: when
@@ -38,8 +37,8 @@ public abstract class IntroSorter extends Sorter {
  }
  void quicksort(int from, int to, int maxDepth) {
-   if (to - from < INSERTION_SORT_THRESHOLD) {
-     insertionSort(from, to);
+   if (to - from < BINARY_SORT_THRESHOLD) {
+     binarySort(from, to);
      return;
    } else if (--maxDepth < 0) {
      heapSort(from, to);
@@ -84,11 +83,18 @@ public abstract class IntroSorter extends Sorter {
    quicksort(left + 1, to, maxDepth);
  }
- /** Save the value at slot <code>i</code> so that it can later be used as a
-  *  pivot, see {@link #comparePivot(int)}. */
+ // Don't rely on the slow default impl of setPivot/comparePivot since
+ // quicksort relies on these methods to be fast for good performance
+ @Override
  protected abstract void setPivot(int i);
- /** Compare the pivot with the slot at <code>j</code>, similarly to
-  *  {@link #compare(int, int) compare(i, j)}. */
+ @Override
  protected abstract int comparePivot(int j);
+ @Override
+ protected int compare(int i, int j) {
+   setPivot(i);
+   return comparePivot(j);
+ }
}

View File

@@ -38,6 +38,7 @@ public abstract class MSBRadixSorter extends Sorter {
  // we store one histogram per recursion level
  private final int[][] histograms = new int[LEVEL_THRESHOLD][];
  private final int[] endOffsets = new int[HISTOGRAM_SIZE];
+ private final int[] commonPrefix;
  private final int maxLength;
@@ -47,6 +48,7 @@ public abstract class MSBRadixSorter extends Sorter {
   */
  protected MSBRadixSorter(int maxLength) {
    this.maxLength = maxLength;
+   this.commonPrefix = new int[Math.min(24, maxLength)];
  }
  /** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
@@ -116,14 +118,14 @@ public abstract class MSBRadixSorter extends Sorter {
  @Override
  public void sort(int from, int to) {
    checkRange(from, to);
-   sort(from, to, 0);
+   sort(from, to, 0, 0);
  }
- private void sort(int from, int to, int k) {
-   if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
+ private void sort(int from, int to, int k, int l) {
+   if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
      introSort(from, to, k);
    } else {
-     radixSort(from, to, k);
+     radixSort(from, to, k, l);
    }
  }
@@ -131,28 +133,30 @@ public abstract class MSBRadixSorter extends Sorter {
    getFallbackSorter(k).sort(from, to);
  }
- private void radixSort(int from, int to, int k) {
-   int[] histogram = histograms[k];
+ /**
+  * @param k the character number to compare
+  * @param l the level of recursion
+  */
+ private void radixSort(int from, int to, int k, int l) {
+   int[] histogram = histograms[l];
    if (histogram == null) {
-     histogram = histograms[k] = new int[HISTOGRAM_SIZE];
+     histogram = histograms[l] = new int[HISTOGRAM_SIZE];
    } else {
      Arrays.fill(histogram, 0);
    }
-   buildHistogram(from, to, k, histogram);
-   // short-circuit: if all keys have the same byte at offset k, then recurse directly
-   for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
-     if (histogram[i] == to - from) {
-       // everything is in the same bucket, recurse
-       if (i > 0) {
-         sort(from, to, k + 1);
-       }
-       return;
-     } else if (histogram[i] != 0) {
-       break;
-     }
-   }
+   final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, k, histogram);
+   if (commonPrefixLength > 0) {
+     // if there are no more chars to compare or if all entries fell into the
+     // first bucket (which means strings are shorter than k) then we are done
+     // otherwise recurse
+     if (k + commonPrefixLength < maxLength
+         && histogram[0] < to - from) {
+       radixSort(from, to, k + commonPrefixLength, l);
+     }
+     return;
+   }
+   assert assertHistogram(commonPrefixLength, histogram);
    int[] startOffsets = histogram;
    int[] endOffsets = this.endOffsets;
@@ -167,24 +171,83 @@ public abstract class MSBRadixSorter extends Sorter {
      int h = endOffsets[i];
      final int bucketLen = h - prev;
      if (bucketLen > 1) {
-       sort(from + prev, from + h, k + 1);
+       sort(from + prev, from + h, k + 1, l + 1);
      }
      prev = h;
    }
  }
+ // only used from assert
+ private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
+   int numberOfUniqueBytes = 0;
+   for (int freq : histogram) {
+     if (freq > 0) {
+       numberOfUniqueBytes++;
+     }
+   }
+   if (numberOfUniqueBytes == 1) {
+     assert commonPrefixLength >= 1;
+   } else {
+     assert commonPrefixLength == 0 : commonPrefixLength;
+   }
+   return true;
+ }
  /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
  private int getBucket(int i, int k) {
    return byteAt(i, k) + 1;
  }
- /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
- private int[] buildHistogram(int from, int to, int k, int[] histogram) {
+ /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
+  *  and return a common prefix length for all visited values.
+  *  @see #buildHistogram */
+ private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
+   final int[] commonPrefix = this.commonPrefix;
+   int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
+   for (int j = 0; j < commonPrefixLength; ++j) {
+     final int b = byteAt(from, k + j);
+     commonPrefix[j] = b;
+     if (b == -1) {
+       commonPrefixLength = j + 1;
+       break;
+     }
+   }
+   int i;
+   outer: for (i = from + 1; i < to; ++i) {
+     for (int j = 0; j < commonPrefixLength; ++j) {
+       final int b = byteAt(i, k + j);
+       if (b != commonPrefix[j]) {
+         commonPrefixLength = j;
+         if (commonPrefixLength == 0) { // we have no common prefix
+           histogram[commonPrefix[0] + 1] = i - from;
+           histogram[b + 1] = 1;
+           break outer;
+         }
+         break;
+       }
+     }
+   }
+   if (i < to) {
+     // the loop got broken because there is no common prefix
+     assert commonPrefixLength == 0;
+     buildHistogram(i + 1, to, k, histogram);
+   } else {
+     assert commonPrefixLength > 0;
+     histogram[commonPrefix[0] + 1] = to - from;
+   }
+   return commonPrefixLength;
+ }
+ /** Build an histogram of the k-th characters of values occurring between
+  *  offsets {@code from} and {@code to}, using {@link #getBucket}. */
+ private void buildHistogram(int from, int to, int k, int[] histogram) {
    for (int i = from; i < to; ++i) {
      histogram[getBucket(i, k)]++;
    }
-   return histogram;
  }
  /** Accumulate values of the histogram so that it does not store counts but

View File

@ -0,0 +1,278 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
/** Radix selector.
* <p>This implementation works similarly to a MSB radix sort except that it
* only recurses into the sub partition that contains the desired value.
* @lucene.internal */
public abstract class RadixSelector extends Selector {
// after that many levels of recursion we fall back to introselect anyway
// this is used as a protection against the fact that radix sort performs
// worse when there are long common prefixes (probably because of cache
// locality)
private static final int LEVEL_THRESHOLD = 8;
// size of histograms: 256 + 1 to indicate that the string is finished
private static final int HISTOGRAM_SIZE = 257;
// buckets below this size will be sorted with introselect
private static final int LENGTH_THRESHOLD = 100;
// we store one histogram per recursion level
private final int[] histogram = new int[HISTOGRAM_SIZE];
private final int[] commonPrefix;
private final int maxLength;
/**
* Sole constructor.
* @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown.
*/
protected RadixSelector(int maxLength) {
this.maxLength = maxLength;
this.commonPrefix = new int[Math.min(24, maxLength)];
}
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
* its length is less than or equal to {@code k}. This may only be called
* with a value of {@code i} between {@code 0} included and
* {@code maxLength} excluded. */
protected abstract int byteAt(int i, int k);
/** Get a fall-back selector which may assume that the first {@code d} bytes
* of all compared strings are equal. This fallback selector is used when
* the range becomes narrow or when the maximum level of recursion has
* been exceeded. */
protected Selector getFallbackSelector(int d) {
return new IntroSelector() {
@Override
protected void swap(int i, int j) {
RadixSelector.this.swap(i, j);
}
@Override
protected int compare(int i, int j) {
for (int o = d; o < maxLength; ++o) {
final int b1 = byteAt(i, o);
final int b2 = byteAt(j, o);
if (b1 != b2) {
return b1 - b2;
} else if (b1 == -1) {
break;
}
}
return 0;
}
@Override
protected void setPivot(int i) {
pivot.setLength(0);
for (int o = d; o < maxLength; ++o) {
final int b = byteAt(i, o);
if (b == -1) {
break;
}
pivot.append((byte) b);
}
}
@Override
protected int comparePivot(int j) {
for (int o = 0; o < pivot.length(); ++o) {
final int b1 = pivot.byteAt(o) & 0xff;
final int b2 = byteAt(j, d + o);
if (b1 != b2) {
return b1 - b2;
}
}
if (d + pivot.length() == maxLength) {
return 0;
}
return -1 - byteAt(j, d + pivot.length());
}
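// Editor's note: when the pivot ran out of bytes before maxLength, its "next" byte is
// conceptually -1, so the expression above evaluates to -1 - byteAt(j, d + pivot.length()):
// negative (the pivot sorts first) unless the other entry is exhausted too, in which case
// -1 - (-1) == 0 and the two entries are equal from offset d onwards.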
private final BytesRefBuilder pivot = new BytesRefBuilder();
};
}
@Override
public void select(int from, int to, int k) {
checkArgs(from, to, k);
select(from, to, k, 0, 0);
}
private void select(int from, int to, int k, int d, int l) {
if (to - from <= LENGTH_THRESHOLD || d >= LEVEL_THRESHOLD) {
getFallbackSelector(d).select(from, to, k);
} else {
radixSelect(from, to, k, d, l);
}
}
/**
* @param d the character number to compare
* @param l the level of recursion
*/
private void radixSelect(int from, int to, int k, int d, int l) {
final int[] histogram = this.histogram;
Arrays.fill(histogram, 0);
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, d, histogram);
if (commonPrefixLength > 0) {
// if there are no more chars to compare or if all entries fell into the
// first bucket (which means strings are shorter than d) then we are done
// otherwise recurse
if (d + commonPrefixLength < maxLength
&& histogram[0] < to - from) {
radixSelect(from, to, k, d + commonPrefixLength, l);
}
return;
}
assert assertHistogram(commonPrefixLength, histogram);
int bucketFrom = from;
for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) {
final int bucketTo = bucketFrom + histogram[bucket];
if (bucketTo > k) {
partition(from, to, bucket, bucketFrom, bucketTo, d);
if (bucket != 0 && d + 1 < maxLength) {
// all elements in bucket 0 are equal so we only need to recurse if bucket != 0
select(bucketFrom, bucketTo, k, d + 1, l + 1);
}
return;
}
bucketFrom = bucketTo;
}
throw new AssertionError("Unreachable code");
}
// only used from assert
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
int numberOfUniqueBytes = 0;
for (int freq : histogram) {
if (freq > 0) {
numberOfUniqueBytes++;
}
}
if (numberOfUniqueBytes == 1) {
assert commonPrefixLength >= 1;
} else {
assert commonPrefixLength == 0;
}
return true;
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int i, int k) {
return byteAt(i, k) + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
* and return a common prefix length for all visited values.
* @see #buildHistogram */
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
final int[] commonPrefix = this.commonPrefix;
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(from, k + j);
commonPrefix[j] = b;
if (b == -1) {
commonPrefixLength = j + 1;
break;
}
}
int i;
outer: for (i = from + 1; i < to; ++i) {
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(i, k + j);
if (b != commonPrefix[j]) {
commonPrefixLength = j;
if (commonPrefixLength == 0) { // we have no common prefix
histogram[commonPrefix[0] + 1] = i - from;
histogram[b + 1] = 1;
break outer;
}
break;
}
}
}
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;
buildHistogram(i + 1, to, k, histogram);
} else {
assert commonPrefixLength > 0;
histogram[commonPrefix[0] + 1] = to - from;
}
return commonPrefixLength;
}
/** Build a histogram of the k-th characters of values occurring between
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
private void buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
}
/** Reorder elements so that all of them that fall into {@code bucket} are
* between offsets {@code bucketFrom} and {@code bucketTo}. */
private void partition(int from, int to, int bucket, int bucketFrom, int bucketTo, int d) {
int left = from;
int right = to - 1;
int slot = bucketFrom;
for (;;) {
int leftBucket = getBucket(left, d);
int rightBucket = getBucket(right, d);
while (leftBucket <= bucket && left < bucketFrom) {
if (leftBucket == bucket) {
swap(left, slot++);
} else {
++left;
}
leftBucket = getBucket(left, d);
}
while (rightBucket >= bucket && right >= bucketTo) {
if (rightBucket == bucket) {
swap(right, slot++);
} else {
--right;
}
rightBucket = getBucket(right, d);
}
if (left < bucketFrom && right >= bucketTo) {
swap(left++, right--);
} else {
assert left == bucketFrom;
assert right == bucketTo - 1;
break;
}
}
}
}
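To make the byteAt/swap contract concrete, here is a minimal usage sketch (an editor's addition, not part of the patch) that partitions a small array of byte[] keys around its median slot. The class name and sample data are invented; only the RadixSelector and Selector calls come from the code above.
import org.apache.lucene.util.RadixSelector;

public class RadixSelectorExample {
  public static void main(String[] args) {
    final byte[][] keys = {
        {0x62, 0x62}, {0x61}, {0x63, 0x61, 0x7a}, {0x61, 0x61}, {0x62}
    };
    final int median = keys.length / 2;
    new RadixSelector(Integer.MAX_VALUE) { // maxLength unknown, so pass Integer.MAX_VALUE
      @Override
      protected void swap(int i, int j) {
        byte[] tmp = keys[i]; keys[i] = keys[j]; keys[j] = tmp;
      }
      @Override
      protected int byteAt(int i, int k) {
        // -1 signals that the key is exhausted, per the byteAt contract documented above
        return k < keys[i].length ? Byte.toUnsignedInt(keys[i][k]) : -1;
      }
    }.select(0, keys.length, median);
    // keys[median] now holds the value that would land at that slot if the range were
    // sorted; entries before it compare <= to it and entries after compare >= to it.
  }
}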

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
/** An implementation of a selection algorithm, ie. computing the k-th greatest
* value from a collection. */
public abstract class Selector {
/** Reorder elements so that the element at position {@code k} is the same
* as if all elements were sorted and all other elements are partitioned
* around it: {@code [from, k)} only contains elements that are less than
* or equal to {@code k} and {@code (k, to)} only contains elements that
* are greater than or equal to {@code k}. */
public abstract void select(int from, int to, int k);
void checkArgs(int from, int to, int k) {
if (k < from) {
throw new IllegalArgumentException("k must be >= from");
}
if (k >= to) {
throw new IllegalArgumentException("k must be < to");
}
}
/** Swap values at slots <code>i</code> and <code>j</code>. */
protected abstract void swap(int i, int j);
}

View File

@ -23,7 +23,7 @@ import java.util.Comparator;
* @lucene.internal */ * @lucene.internal */
public abstract class Sorter { public abstract class Sorter {
static final int INSERTION_SORT_THRESHOLD = 20; static final int BINARY_SORT_THRESHOLD = 20;
/** Sole constructor, used for inheritance. */ /** Sole constructor, used for inheritance. */
protected Sorter() {} protected Sorter() {}
@ -36,6 +36,20 @@ public abstract class Sorter {
/** Swap values at slots <code>i</code> and <code>j</code>. */ /** Swap values at slots <code>i</code> and <code>j</code>. */
protected abstract void swap(int i, int j); protected abstract void swap(int i, int j);
private int pivotIndex;
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
protected void setPivot(int i) {
pivotIndex = i;
}
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
protected int comparePivot(int j) {
return compare(pivotIndex, j);
}
/** Sort the slice which starts at <code>from</code> (inclusive) and ends at /** Sort the slice which starts at <code>from</code> (inclusive) and ends at
* <code>to</code> (exclusive). */ * <code>to</code> (exclusive). */
public abstract void sort(int from, int to); public abstract void sort(int from, int to);
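As an illustration (editor's addition, not part of the patch), the two hooks above let a concrete sorter materialize the pivot once per binary-search round instead of re-reading it through compare(i, j). Below is a minimal sketch over an int[] using IntroSorter, whose anonymous-subclass pattern also appears in the BKD changes later in this commit.
import org.apache.lucene.util.IntroSorter;

public class PivotSortExample {
  public static void main(String[] args) {
    final int[] values = {42, 7, 19, 7, 3, 88};
    new IntroSorter() {
      int pivot; // cached by setPivot so comparePivot does not re-read the slot

      @Override
      protected void swap(int i, int j) {
        int tmp = values[i]; values[i] = values[j]; values[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        return Integer.compare(values[i], values[j]);
      }

      @Override
      protected void setPivot(int i) {
        pivot = values[i];
      }

      @Override
      protected int comparePivot(int j) {
        return Integer.compare(pivot, values[j]);
      }
    }.sort(0, values.length);
    // values is now {3, 7, 7, 19, 42, 88}
  }
}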
@ -163,54 +177,41 @@ public abstract class Sorter {
} }
} }
void insertionSort(int from, int to) {
for (int i = from + 1; i < to; ++i) {
for (int j = i; j > from; --j) {
if (compare(j - 1, j) > 0) {
swap(j - 1, j);
} else {
break;
}
}
}
}
/**
* A binary sort implementation. This performs {@code O(n*log(n))} comparisons
* and {@code O(n^2)} swaps. It is typically used by more sophisticated
* implementations as a fall-back when the number of items to sort has become
* less than {@value #BINARY_SORT_THRESHOLD}.
*/
void binarySort(int from, int to) { void binarySort(int from, int to) {
binarySort(from, to, from + 1); binarySort(from, to, from + 1);
} }
void binarySort(int from, int to, int i) { void binarySort(int from, int to, int i) {
for ( ; i < to; ++i) { for ( ; i < to; ++i) {
setPivot(i);
int l = from; int l = from;
int h = i - 1; int h = i - 1;
while (l <= h) { while (l <= h) {
final int mid = (l + h) >>> 1; final int mid = (l + h) >>> 1;
final int cmp = compare(i, mid); final int cmp = comparePivot(mid);
if (cmp < 0) { if (cmp < 0) {
h = mid - 1; h = mid - 1;
} else { } else {
l = mid + 1; l = mid + 1;
} }
} }
switch (i - l) {
case 2:
swap(l + 1, l + 2);
swap(l, l + 1);
break;
case 1:
swap(l, l + 1);
break;
case 0:
break;
default:
for (int j = i; j > l; --j) { for (int j = i; j > l; --j) {
swap(j - 1, j); swap(j - 1, j);
} }
break;
}
} }
} }
/**
* Use heap sort to sort items between {@code from} inclusive and {@code to}
* exclusive. This runs in {@code O(n*log(n))} and is used as a fall-back by
* {@link IntroSorter}.
*/
void heapSort(int from, int to) { void heapSort(int from, int to) {
if (to - from <= 1) { if (to - from <= 1) {
return; return;

View File

@ -357,13 +357,13 @@ public class Automaton implements Accountable {
} }
private void growStates() { private void growStates() {
if (nextState+2 >= states.length) { if (nextState+2 > states.length) {
states = ArrayUtil.grow(states, nextState+2); states = ArrayUtil.grow(states, nextState+2);
} }
} }
private void growTransitions() { private void growTransitions() {
if (nextTransition+3 >= transitions.length) { if (nextTransition+3 > transitions.length) {
transitions = ArrayUtil.grow(transitions, nextTransition+3); transitions = ArrayUtil.grow(transitions, nextTransition+3);
} }
} }

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.function.IntFunction; import java.util.function.IntFunction;
import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.index.MergeState; import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.index.PointValues.Relation;
@ -111,7 +112,8 @@ public class BKDWriter implements Closeable {
final byte[] scratchDiff; final byte[] scratchDiff;
final byte[] scratch1; final byte[] scratch1;
final byte[] scratch2; final byte[] scratch2;
final BytesRef scratchBytesRef = new BytesRef(); final BytesRef scratchBytesRef1 = new BytesRef();
final BytesRef scratchBytesRef2 = new BytesRef();
final int[] commonPrefixLengths; final int[] commonPrefixLengths;
protected final FixedBitSet docsSeen; protected final FixedBitSet docsSeen;
@ -173,7 +175,6 @@ public class BKDWriter implements Closeable {
packedBytesLength = numDims * bytesPerDim; packedBytesLength = numDims * bytesPerDim;
scratchDiff = new byte[bytesPerDim]; scratchDiff = new byte[bytesPerDim];
scratchBytesRef.length = packedBytesLength;
scratch1 = new byte[packedBytesLength]; scratch1 = new byte[packedBytesLength];
scratch2 = new byte[packedBytesLength]; scratch2 = new byte[packedBytesLength];
commonPrefixLengths = new int[numDims]; commonPrefixLengths = new int[numDims];
@ -204,7 +205,7 @@ public class BKDWriter implements Closeable {
// all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X // all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X
// what that level would consume, so we multiply by 0.5 to convert from // what that level would consume, so we multiply by 0.5 to convert from
// bytes to points here. Each dimension has its own sorted partition, so // bytes to points here. Each dimension has its own sorted partition, so
// we must divide by numDims as well. // we must divide by numDims as well.
maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims)); maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
@ -416,15 +417,25 @@ public class BKDWriter implements Closeable {
} }
} }
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
}
/** Write a field from a {@link MutablePointsReader}. This way of writing
* points is faster than regular writes with {@link BKDWriter#add} since
* there is opportunity for reordering points before writing them to
* disk. This method does not use transient disk in order to reorder points.
*/
public long writeField(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
if (numDims == 1) {
return writeField1Dim(out, fieldName, reader);
} else {
return writeFieldNDims(out, fieldName, reader);
}
}
/* In the 2+D case, we recursively pick the split dimension, compute the
* median value and partition other values around it. */
private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
if (pointCount != 0) { if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge"); throw new IllegalStateException("cannot mix add and writeField");
} }
// Catch user silliness: // Catch user silliness:
@ -435,6 +446,81 @@ public class BKDWriter implements Closeable {
// Mark that we already finished: // Mark that we already finished:
heapPointWriter = null; heapPointWriter = null;
long countPerLeaf = pointCount = reader.size(fieldName);
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
int numLeaves = Math.toIntExact(innerNodeCount);
checkMaxLeafNodeCount(numLeaves);
final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)];
final long[] leafBlockFPs = new long[numLeaves];
// compute the min/max for this slice
Arrays.fill(minPackedValue, (byte) 0xff);
Arrays.fill(maxPackedValue, (byte) 0);
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
reader.getValue(i, scratchBytesRef1);
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset) < 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
}
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset) > 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
}
}
docsSeen.set(reader.getDocID(i));
}
build(1, numLeaves, reader, 0, Math.toIntExact(pointCount), out,
minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
new int[maxPointsInLeafNode]);
long indexFP = out.getFilePointer();
writeIndex(out, leafBlockFPs, splitPackedValues);
return indexFP;
}
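// Editor's note (illustrative, not part of the patch): with the halving loop in
// writeFieldNDims above, a hypothetical field of 3000 points and maxPointsInLeafNode == 1024
// shrinks countPerLeaf 3000 -> 1500 -> 750 while innerNodeCount doubles 1 -> 2 -> 4, so the
// tree gets numLeaves == 4 leaves of roughly 750 points each, comfortably below the maximum.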
/* In the 1D case, we can simply sort points in ascending order and use the
* same writing logic as we use at merge time. */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size(fieldName)));
final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
reader.intersect(fieldName, new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
oneDimWriter.add(packedValue, docID);
}
@Override
public void visit(int docID) throws IOException {
throw new IllegalStateException();
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
return oneDimWriter.finish();
}
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
assert docMaps == null || readers.size() == docMaps.size(); assert docMaps == null || readers.size() == docMaps.size();
BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size()); BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());
@ -453,72 +539,14 @@ public class BKDWriter implements Closeable {
} }
} }
if (queue.size() == 0) { OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
return -1;
}
int leafCount = 0;
List<Long> leafBlockFPs = new ArrayList<>();
List<byte[]> leafBlockStartValues = new ArrayList<>();
// Target halfway between min and max allowed for the leaf:
int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode);
//System.out.println("POINTS PER: " + pointsPerLeafBlock);
byte[] lastPackedValue = new byte[bytesPerDim];
byte[] firstPackedValue = new byte[bytesPerDim];
long valueCount = 0;
// Buffer up each leaf block's docs and values
int[] leafBlockDocIDs = new int[maxPointsInLeafNode];
byte[][] leafBlockPackedValues = new byte[maxPointsInLeafNode][];
for(int i=0;i<maxPointsInLeafNode;i++) {
leafBlockPackedValues[i] = new byte[packedBytesLength];
}
Arrays.fill(commonPrefixLengths, bytesPerDim);
while (queue.size() != 0) { while (queue.size() != 0) {
MergeReader reader = queue.top(); MergeReader reader = queue.top();
// System.out.println("iter reader=" + reader); // System.out.println("iter reader=" + reader);
// NOTE: doesn't work with subclasses (e.g. SimpleText!) // NOTE: doesn't work with subclasses (e.g. SimpleText!)
int docID = reader.docID; oneDimWriter.add(reader.state.scratchPackedValue, reader.docID);
leafBlockDocIDs[leafCount] = docID;
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
docsSeen.set(docID);
if (valueCount == 0) {
System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength);
}
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue, 0);
valueCount++;
if (pointCount > totalPointCount) {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
}
if (leafCount == 0) {
if (leafBlockFPs.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
leafBlockStartValues.add(Arrays.copyOf(reader.state.scratchPackedValue, bytesPerDim));
}
Arrays.fill(commonPrefixLengths, bytesPerDim);
System.arraycopy(reader.state.scratchPackedValue, 0, firstPackedValue, 0, bytesPerDim);
} else {
// Find per-dim common prefix:
for(int dim=0;dim<numDims;dim++) {
int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (firstPackedValue[offset+j] != reader.state.scratchPackedValue[offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
leafCount++;
if (reader.next()) { if (reader.next()) {
queue.updateTop(); queue.updateTop();
@ -526,35 +554,78 @@ public class BKDWriter implements Closeable {
// This segment was exhausted // This segment was exhausted
queue.pop(); queue.pop();
} }
}
return oneDimWriter.finish();
}
private class OneDimensionBKDWriter {
final IndexOutput out;
final List<Long> leafBlockFPs = new ArrayList<>();
final List<byte[]> leafBlockStartValues = new ArrayList<>();
final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
final int[] leafDocs = new int[maxPointsInLeafNode];
long valueCount;
int leafCount;
OneDimensionBKDWriter(IndexOutput out) {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
}
if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge");
}
// Catch user silliness:
if (heapPointWriter == null && tempInput == null) {
throw new IllegalStateException("already finished");
}
// Mark that we already finished:
heapPointWriter = null;
this.out = out;
lastPackedValue = new byte[packedBytesLength];
}
// for asserts
final byte[] lastPackedValue;
int lastDocID;
void add(byte[] packedValue, int docID) throws IOException {
assert valueInOrder(valueCount + leafCount,
0, lastPackedValue, packedValue, 0, docID, lastDocID);
System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
leafDocs[leafCount] = docID;
docsSeen.set(docID);
leafCount++;
if (valueCount > totalPointCount) {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
}
if (leafCount == maxPointsInLeafNode) {
// We write a block once we hit exactly the max count ... this is different from // We write a block once we hit exactly the max count ... this is different from
// when we flush a new segment, where we write between max/2 and max per leaf block, // when we flush a new segment, where we write between max/2 and max per leaf block,
// so merged segments will behave differently from newly flushed segments: // so merged segments will behave differently from newly flushed segments:
if (leafCount == pointsPerLeafBlock || queue.size() == 0) { writeLeafBlock();
leafBlockFPs.add(out.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
writeLeafBlockDocs(out, leafBlockDocIDs, 0, leafCount);
writeCommonPrefixes(out, commonPrefixLengths, firstPackedValue);
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
final BytesRef scratch = new BytesRef();
{
scratch.length = packedBytesLength;
scratch.offset = 0;
}
@Override
public BytesRef apply(int i) {
scratch.bytes = leafBlockPackedValues[i];
return scratch;
}
};
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
leafCount = 0; leafCount = 0;
} }
assert (lastDocID = docID) >= 0; // only assign when asserts are enabled
}
public long finish() throws IOException {
if (leafCount > 0) {
writeLeafBlock();
leafCount = 0;
}
if (valueCount == 0) {
return -1;
} }
pointCount = valueCount; pointCount = valueCount;
@ -575,6 +646,60 @@ public class BKDWriter implements Closeable {
return indexFP; return indexFP;
} }
private void writeLeafBlock() throws IOException {
assert leafCount != 0;
if (valueCount == 0) {
System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
}
System.arraycopy(leafValues, (leafCount - 1) * packedBytesLength, maxPackedValue, 0, packedBytesLength);
valueCount += leafCount;
if (leafBlockFPs.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
leafBlockStartValues.add(Arrays.copyOf(leafValues, packedBytesLength));
}
leafBlockFPs.add(out.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
Arrays.fill(commonPrefixLengths, bytesPerDim);
// Find per-dim common prefix:
for(int dim=0;dim<numDims;dim++) {
int offset1 = dim * bytesPerDim;
int offset2 = (leafCount - 1) * packedBytesLength + offset1;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (leafValues[offset1+j] != leafValues[offset2+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
writeLeafBlockDocs(out, leafDocs, 0, leafCount);
writeCommonPrefixes(out, commonPrefixLengths, leafValues);
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
final BytesRef scratch = new BytesRef();
{
scratch.length = packedBytesLength;
scratch.bytes = leafValues;
}
@Override
public BytesRef apply(int i) {
scratch.offset = packedBytesLength * i;
return scratch;
}
};
assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength),
Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
packedValues, leafDocs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
}
}
// TODO: there must be a simpler way? // TODO: there must be a simpler way?
private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) { private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
//System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length); //System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length);
@ -686,6 +811,7 @@ public class BKDWriter implements Closeable {
} }
private PointWriter sort(int dim) throws IOException { private PointWriter sort(int dim) throws IOException {
assert dim >= 0 && dim < numDims;
if (heapPointWriter != null) { if (heapPointWriter != null) {
@ -1110,6 +1236,132 @@ public class BKDWriter implements Closeable {
} }
} }
/* Recursively reorders the provided reader and writes the bkd-tree on the fly. */
private void build(int nodeID, int leafNodeOffset,
MutablePointsReader reader, int from, int to,
IndexOutput out,
byte[] minPackedValue, byte[] maxPackedValue,
byte[] splitPackedValues,
long[] leafBlockFPs,
int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) {
// leaf node
final int count = to - from;
assert count <= maxPointsInLeafNode;
// Compute common prefixes
Arrays.fill(commonPrefixLengths, bytesPerDim);
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim=0;dim<numDims;dim++) {
final int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
FixedBitSet[] usedBytes = new FixedBitSet[numDims];
for (int dim = 0; dim < numDims; ++dim) {
if (commonPrefixLengths[dim] < bytesPerDim) {
usedBytes[dim] = new FixedBitSet(256);
}
}
for (int i = from + 1; i < to; ++i) {
for (int dim=0;dim<numDims;dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < numDims; ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
sortedDim = dim;
sortedDimCardinality = cardinality;
}
}
}
// sort by sortedDim
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
// Write doc IDs
int[] docIDs = spareDocIds;
for (int i = from; i < to; ++i) {
docIDs[i - from] = reader.getDocID(i);
}
writeLeafBlockDocs(out, docIDs, 0, count);
// Write the common prefixes:
reader.getValue(from, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
writeCommonPrefixes(out, commonPrefixLengths, scratch1);
// Write the full values:
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
@Override
public BytesRef apply(int i) {
reader.getValue(from + i, scratchBytesRef1);
return scratchBytesRef1;
}
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
// inner node
// compute the split dimension and partition around it
final int splitDim = split(minPackedValue, maxPackedValue);
final int mid = (from + to + 1) >>> 1;
int commonPrefixLen = bytesPerDim;
for (int i = 0; i < bytesPerDim; ++i) {
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
commonPrefixLen = i;
break;
}
}
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value
final int address = nodeID * (1+bytesPerDim);
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
// recurse
build(nodeID * 2, leafNodeOffset, reader, from, mid, out,
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out,
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
}
}
/** The array (sized numDims) of PathSlice describe the cell we have currently recursed to. */ /** The array (sized numDims) of PathSlice describe the cell we have currently recursed to. */
private void build(int nodeID, int leafNodeOffset, private void build(int nodeID, int leafNodeOffset,
PathSlice[] slices, PathSlice[] slices,
@ -1217,7 +1469,8 @@ public class BKDWriter implements Closeable {
return scratch; return scratch;
} }
}; };
assert valuesInOrderAndBounds(count, minPackedValue, maxPackedValue, packedValues); assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
heapSource.docIDs, Math.toIntExact(source.start));
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues); writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else { } else {
@ -1321,12 +1574,16 @@ public class BKDWriter implements Closeable {
} }
// only called from assert // only called from assert
private boolean valuesInOrderAndBounds(int count, byte[] minPackedValue, byte[] maxPackedValue, IntFunction<BytesRef> values) throws IOException { private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue,
byte[] lastPackedValue = new byte[bytesPerDim]; IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException {
byte[] lastPackedValue = new byte[packedBytesLength];
int lastDoc = -1;
for (int i=0;i<count;i++) { for (int i=0;i<count;i++) {
BytesRef packedValue = values.apply(i); BytesRef packedValue = values.apply(i);
assert packedValue.length == packedBytesLength; assert packedValue.length == packedBytesLength;
assert numDims != 1 || valueInOrder(i, lastPackedValue, packedValue.bytes, packedValue.offset); assert valueInOrder(i, sortedDim, lastPackedValue, packedValue.bytes, packedValue.offset,
docs[docsOffset + i], lastDoc);
lastDoc = docs[docsOffset + i];
// Make sure this value does in fact fall within this leaf cell: // Make sure this value does in fact fall within this leaf cell:
assert valueInBounds(packedValue, minPackedValue, maxPackedValue); assert valueInBounds(packedValue, minPackedValue, maxPackedValue);
@ -1335,11 +1592,19 @@ public class BKDWriter implements Closeable {
} }
// only called from assert // only called from assert
private boolean valueInOrder(long ord, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset) { private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset,
if (ord > 0 && StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, packedValueOffset) > 0) { int doc, int lastDoc) {
int dimOffset = sortedDim * bytesPerDim;
if (ord > 0) {
int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, dimOffset, packedValue, packedValueOffset + dimOffset);
if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord); throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
} }
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, bytesPerDim); if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
}
}
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength);
return true; return true;
} }

View File

@ -0,0 +1,186 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.MSBRadixSorter;
import org.apache.lucene.util.RadixSelector;
import org.apache.lucene.util.Selector;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.PackedInts;
final class MutablePointsReaderUtils {
MutablePointsReaderUtils() {}
/** Sort the given {@link MutablePointsReader} based on its packed value then doc ID. */
static void sort(int maxDoc, int packedBytesLength,
MutablePointsReader reader, int from, int to) {
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) {
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected int byteAt(int i, int k) {
if (k < packedBytesLength) {
return Byte.toUnsignedInt(reader.getByteAt(i, k));
} else {
final int shift = bitsPerDocId - ((k - packedBytesLength + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
@Override
protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
return new IntroSorter() {
final BytesRef pivot = new BytesRef();
final BytesRef scratch = new BytesRef();
int pivotDoc;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
if (k < packedBytesLength) {
reader.getValue(j, scratch);
int cmp = StringHelper.compare(packedBytesLength - k, pivot.bytes, pivot.offset + k, scratch.bytes, scratch.offset + k);
if (cmp != 0) {
return cmp;
}
}
return pivotDoc - reader.getDocID(j);
}
};
}
}.sort(from, to);
}
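// Editor's note (illustrative, not part of the patch): the sort key above is the packed value
// followed by (bitsPerDocId + 7) / 8 extra bytes spelling out the doc ID from its most
// significant bits down. For a hypothetical maxDoc of 1,000,000, bitsPerDocId is 20 and three
// doc bytes get appended with shifts of 12, 4 and -4 (clamped to 0 by Math.max), i.e. bits
// 19-12, 11-4 and 7-0 of the doc ID. The last byte overlaps the previous one, but a tie on the
// higher bytes means the overlapping bits are equal too, so the comparison stays
// order-preserving and ties are broken by doc ID as intended.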
/** Sort points on the given dimension. */
static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
MutablePointsReader reader, int from, int to,
BytesRef scratch1, BytesRef scratch2) {
// No need for a fancy radix sort here, this is called on the leaves only so
// there are not many values to sort
final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
new IntroSorter() {
final BytesRef pivot = scratch1;
int pivotDoc = -1;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
reader.getValue(j, scratch2);
int cmp = StringHelper.compare(numBytesToCompare, pivot.bytes, pivot.offset + offset, scratch2.bytes, scratch2.offset + offset);
if (cmp == 0) {
cmp = pivotDoc - reader.getDocID(j);
}
return cmp;
}
}.sort(from, to);
}
/** Partition points around {@code mid}. All values on the left must be less
* than or equal to it and all values on the right must be greater than or
* equal to it. */
static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
MutablePointsReader reader, int from, int to, int mid,
BytesRef scratch1, BytesRef scratch2) {
final int offset = splitDim * bytesPerDim + commonPrefixLen;
final int cmpBytes = bytesPerDim - commonPrefixLen;
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {
@Override
protected Selector getFallbackSelector(int k) {
return new IntroSelector() {
final BytesRef pivot = scratch1;
int pivotDoc;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
if (k < cmpBytes) {
reader.getValue(j, scratch2);
int cmp = StringHelper.compare(cmpBytes - k, pivot.bytes, pivot.offset + offset + k, scratch2.bytes, scratch2.offset + offset + k);
if (cmp != 0) {
return cmp;
}
}
return pivotDoc - reader.getDocID(j);
}
};
}
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected int byteAt(int i, int k) {
if (k < cmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
} else {
final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
}.select(from, to, mid);
}
}

View File

@ -41,8 +41,9 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
if (random().nextBoolean()) { if (random().nextBoolean()) {
// randomize parameters // randomize parameters
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500); int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode); System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
} }
// sneaky impersonation! // sneaky impersonation!
@ -52,7 +53,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
return new PointsFormat() { return new PointsFormat() {
@Override @Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException { public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode); return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
} }
@Override @Override

View File

@ -1156,8 +1156,9 @@ public class TestPointQueries extends LuceneTestCase {
private static Codec getCodec() { private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene62")) { if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 5.0 + (3*random().nextDouble());
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode); System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
} }
return new FilterCodec("Lucene62", Codec.getDefault()) { return new FilterCodec("Lucene62", Codec.getDefault()) {
@ -1166,7 +1167,7 @@ public class TestPointQueries extends LuceneTestCase {
return new PointsFormat() { return new PointsFormat() {
@Override @Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException { public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode); return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
} }
@Override @Override

View File

@ -45,7 +45,26 @@ public class TestByteBlockPool extends LuceneTestCase {
for (BytesRef expected : list) { for (BytesRef expected : list) {
ref.grow(expected.length); ref.grow(expected.length);
ref.setLength(expected.length); ref.setLength(expected.length);
switch (random().nextInt(3)) {
case 0:
// copy bytes
pool.readBytes(position, ref.bytes(), 0, ref.length()); pool.readBytes(position, ref.bytes(), 0, ref.length());
break;
case 1:
// copy bytes one by one
for (int i = 0; i < ref.length(); ++i) {
ref.setByteAt(i, pool.readByte(position + i));
}
break;
case 2:
BytesRef scratch = new BytesRef();
scratch.length = ref.length();
pool.setRawBytesRef(scratch, position);
System.arraycopy(scratch.bytes, scratch.offset, ref.bytes(), 0, ref.length());
break;
default:
fail();
}
assertEquals(expected, ref.get()); assertEquals(expected, ref.get());
position += ref.length(); position += ref.length();
} }

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
public class TestIntroSelector extends LuceneTestCase {
public void testSelect() {
for (int iter = 0; iter < 100; ++iter) {
doTestSelect(false);
}
}
public void testSlowSelect() {
for (int iter = 0; iter < 100; ++iter) {
doTestSelect(true);
}
}
private void doTestSelect(boolean slow) {
final int from = random().nextInt(5);
final int to = from + TestUtil.nextInt(random(), 1, 10000);
final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000);
Integer[] arr = new Integer[from + to + random().nextInt(5)];
for (int i = 0; i < arr.length; ++i) {
arr[i] = TestUtil.nextInt(random(), 0, max);
}
final int k = TestUtil.nextInt(random(), from, to - 1);
Integer[] expected = arr.clone();
Arrays.sort(expected, from, to);
Integer[] actual = arr.clone();
IntroSelector selector = new IntroSelector() {
Integer pivot;
@Override
protected void swap(int i, int j) {
ArrayUtil.swap(actual, i, j);
}
@Override
protected void setPivot(int i) {
pivot = actual[i];
}
@Override
protected int comparePivot(int j) {
return pivot.compareTo(actual[j]);
}
};
if (slow) {
selector.slowSelect(from, to, k);
} else {
selector.select(from, to, k);
}
assertEquals(expected[k], actual[k]);
for (int i = 0; i < actual.length; ++i) {
if (i < from || i >= to) {
assertSame(arr[i], actual[i]);
} else if (i <= k) {
assertTrue(actual[i].intValue() <= actual[k].intValue());
} else {
assertTrue(actual[i].intValue() >= actual[k].intValue());
}
}
}
}

View File

@ -17,6 +17,8 @@
package org.apache.lucene.util; package org.apache.lucene.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class TestMSBRadixSorter extends LuceneTestCase { public class TestMSBRadixSorter extends LuceneTestCase {
@ -41,9 +43,12 @@ public class TestMSBRadixSorter extends LuceneTestCase {
break; break;
} }
final int finalMaxLength = maxLength;
new MSBRadixSorter(maxLength) { new MSBRadixSorter(maxLength) {
@Override
protected int byteAt(int i, int k) { protected int byteAt(int i, int k) {
assertTrue(k < finalMaxLength);
BytesRef ref = refs[i]; BytesRef ref = refs[i];
if (ref.length <= k) { if (ref.length <= k) {
return -1; return -1;
@ -114,4 +119,67 @@ public class TestMSBRadixSorter extends LuceneTestCase {
testRandom(TestUtil.nextInt(random(), 1, 30), 2); testRandom(TestUtil.nextInt(random(), 1, 30), 2);
} }
} }
public void testRandom2() {
// how large our alphabet is
int letterCount = TestUtil.nextInt(random(), 2, 10);
// how many substring fragments to use
int substringCount = TestUtil.nextInt(random(), 2, 10);
Set<BytesRef> substringsSet = new HashSet<>();
// how many strings to make
int stringCount = atLeast(10000);
//System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + " stringCount=" + stringCount);
while(substringsSet.size() < substringCount) {
int length = TestUtil.nextInt(random(), 2, 10);
byte[] bytes = new byte[length];
for(int i=0;i<length;i++) {
bytes[i] = (byte) random().nextInt(letterCount);
}
BytesRef br = new BytesRef(bytes);
substringsSet.add(br);
//System.out.println("add substring count=" + substringsSet.size() + ": " + br);
}
BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
double[] chance = new double[substrings.length];
double sum = 0.0;
for(int i=0;i<substrings.length;i++) {
chance[i] = random().nextDouble();
sum += chance[i];
}
// give each substring a random chance of occurring:
double accum = 0.0;
for(int i=0;i<substrings.length;i++) {
accum += chance[i]/sum;
chance[i] = accum;
}
Set<BytesRef> stringsSet = new HashSet<>();
int iters = 0;
while (stringsSet.size() < stringCount && iters < stringCount*5) {
int count = TestUtil.nextInt(random(), 1, 5);
BytesRefBuilder b = new BytesRefBuilder();
for(int i=0;i<count;i++) {
double v = random().nextDouble();
accum = 0.0;
for(int j=0;j<substrings.length;j++) {
accum += chance[j];
if (accum >= v) {
b.append(substrings[j]);
break;
}
}
}
BytesRef br = b.toBytesRef();
stringsSet.add(br);
//System.out.println("add string count=" + stringsSet.size() + ": " + br);
iters++;
}
test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
}
} }

View File

@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
public class TestRadixSelector extends LuceneTestCase {
public void testSelect() {
for (int iter = 0; iter < 100; ++iter) {
doTestSelect();
}
}
private void doTestSelect() {
final int from = random().nextInt(5);
final int to = from + TestUtil.nextInt(random(), 1, 10000);
final int maxLen = TestUtil.nextInt(random(), 1, 12);
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
for (int i = 0; i < arr.length; ++i) {
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
random().nextBytes(bytes);
arr[i] = new BytesRef(bytes);
}
doTest(arr, from, to, maxLen);
}
public void testSharedPrefixes() {
for (int iter = 0; iter < 100; ++iter) {
doTestSharedPrefixes();
}
}
private void doTestSharedPrefixes() {
final int from = random().nextInt(5);
final int to = from + TestUtil.nextInt(random(), 1, 10000);
final int maxLen = TestUtil.nextInt(random(), 1, 12);
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
for (int i = 0; i < arr.length; ++i) {
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
random().nextBytes(bytes);
arr[i] = new BytesRef(bytes);
}
final int sharedPrefixLength = Math.min(arr[0].length, TestUtil.nextInt(random(), 1, maxLen));
for (int i = 1; i < arr.length; ++i) {
System.arraycopy(arr[0].bytes, arr[0].offset, arr[i].bytes, arr[i].offset, Math.min(sharedPrefixLength, arr[i].length));
}
doTest(arr, from, to, maxLen);
}
private void doTest(BytesRef[] arr, int from, int to, int maxLen) {
final int k = TestUtil.nextInt(random(), from, to - 1);
BytesRef[] expected = arr.clone();
Arrays.sort(expected, from, to);
BytesRef[] actual = arr.clone();
final int enforcedMaxLen = random().nextBoolean() ? maxLen : Integer.MAX_VALUE;
RadixSelector selector = new RadixSelector(enforcedMaxLen) {
@Override
protected void swap(int i, int j) {
ArrayUtil.swap(actual, i, j);
}
@Override
protected int byteAt(int i, int k) {
assertTrue(k < enforcedMaxLen);
BytesRef b = actual[i];
if (k >= b.length) {
return -1;
} else {
return Byte.toUnsignedInt(b.bytes[b.offset + k]);
}
}
};
selector.select(from, to, k);
assertEquals(expected[k], actual[k]);
for (int i = 0; i < actual.length; ++i) {
if (i < from || i >= to) {
assertSame(arr[i], actual[i]);
} else if (i <= k) {
assertTrue(actual[i].compareTo(actual[k]) <= 0);
} else {
assertTrue(actual[i].compareTo(actual[k]) >= 0);
}
}
}
}

View File

@ -0,0 +1,270 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
public class TestMutablePointsReaderUtils extends LuceneTestCase {
public void testSort() {
for (int iter = 0; iter < 5; ++iter) {
doTestSort();
}
}
private void doTestSort() {
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
DummyPointsReader reader = new DummyPointsReader(points);
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
Arrays.sort(points, new Comparator<Point>() {
@Override
public int compare(Point o1, Point o2) {
int cmp = o1.packedValue.compareTo(o2.packedValue);
if (cmp == 0) {
cmp = Integer.compare(o1.doc, o2.doc);
}
return cmp;
}
});
assertNotSame(points, reader.points);
assertArrayEquals(points, reader.points);
}
public void testSortByDim() {
for (int iter = 0; iter < 5; ++iter) {
doTestSortByDim();
}
}
private void doTestSortByDim() {
final int numDims = TestUtil.nextInt(random(), 1, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
int[] commonPrefixLengths = new int[numDims];
for (int i = 0; i < commonPrefixLengths.length; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = 0; dim < numDims; ++dim) {
int offset = dim * bytesPerDim;
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}
}
DummyPointsReader reader = new DummyPointsReader(points);
final int sortedDim = random().nextInt(numDims);
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
new BytesRef(), new BytesRef());
for (int i = 1; i < points.length; ++i) {
final int offset = sortedDim * bytesPerDim;
BytesRef previousValue = reader.points[i-1].packedValue;
BytesRef currentValue = reader.points[i].packedValue;
int cmp = StringHelper.compare(bytesPerDim,
previousValue.bytes, previousValue.offset + offset,
currentValue.bytes, currentValue.offset + offset);
if (cmp == 0) {
cmp = reader.points[i - 1].doc - reader.points[i].doc;
}
assertTrue(cmp <= 0);
}
}
public void testPartition() {
for (int iter = 0; iter < 5; ++iter) {
doTestPartition();
}
}
private void doTestPartition() {
final int numDims = TestUtil.nextInt(random(), 1, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
final int splitDim = random().nextInt(numDims);
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
BytesRef packedValue = points[i].packedValue;
int offset = splitDim * bytesPerDim;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
}
DummyPointsReader reader = new DummyPointsReader(points);
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
new BytesRef(), new BytesRef());
BytesRef pivotValue = reader.points[pivot].packedValue;
int offset = splitDim * bytesPerDim;
for (int i = 0; i < points.length; ++i) {
BytesRef value = reader.points[i].packedValue;
int cmp = StringHelper.compare(bytesPerDim,
value.bytes, value.offset + offset,
pivotValue.bytes, pivotValue.offset + offset);
if (cmp == 0) {
cmp = reader.points[i].doc - reader.points[pivot].doc;
}
if (i < pivot) {
assertTrue(cmp <= 0);
} else if (i > pivot) {
assertTrue(cmp >= 0);
} else {
assertEquals(0, cmp);
}
}
}
private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
final int packedBytesLength = numDims * bytesPerDim;
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
Point[] points = new Point[numPoints];
for (int i = 0; i < numPoints; ++i) {
byte[] value = new byte[packedBytesLength];
random().nextBytes(value);
points[i] = new Point(value, random().nextInt(maxDoc));
}
return points;
}
private static class Point {
final BytesRef packedValue;
final int doc;
Point(byte[] packedValue, int doc) {
// use a non-zero offset to make sure MutablePointsReaderUtils does not ignore it
this.packedValue = new BytesRef(packedValue.length + 1);
this.packedValue.bytes[0] = (byte) random().nextInt(256);
this.packedValue.offset = 1;
this.packedValue.length = packedValue.length;
this.doc = doc;
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj instanceof Point == false) {
return false;
}
Point that = (Point) obj;
return packedValue.equals(that.packedValue) && doc == that.doc;
}
@Override
public int hashCode() {
return 31 * packedValue.hashCode() + doc;
}
@Override
public String toString() {
return "value=" + packedValue + " doc=" + doc;
}
}
private static class DummyPointsReader extends MutablePointsReader {
private final Point[] points;
DummyPointsReader(Point[] points) {
this.points = points.clone();
}
@Override
public void close() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long ramBytesUsed() {
return 0;
}
@Override
public void getValue(int i, BytesRef packedValue) {
packedValue.bytes = points[i].packedValue.bytes;
packedValue.offset = points[i].packedValue.offset;
packedValue.length = points[i].packedValue.length;
}
@Override
public byte getByteAt(int i, int k) {
BytesRef packedValue = points[i].packedValue;
return packedValue.bytes[packedValue.offset + k];
}
@Override
public int getDocID(int i) {
return points[i].doc;
}
@Override
public void swap(int i, int j) {
ArrayUtil.swap(points, i, j);
}
@Override
public void checkIntegrity() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long size(String fieldName) {
throw new UnsupportedOperationException();
}
@Override
public int getDocCount(String fieldName) {
throw new UnsupportedOperationException();
}
}
}

View File

@ -106,6 +106,7 @@ io.netty.netty-all.version = 4.0.36.Final
org.apache.curator.version = 2.8.0 org.apache.curator.version = 2.8.0
/org.apache.curator/curator-client = ${org.apache.curator.version} /org.apache.curator/curator-client = ${org.apache.curator.version}
/org.apache.curator/curator-framework = ${org.apache.curator.version} /org.apache.curator/curator-framework = ${org.apache.curator.version}
/org.apache.curator/curator-recipes = ${org.apache.curator.version}
/org.apache.derby/derby = 10.9.1.0 /org.apache.derby/derby = 10.9.1.0

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.search.IndexSearcher;
/**
* Base class for comparison operators useful within an "if"/conditional.
*/
public abstract class ComparisonBoolFunction extends BoolFunction {
private final ValueSource lhs;
private final ValueSource rhs;
private final String name;
public ComparisonBoolFunction(ValueSource lhs, ValueSource rhs, String name) {
this.lhs = lhs;
this.rhs = rhs;
this.name = name;
}
/** Perform the comparison, returning true or false */
public abstract boolean compare(int doc, FunctionValues lhs, FunctionValues rhs);
/** Uniquely identifies the operation (e.g. "gt", "lt", "gte") */
public String name() {
return this.name;
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final FunctionValues lhsVal = this.lhs.getValues(context, readerContext);
final FunctionValues rhsVal = this.rhs.getValues(context, readerContext);
final String compLabel = this.name();
return new BoolDocValues(this) {
@Override
public boolean boolVal(int doc) {
return compare(doc, lhsVal, rhsVal);
}
@Override
public String toString(int doc) {
return compLabel + "(" + lhsVal.toString(doc) + "," + rhsVal.toString(doc) + ")";
}
@Override
public boolean exists(int doc) {
return lhsVal.exists(doc) && rhsVal.exists(doc);
}
};
}
@Override
public boolean equals(Object o) {
if (this.getClass() != o.getClass()) return false;
ComparisonBoolFunction other = (ComparisonBoolFunction)o;
return name().equals(other.name())
&& lhs.equals(other.lhs)
&& rhs.equals(other.rhs);
}
@Override
public int hashCode() {
int h = this.getClass().hashCode();
h = h * 31 + this.name().hashCode();
h = h * 31 + lhs.hashCode();
h = h * 31 + rhs.hashCode();
return h;
}
@Override
public String description() {
return name() + "(" + lhs.description() + "," + rhs.description() + ")";
}
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
lhs.createWeight(context, searcher);
rhs.createWeight(context, searcher);
}
}
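As a rough illustration of how this base class is meant to be extended (a hypothetical sketch, not part of this commit; the class name and the use of doubleVal() are assumptions), a "greater than" function only has to supply the comparison and its label:

public class GtBoolFunction extends ComparisonBoolFunction {
  public GtBoolFunction(ValueSource lhs, ValueSource rhs) {
    super(lhs, rhs, "gt"); // "gt" becomes the label used by toString() and description()
  }
  @Override
  public boolean compare(int doc, FunctionValues lhs, FunctionValues rhs) {
    // per-document numeric comparison of the two value sources
    return lhs.doubleVal(doc) > rhs.doubleVal(doc);
  }
}

equals(), hashCode(), description() and createWeight() are inherited unchanged, which is what the shared base class provides.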

View File

@ -38,5 +38,10 @@ public class FileMetaData {
this.length = length; this.length = length;
this.checksum = checksum; this.checksum = checksum;
} }
@Override
public String toString() {
return "FileMetaData(length=" + length + ")";
}
} }

View File

@ -118,6 +118,8 @@ class SimpleCopyJob extends CopyJob {
return highPriority ? -1 : 1; return highPriority ? -1 : 1;
} else if (ord < other.ord) { } else if (ord < other.ord) {
return -1; return -1;
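// a strictly larger ord must order after the other job; otherwise compareTo would report two distinct jobs as equal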
} else if (ord > other.ord) {
return 1;
} else { } else {
return 0; return 0;
} }

View File

@ -121,4 +121,11 @@ public interface Bounds {
*/ */
public Bounds noBottomLatitudeBound(); public Bounds noBottomLatitudeBound();
/** Signal that there is no bound whatsoever.
* The bound is limited only by the constraints of the
* planet.
*@return the updated Bounds object.
*/
public Bounds noBound(final PlanetModel planetModel);
} }

View File

@ -253,6 +253,11 @@ public class LatLonBounds implements Bounds {
return this; return this;
} }
@Override
public Bounds noBound(final PlanetModel planetModel) {
return noLongitudeBound().noTopLatitudeBound().noBottomLatitudeBound();
}
// Protected methods // Protected methods
/** Update latitude bound. /** Update latitude bound.

View File

@ -1003,13 +1003,14 @@ public class Plane extends Vector {
* D - MINIMUM_RESOLUTION. Both are examined and intersection points determined. * D - MINIMUM_RESOLUTION. Both are examined and intersection points determined.
*/ */
protected void findIntersectionBounds(final PlanetModel planetModel, final Bounds boundsInfo, final Plane q, final Membership... bounds) { protected void findIntersectionBounds(final PlanetModel planetModel, final Bounds boundsInfo, final Plane q, final Membership... bounds) {
//System.out.println("Finding intersection bounds");
// Unnormalized, unchecked... // Unnormalized, unchecked...
final double lineVectorX = y * q.z - z * q.y; final double lineVectorX = y * q.z - z * q.y;
final double lineVectorY = z * q.x - x * q.z; final double lineVectorY = z * q.x - x * q.z;
final double lineVectorZ = x * q.y - y * q.x; final double lineVectorZ = x * q.y - y * q.x;
if (Math.abs(lineVectorX) < MINIMUM_RESOLUTION && Math.abs(lineVectorY) < MINIMUM_RESOLUTION && Math.abs(lineVectorZ) < MINIMUM_RESOLUTION) { if (Math.abs(lineVectorX) < MINIMUM_RESOLUTION && Math.abs(lineVectorY) < MINIMUM_RESOLUTION && Math.abs(lineVectorZ) < MINIMUM_RESOLUTION) {
// Degenerate case: parallel planes // Degenerate case: parallel planes
//System.err.println(" planes are parallel - no intersection"); //System.out.println(" planes are parallel - no intersection");
return; return;
} }
@ -1037,9 +1038,10 @@ public class Plane extends Vector {
final double denomXZ = this.x * q.z - this.z * q.x; final double denomXZ = this.x * q.z - this.z * q.x;
final double denomXY = this.x * q.y - this.y * q.x; final double denomXY = this.x * q.y - this.y * q.x;
if (Math.abs(denomYZ) >= Math.abs(denomXZ) && Math.abs(denomYZ) >= Math.abs(denomXY)) { if (Math.abs(denomYZ) >= Math.abs(denomXZ) && Math.abs(denomYZ) >= Math.abs(denomXY)) {
//System.out.println("X biggest");
// X is the biggest, so our point will have x0 = 0.0 // X is the biggest, so our point will have x0 = 0.0
if (Math.abs(denomYZ) < MINIMUM_RESOLUTION_SQUARED) { if (Math.abs(denomYZ) < MINIMUM_RESOLUTION_SQUARED) {
//System.err.println(" Denominator is zero: no intersection"); //System.out.println(" Denominator is zero: no intersection");
return; return;
} }
final double denom = 1.0 / denomYZ; final double denom = 1.0 / denomYZ;
@ -1061,9 +1063,10 @@ public class Plane extends Vector {
0.0, (-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, (this.y * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.y) * denom, 0.0, (-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, (this.y * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.y) * denom,
bounds); bounds);
} else if (Math.abs(denomXZ) >= Math.abs(denomXY) && Math.abs(denomXZ) >= Math.abs(denomYZ)) { } else if (Math.abs(denomXZ) >= Math.abs(denomXY) && Math.abs(denomXZ) >= Math.abs(denomYZ)) {
//System.out.println("Y biggest");
// Y is the biggest, so y0 = 0.0 // Y is the biggest, so y0 = 0.0
if (Math.abs(denomXZ) < MINIMUM_RESOLUTION_SQUARED) { if (Math.abs(denomXZ) < MINIMUM_RESOLUTION_SQUARED) {
//System.err.println(" Denominator is zero: no intersection"); //System.out.println(" Denominator is zero: no intersection");
return; return;
} }
final double denom = 1.0 / denomXZ; final double denom = 1.0 / denomXZ;
@ -1084,9 +1087,10 @@ public class Plane extends Vector {
(-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, 0.0, (this.x * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.x) * denom, (-(this.D-MINIMUM_RESOLUTION) * q.z - this.z * -(q.D-MINIMUM_RESOLUTION)) * denom, 0.0, (this.x * -(q.D-MINIMUM_RESOLUTION) + (this.D-MINIMUM_RESOLUTION) * q.x) * denom,
bounds); bounds);
} else { } else {
//System.out.println("Z biggest");
// Z is the biggest, so Z0 = 0.0 // Z is the biggest, so Z0 = 0.0
if (Math.abs(denomXY) < MINIMUM_RESOLUTION_SQUARED) { if (Math.abs(denomXY) < MINIMUM_RESOLUTION_SQUARED) {
//System.err.println(" Denominator is zero: no intersection"); //System.out.println(" Denominator is zero: no intersection");
return; return;
} }
final double denom = 1.0 / denomXY; final double denom = 1.0 / denomXY;
@ -1178,6 +1182,10 @@ public class Plane extends Vector {
if (point2Valid) { if (point2Valid) {
boundsInfo.addPoint(new GeoPoint(point2X, point2Y, point2Z)); boundsInfo.addPoint(new GeoPoint(point2X, point2Y, point2Z));
} }
} else {
// If we can't intersect line with world, then it's outside the world, so
// we have to assume everything is included.
boundsInfo.noBound(planetModel);
} }
} }
@ -1351,8 +1359,6 @@ public class Plane extends Vector {
// m * [- 2*A*ab^2*r + 2*A^2*ab^2*r*q + 2*B^2*ab^2*r*q + 2*C^2*c^2*r*q] + // m * [- 2*A*ab^2*r + 2*A^2*ab^2*r*q + 2*B^2*ab^2*r*q + 2*C^2*c^2*r*q] +
// [ab^2 - 2*A*ab^2*q + A^2*ab^2*q^2 + B^2*ab^2*q^2 + C^2*c^2*q^2] = 0 // [ab^2 - 2*A*ab^2*q + A^2*ab^2*q^2 + B^2*ab^2*q^2 + C^2*c^2*q^2] = 0
//System.err.println(" computing X bound");
// Useful subexpressions for this bound // Useful subexpressions for this bound
final double q = A*abSquared*k; final double q = A*abSquared*k;
final double qSquared = q * q; final double qSquared = q * q;
@ -1392,6 +1398,7 @@ public class Plane extends Vector {
assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION; assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION;
final double m2 = (-b - sqrtResult) * commonDenom; final double m2 = (-b - sqrtResult) * commonDenom;
assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION; assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION;
if (Math.abs(m1) >= MINIMUM_RESOLUTION || Math.abs(m2) >= MINIMUM_RESOLUTION) {
final double l1 = r * m1 + q; final double l1 = r * m1 + q;
final double l2 = r * m2 + q; final double l2 = r * m2 + q;
// x = ((1 - l*A) * ab^2 ) / (2 * m) // x = ((1 - l*A) * ab^2 ) / (2 * m)
@ -1410,11 +1417,14 @@ public class Plane extends Vector {
//assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2); //assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2);
addPoint(boundsInfo, bounds, thePoint1); addPoint(boundsInfo, bounds, thePoint1);
addPoint(boundsInfo, bounds, thePoint2); addPoint(boundsInfo, bounds, thePoint2);
} else {
// This is a plane of the form A=n B=0 C=0. We can set a bound only by noting the D value.
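// (With B = C = 0 the plane equation A*x + D = 0 admits the single solution x = -D/A;
// the analogous branch below handles A = C = 0 by bounding y at -D/B.)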
boundsInfo.addXValue(-D/A);
}
} else { } else {
// No solutions // No solutions
} }
} else if (Math.abs(b) > MINIMUM_RESOLUTION_SQUARED) { } else if (Math.abs(b) > MINIMUM_RESOLUTION_SQUARED) {
//System.err.println("Not x quadratic");
// a = 0, so m = - c / b // a = 0, so m = - c / b
final double m = -c / b; final double m = -c / b;
final double l = r * m + q; final double l = r * m + q;
@ -1561,6 +1571,7 @@ public class Plane extends Vector {
assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION; assert Math.abs(a * m1 * m1 + b * m1 + c) < MINIMUM_RESOLUTION;
final double m2 = (-b - sqrtResult) * commonDenom; final double m2 = (-b - sqrtResult) * commonDenom;
assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION; assert Math.abs(a * m2 * m2 + b * m2 + c) < MINIMUM_RESOLUTION;
if (Math.abs(m1) >= MINIMUM_RESOLUTION || Math.abs(m2) >= MINIMUM_RESOLUTION) {
final double l1 = r * m1 + q; final double l1 = r * m1 + q;
final double l2 = r * m2 + q; final double l2 = r * m2 + q;
// x = (-l*A * ab^2 ) / (2 * m) // x = (-l*A * ab^2 ) / (2 * m)
@ -1579,6 +1590,10 @@ public class Plane extends Vector {
//assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2); //assert evaluateIsZero(thePoint2): "Evaluation of point2: "+evaluate(thePoint2);
addPoint(boundsInfo, bounds, thePoint1); addPoint(boundsInfo, bounds, thePoint1);
addPoint(boundsInfo, bounds, thePoint2); addPoint(boundsInfo, bounds, thePoint2);
} else {
// This is a plane of the form A=0 B=n C=0. We can set a bound only by noting the D value.
boundsInfo.addYValue(-D/B);
}
} else { } else {
// No solutions // No solutions
} }

View File

@ -292,6 +292,17 @@ public class XYZBounds implements Bounds {
return this; return this;
} }
@Override
public Bounds noBound(final PlanetModel planetModel) {
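// no constraint at all: expand the bounds to the planet's full extent on every axis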
minX = planetModel.getMinimumXValue();
maxX = planetModel.getMaximumXValue();
minY = planetModel.getMinimumYValue();
maxY = planetModel.getMaximumYValue();
minZ = planetModel.getMinimumZValue();
maxZ = planetModel.getMaximumZValue();
return this;
}
@Override @Override
public String toString() { public String toString() {
return "XYZBounds: [xmin="+minX+" xmax="+maxX+" ymin="+minY+" ymax="+maxY+" zmin="+minZ+" zmax="+maxZ+"]"; return "XYZBounds: [xmin="+minX+" xmax="+maxX+" ymin="+minY+" ymax="+maxY+" zmin="+minZ+" zmax="+maxZ+"]";

View File

@ -87,8 +87,9 @@ public class TestGeo3DPoint extends LuceneTestCase {
private static Codec getCodec() { private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene62")) { if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode); System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
} }
return new FilterCodec("Lucene62", Codec.getDefault()) { return new FilterCodec("Lucene62", Codec.getDefault()) {
@ -97,7 +98,7 @@ public class TestGeo3DPoint extends LuceneTestCase {
return new PointsFormat() { return new PointsFormat() {
@Override @Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException { public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode); return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
} }
@Override @Override

View File

@ -372,4 +372,19 @@ public class GeoBBoxTest {
assertTrue(box.isWithin(point)?solid.isWithin(point):true); assertTrue(box.isWithin(point)?solid.isWithin(point):true);
} }
@Test
public void testFailureCase2() {
//final GeoPoint point = new GeoPoint(-0.7375647084975573, -2.3309121299774915E-10, 0.6746626163258577);
final GeoPoint point = new GeoPoint(-0.737564708579924, -9.032562595264542E-17, 0.6746626165197899);
final GeoBBox box = new GeoRectangle(PlanetModel.WGS84, 0.7988584710911523, 0.25383311815493353, -1.2236144735575564E-12, 7.356011300929654E-49);
final XYZBounds bounds = new XYZBounds();
box.getBounds(bounds);
final XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
//System.out.println("Is within Y value? "+(point.y >= bounds.getMinimumY() && point.y <= bounds.getMaximumY()));
//System.out.println("Shape = "+box+" is within? "+box.isWithin(point));
//System.out.println("XYZBounds = "+bounds+" is within? "+solid.isWithin(point)+" solid="+solid);
assertTrue(box.isWithin(point) == solid.isWithin(point));
}
} }

View File

@ -405,4 +405,18 @@ public class GeoCircleTest extends LuceneTestCase {
assertTrue(solid.isWithin(gp)); assertTrue(solid.isWithin(gp));
} }
@Test
public void testBoundsFailureCase2() {
final GeoCircle gc = GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84, -2.7574435614238194E-13, 0.0, 1.5887859182593391);
final GeoPoint gp = new GeoPoint(PlanetModel.WGS84, 0.7980359504429014, 1.5964981068121482);
final XYZBounds bounds = new XYZBounds();
gc.getBounds(bounds);
System.out.println("Bounds = "+bounds);
System.out.println("Point = "+gp);
final XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
assert gc.isWithin(gp)?solid.isWithin(gp):true;
}
} }

View File

@ -126,6 +126,7 @@ public final class AssertingPointsFormat extends PointsFormat {
assert false: "point values are out of order"; assert false: "point values are out of order";
} }
System.arraycopy(packedValue, 0, lastDocValue, 0, bytesPerDim); System.arraycopy(packedValue, 0, lastDocValue, 0, bytesPerDim);
lastDocID = docID;
} }
in.visit(docID, packedValue); in.visit(docID, packedValue);
} }
@ -254,11 +255,11 @@ public final class AssertingPointsFormat extends PointsFormat {
} }
@Override @Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException { public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (fieldInfo.getPointDimensionCount() == 0) { if (fieldInfo.getPointDimensionCount() == 0) {
throw new IllegalArgumentException("writing field=\"" + fieldInfo.name + "\" but pointDimensionalCount is 0"); throw new IllegalArgumentException("writing field=\"" + fieldInfo.name + "\" but pointDimensionalCount is 0");
} }
in.writeField(fieldInfo, values, maxMBSortInHeap); in.writeField(fieldInfo, values);
} }
@Override @Override

View File

@ -56,11 +56,11 @@ class CrankyPointsFormat extends PointsFormat {
} }
@Override @Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException { public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (random.nextInt(100) == 0) { if (random.nextInt(100) == 0) {
throw new IOException("Fake IOException"); throw new IOException("Fake IOException");
} }
delegate.writeField(fieldInfo, values, maxMBSortInHeap); delegate.writeField(fieldInfo, values);
} }
@Override @Override

View File

@ -67,6 +67,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SloppyMath; import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
/** /**
* Abstract class to do basic tests for a geospatial impl (high level * Abstract class to do basic tests for a geospatial impl (high level
@ -1247,7 +1248,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
return new PointsFormat() { return new PointsFormat() {
@Override @Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException { public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, pointsInLeaf); return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
} }
@Override @Override

View File

@ -92,6 +92,7 @@ public class RandomCodec extends AssertingCodec {
// which is less effective for testing. // which is less effective for testing.
// TODO: improve how we randomize this... // TODO: improve how we randomize this...
private final int maxPointsInLeafNode; private final int maxPointsInLeafNode;
private final double maxMBSortInHeap;
private final int bkdSplitRandomSeed; private final int bkdSplitRandomSeed;
@Override @Override
@ -102,9 +103,9 @@ public class RandomCodec extends AssertingCodec {
// Randomize how BKDWriter chooses its splits: // Randomize how BKDWriter chooses its splits:
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode) { return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
@Override @Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException { public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name); boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@ -184,6 +185,7 @@ public class RandomCodec extends AssertingCodec {
int lowFreqCutoff = TestUtil.nextInt(random, 2, 100); int lowFreqCutoff = TestUtil.nextInt(random, 2, 100);
maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048); maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
maxMBSortInHeap = 5.0 + (3*random.nextDouble());
bkdSplitRandomSeed = random.nextInt(); bkdSplitRandomSeed = random.nextInt();
add(avoidCodecs, add(avoidCodecs,
@ -251,7 +253,8 @@ public class RandomCodec extends AssertingCodec {
public String toString() { public String toString() {
return super.toString() + ": " + previousMappings.toString() + return super.toString() + ": " + previousMappings.toString() +
", docValues:" + previousDVMappings.toString() + ", docValues:" + previousDVMappings.toString() +
", maxPointsInLeafNode=" + maxPointsInLeafNode; ", maxPointsInLeafNode=" + maxPointsInLeafNode +
", maxMBSortInHeap=" + maxMBSortInHeap;
} }
/** Just like {@link BKDWriter} except it evilly picks random ways to split cells on /** Just like {@link BKDWriter} except it evilly picks random ways to split cells on

View File

@ -771,7 +771,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
} }
ii = new SlowOpeningMockIndexInputWrapper(this, name, delegateInput); ii = new SlowOpeningMockIndexInputWrapper(this, name, delegateInput);
} else { } else {
ii = new MockIndexInputWrapper(this, name, delegateInput); ii = new MockIndexInputWrapper(this, name, delegateInput, null);
} }
addFileHandle(ii, name, Handle.Input); addFileHandle(ii, name, Handle.Input);
return ii; return ii;

View File

@ -30,12 +30,19 @@ public class MockIndexInputWrapper extends IndexInput {
private MockDirectoryWrapper dir; private MockDirectoryWrapper dir;
final String name; final String name;
private IndexInput delegate; private IndexInput delegate;
private boolean isClone; private volatile boolean closed;
private boolean closed;
/** Construct an empty output buffer. */ // Which MockIndexInputWrapper we were cloned from, or null if we are not a clone:
public MockIndexInputWrapper(MockDirectoryWrapper dir, String name, IndexInput delegate) { private final MockIndexInputWrapper parent;
/** Sole constructor */
public MockIndexInputWrapper(MockDirectoryWrapper dir, String name, IndexInput delegate, MockIndexInputWrapper parent) {
super("MockIndexInputWrapper(name=" + name + " delegate=" + delegate + ")"); super("MockIndexInputWrapper(name=" + name + " delegate=" + delegate + ")");
// If we are a clone then our parent better not be a clone!
assert parent == null || parent.parent == null;
this.parent = parent;
this.name = name; this.name = name;
this.dir = dir; this.dir = dir;
this.delegate = delegate; this.delegate = delegate;
@ -54,7 +61,7 @@ public class MockIndexInputWrapper extends IndexInput {
// remove the conditional check so we also track that // remove the conditional check so we also track that
// all clones get closed: // all clones get closed:
assert delegate != null; assert delegate != null;
if (!isClone) { if (parent == null) {
dir.removeIndexInput(this, name); dir.removeIndexInput(this, name);
} }
dir.maybeThrowDeterministicException(); dir.maybeThrowDeterministicException();
@ -62,9 +69,13 @@ public class MockIndexInputWrapper extends IndexInput {
} }
private void ensureOpen() { private void ensureOpen() {
// TODO: not great this is a volatile read (closed) ... we should deploy heavy JVM voodoo like SwitchPoint to avoid this
if (closed) { if (closed) {
throw new RuntimeException("Abusing closed IndexInput!"); throw new RuntimeException("Abusing closed IndexInput!");
} }
if (parent != null && parent.closed) {
throw new RuntimeException("Abusing clone of a closed IndexInput!");
}
} }
@Override @Override
@ -75,8 +86,7 @@ public class MockIndexInputWrapper extends IndexInput {
} }
dir.inputCloneCount.incrementAndGet(); dir.inputCloneCount.incrementAndGet();
IndexInput iiclone = delegate.clone(); IndexInput iiclone = delegate.clone();
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone); MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone, parent != null ? parent : this);
clone.isClone = true;
// Pending resolution on LUCENE-686 we may want to // Pending resolution on LUCENE-686 we may want to
// uncomment this code so that we also track that all // uncomment this code so that we also track that all
// clones get closed: // clones get closed:
@ -102,8 +112,7 @@ public class MockIndexInputWrapper extends IndexInput {
} }
dir.inputCloneCount.incrementAndGet(); dir.inputCloneCount.incrementAndGet();
IndexInput slice = delegate.slice(sliceDescription, offset, length); IndexInput slice = delegate.slice(sliceDescription, offset, length);
MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, sliceDescription, slice); MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, sliceDescription, slice, parent != null ? parent : this);
clone.isClone = true;
return clone; return clone;
} }

View File

@ -30,7 +30,7 @@ class SlowClosingMockIndexInputWrapper extends MockIndexInputWrapper {
public SlowClosingMockIndexInputWrapper(MockDirectoryWrapper dir, public SlowClosingMockIndexInputWrapper(MockDirectoryWrapper dir,
String name, IndexInput delegate) { String name, IndexInput delegate) {
super(dir, name, delegate); super(dir, name, delegate, null);
} }
@Override @Override

View File

@ -28,7 +28,7 @@ class SlowOpeningMockIndexInputWrapper extends MockIndexInputWrapper {
public SlowOpeningMockIndexInputWrapper(MockDirectoryWrapper dir, public SlowOpeningMockIndexInputWrapper(MockDirectoryWrapper dir,
String name, IndexInput delegate) throws IOException { String name, IndexInput delegate) throws IOException {
super(dir, name, delegate); super(dir, name, delegate, null);
try { try {
Thread.sleep(50); Thread.sleep(50);
} catch (InterruptedException ie) { } catch (InterruptedException ie) {

View File

@ -171,4 +171,40 @@ public class TestMockDirectoryWrapper extends BaseDirectoryTestCase {
assertTrue("MockDirectoryWrapper on dir=" + dir + " failed to corrupt an unsync'd file", changed); assertTrue("MockDirectoryWrapper on dir=" + dir + " failed to corrupt an unsync'd file", changed);
} }
public void testAbuseClosedIndexInput() throws Exception {
MockDirectoryWrapper dir = newMockDirectory();
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
out.writeByte((byte) 42);
out.close();
final IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
in.close();
expectThrows(RuntimeException.class, in::readByte);
dir.close();
}
public void testAbuseCloneAfterParentClosed() throws Exception {
MockDirectoryWrapper dir = newMockDirectory();
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
out.writeByte((byte) 42);
out.close();
IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
final IndexInput clone = in.clone();
in.close();
expectThrows(RuntimeException.class, clone::readByte);
dir.close();
}
public void testAbuseCloneOfCloneAfterParentClosed() throws Exception {
MockDirectoryWrapper dir = newMockDirectory();
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
out.writeByte((byte) 42);
out.close();
IndexInput in = dir.openInput("foo", IOContext.DEFAULT);
IndexInput clone1 = in.clone();
IndexInput clone2 = clone1.clone();
in.close();
expectThrows(RuntimeException.class, clone2::readByte);
dir.close();
}
} }

View File

@ -100,6 +100,25 @@ New Features
* SOLR-9275: XML QueryParser support (defType=xmlparser) now extensible via configuration. * SOLR-9275: XML QueryParser support (defType=xmlparser) now extensible via configuration.
(Christine Poerschke) (Christine Poerschke)
* SOLR-9200: Add Delegation Token Support to Solr.
(Gregory Chanan)
* SOLR-9038: Solr core snapshots: The current commit can be snapshotted, which retains the commit and associates it with
a name. The core admin API can create snapshots, list them, and delete them. Snapshot names can be referenced in
doing a core backup, and in replication. Snapshot metadata is stored in a new snapshot_metadata/ dir.
(Hrishikesh Gadre via David Smiley)
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
(Doug Turnbull, David Smiley)
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
(Gregory Chanan)
* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)
* SOLR-6465: CDCR: fall back to whole-index replication when tlogs are insufficient.
(Noble Paul, Renaud Delbru, shalin)
* SOLR-9320: A REPLACENODE command to decommission an existing node with another new node * SOLR-9320: A REPLACENODE command to decommission an existing node with another new node
(noble, Nitin Sharma, Varun Thacker) (noble, Nitin Sharma, Varun Thacker)
@ -170,6 +189,19 @@ Bug Fixes
* SOLR-9339: NPE in CloudSolrClient when the response is null (noble) * SOLR-9339: NPE in CloudSolrClient when the response is null (noble)
* SOLR-8596: Web UI doesn't correctly generate queries which include local parameters (Alexandre Rafalovitch, janhoy)
* SOLR-8645: managed-schema is now syntax highlighted in cloud->Tree view (Alexandre Rafalovitch via janhoy)
* SOLR-8379: UI Cloud->Tree view now shows .txt files correctly (Alexandre Rafalovitch via janhoy)
* SOLR-9003: New Admin UI's Dataimport screen now correctly displays DIH Debug output (Alexandre Rafalovitch)
* SOLR-9308: Fix distributed RTG to forward request params, fixes fq and non-default fl params (hossman)
* SOLR-9179: NPE in IndexSchema using IBM JDK (noble, Colvin Cowie)
* SOLR-9397: Config API does not support adding caches (noble)
Optimizations Optimizations
---------------------- ----------------------
@ -179,6 +211,13 @@ Optimizations
* SOLR-9264: Optimize ZkController.publishAndWaitForDownStates to not read all collection states and * SOLR-9264: Optimize ZkController.publishAndWaitForDownStates to not read all collection states and
watch relevant collections instead. (Hrishikesh Gadre, shalin) watch relevant collections instead. (Hrishikesh Gadre, shalin)
* SOLR-9335: Solr cache/search/update stats counters now use LongAdder which are supposed to have higher throughput
under high contention. (Varun Thacker)
* SOLR-9350: JSON Facets: method="stream" no longer always uses & populates the filter cache, likely
flushing it. 'cacheDf' can be configured to set a doc frequency threshold, now defaulting to 1/16th doc count.
Using -1 disables use of the cache. (David Smiley, yonik)
Other Changes Other Changes
---------------------- ----------------------
@ -202,6 +241,25 @@ Other Changes
* SOLR-9163: Sync up basic_configs and data_driven_schema_configs, removing almost all differences * SOLR-9163: Sync up basic_configs and data_driven_schema_configs, removing almost all differences
except what is required for schemaless. (yonik) except what is required for schemaless. (yonik)
* SOLR-9340: Change ZooKeeper disconnect and session expiry related logging from INFO to WARN to
make debugging easier (Varun Thacker)
* SOLR-9358: [AngularUI] In Cloud->Tree file view area, collapse metadata by default (janhoy)
* SOLR-9256: asserting hasNext() contract in JdbcDataSource in DataImportHandler (Kristine Jetzke via Mikhai Khludnev)
* SOLR-9209: extracting JdbcDataSource.createResultSetIterator() for extension (Kristine Jetzke via Mikhai Khludnev)
* SOLR-9353: Factor out ReRankQParserPlugin.ReRankQueryRescorer private class. (Christine Poerschke)
* SOLR-9392: Fixed CDCR Test failures which were due to leaked resources. (shalin)
* SOLR-9385: Add QParser.getParser(String,SolrQueryRequest) variant. (Christine Poerschke)
* SOLR-9367: Improved TestInjection's randomization logic to use LuceneTestCase.random() (hossman)
* SOLR-9331: Remove ReRankQuery's length constructor argument and member. (Christine Poerschke)
================== 6.1.0 ================== ================== 6.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -604,7 +604,7 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA
QueryFacetAccumulator qAcc = new QueryFacetAccumulator(this,qfr.getName(),query); QueryFacetAccumulator qAcc = new QueryFacetAccumulator(this,qfr.getName(),query);
final Query q; final Query q;
try { try {
q = QParser.getParser(query, null, queryRequest).getQuery(); q = QParser.getParser(query, queryRequest).getQuery();
} catch( SyntaxError e ){ } catch( SyntaxError e ){
throw new SolrException(ErrorCode.BAD_REQUEST,"Invalid query '"+query+"'",e); throw new SolrException(ErrorCode.BAD_REQUEST,"Invalid query '"+query+"'",e);
} }

View File

@ -280,10 +280,14 @@ public class JdbcDataSource extends
resultSetIterator.close(); resultSetIterator.close();
resultSetIterator = null; resultSetIterator = null;
} }
resultSetIterator = new ResultSetIterator(query); resultSetIterator = createResultSetIterator(query);
return resultSetIterator.getIterator(); return resultSetIterator.getIterator();
} }
protected ResultSetIterator createResultSetIterator(String query) {
return new ResultSetIterator(query);
}
private void logError(String msg, Exception e) { private void logError(String msg, Exception e) {
LOG.warn(msg, e); LOG.warn(msg, e);
} }
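The protected factory method above is the extension hook referred to by SOLR-9209; a minimal subclass sketch (hypothetical class name, not part of this patch) that inspects the query before falling back to the default iterator could look like:

public class InstrumentedJdbcDataSource extends JdbcDataSource {
  @Override
  protected ResultSetIterator createResultSetIterator(String query) {
    // inspect or rewrite the query here, then delegate to the stock iterator
    return super.createResultSetIterator(query);
  }
}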

View File

@ -510,6 +510,45 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
DriverManager.deregisterDriver(driver); DriverManager.deregisterDriver(driver);
} }
} }
@Test
public void testEmptyResultSet() throws Exception {
MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource);
props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB");
EasyMock.expect(dataSource.getConnection()).andReturn(connection);
jdbcDataSource.init(context, props);
connection.setAutoCommit(false);
Statement statement = mockControl.createMock(Statement.class);
EasyMock.expect(connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY))
.andReturn(statement);
statement.setFetchSize(500);
statement.setMaxRows(0);
EasyMock.expect(statement.execute("query")).andReturn(true);
ResultSet resultSet = mockControl.createMock(ResultSet.class);
EasyMock.expect(statement.getResultSet()).andReturn(resultSet);
ResultSetMetaData metaData = mockControl.createMock(ResultSetMetaData.class);
EasyMock.expect(resultSet.getMetaData()).andReturn(metaData);
EasyMock.expect(metaData.getColumnCount()).andReturn(0);
EasyMock.expect(resultSet.next()).andReturn(false);
resultSet.close();
EasyMock.expect(statement.getMoreResults()).andReturn(false);
EasyMock.expect(statement.getUpdateCount()).andReturn(-1);
statement.close();
mockControl.replay();
Iterator<Map<String,Object>> resultSetIterator = jdbcDataSource.getData("query");
resultSetIterator.hasNext();
resultSetIterator.hasNext();
mockControl.verify();
}
@Test @Test
@Ignore("Needs a Mock database server to work") @Ignore("Needs a Mock database server to work")
public void testBasic() throws Exception { public void testBasic() throws Exception {

View File

@ -16,6 +16,15 @@
*/ */
package org.apache.solr.hadoop; package org.apache.solr.hadoop;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import com.google.common.io.Files; import com.google.common.io.Files;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkController;
@ -35,15 +44,6 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/** /**
* Extracts SolrCloud information from ZooKeeper. * Extracts SolrCloud information from ZooKeeper.
*/ */
@ -78,8 +78,7 @@ final class ZooKeeperInspector {
} }
SolrZkClient zkClient = getZkClient(zkHost); SolrZkClient zkClient = getZkClient(zkHost);
try { try (ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
ZkStateReader zkStateReader = new ZkStateReader(zkClient);
try { try {
// first check for alias // first check for alias
collection = checkForAlias(zkClient, collection); collection = checkForAlias(zkClient, collection);

View File

@ -134,6 +134,10 @@
<dependency org="antlr" name="antlr" rev="${/antlr/antlr}" conf="test.MiniKdc"/> <dependency org="antlr" name="antlr" rev="${/antlr/antlr}" conf="test.MiniKdc"/>
<dependency org="net.sf.ehcache" name="ehcache-core" rev="${/net.sf.ehcache/ehcache-core}" conf="test.MiniKdc"/> <dependency org="net.sf.ehcache" name="ehcache-core" rev="${/net.sf.ehcache/ehcache-core}" conf="test.MiniKdc"/>
<dependency org="org.apache.curator" name="curator-framework" rev="${/org.apache.curator/curator-framework}" conf="compile"/>
<dependency org="org.apache.curator" name="curator-client" rev="${/org.apache.curator/curator-client}" conf="compile"/>
<dependency org="org.apache.curator" name="curator-recipes" rev="${/org.apache.curator/curator-recipes}" conf="compile"/>
<!-- StatsComponents percentiles Dependencies--> <!-- StatsComponents percentiles Dependencies-->
<dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/> <dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/>
<!-- SQL Parser --> <!-- SQL Parser -->

View File

@ -15,21 +15,26 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.core; package org.apache.solr.core;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy; import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.update.SolrIndexWriter; import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/** /**
* A wrapper for an IndexDeletionPolicy instance. * A wrapper for an IndexDeletionPolicy instance.
* <p> * <p>
@ -52,9 +57,11 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
private final Map<Long, Long> reserves = new ConcurrentHashMap<>(); private final Map<Long, Long> reserves = new ConcurrentHashMap<>();
private volatile IndexCommit latestCommit; private volatile IndexCommit latestCommit;
private final ConcurrentHashMap<Long, AtomicInteger> savedCommits = new ConcurrentHashMap<>(); private final ConcurrentHashMap<Long, AtomicInteger> savedCommits = new ConcurrentHashMap<>();
private final SolrSnapshotMetaDataManager snapshotMgr;
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy) { public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy, SolrSnapshotMetaDataManager snapshotMgr) {
this.deletionPolicy = deletionPolicy; this.deletionPolicy = deletionPolicy;
this.snapshotMgr = snapshotMgr;
} }
/** /**
@ -134,7 +141,6 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
} }
} }
/** /**
* Internal use for Lucene... do not explicitly call. * Internal use for Lucene... do not explicitly call.
*/ */
@ -186,6 +192,7 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
Long reserve = reserves.get(gen); Long reserve = reserves.get(gen);
if (reserve != null && System.nanoTime() < reserve) return; if (reserve != null && System.nanoTime() < reserve) return;
if (savedCommits.containsKey(gen)) return; if (savedCommits.containsKey(gen)) return;
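// commits captured in a named snapshot must survive until that snapshot is deleted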
if (snapshotMgr.isSnapshotted(gen)) return;
delegate.delete(); delegate.delete();
} }

View File

@ -28,7 +28,17 @@ import java.net.URL;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.*; import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -49,6 +59,7 @@ import org.apache.solr.schema.IndexSchemaFactory;
import org.apache.solr.search.CacheConfig; import org.apache.solr.search.CacheConfig;
import org.apache.solr.search.FastLRUCache; import org.apache.solr.search.FastLRUCache;
import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.ValueSourceParser; import org.apache.solr.search.ValueSourceParser;
import org.apache.solr.search.stats.StatsCache; import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.servlet.SolrRequestParsers; import org.apache.solr.servlet.SolrRequestParsers;
@ -91,7 +102,7 @@ public class SolrConfig extends Config implements MapSerializable {
public static final String DEFAULT_CONF_FILE = "solrconfig.xml"; public static final String DEFAULT_CONF_FILE = "solrconfig.xml";
private RequestParams requestParams; private RequestParams requestParams;
public static enum PluginOpts { public enum PluginOpts {
MULTI_OK, MULTI_OK,
REQUIRE_NAME, REQUIRE_NAME,
REQUIRE_NAME_IN_OVERLAY, REQUIRE_NAME_IN_OVERLAY,
@ -254,7 +265,6 @@ public class SolrConfig extends Config implements MapSerializable {
dataDir = get("dataDir", null); dataDir = get("dataDir", null);
if (dataDir != null && dataDir.length() == 0) dataDir = null; if (dataDir != null && dataDir.length() == 0) dataDir = null;
userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
org.apache.solr.search.SolrIndexSearcher.initRegenerators(this); org.apache.solr.search.SolrIndexSearcher.initRegenerators(this);
@ -276,6 +286,16 @@ public class SolrConfig extends Config implements MapSerializable {
maxWarmingSearchers = getInt("query/maxWarmingSearchers", Integer.MAX_VALUE); maxWarmingSearchers = getInt("query/maxWarmingSearchers", Integer.MAX_VALUE);
slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1); slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1);
for (SolrPluginInfo plugin : plugins) loadPluginInfo(plugin); for (SolrPluginInfo plugin : plugins) loadPluginInfo(plugin);
Map<String, CacheConfig> userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
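// start from the configs under query/cache, then overlay any named SolrCache plugin declarations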
List<PluginInfo> caches = getPluginInfos(SolrCache.class.getName());
if (!caches.isEmpty()) {
for (PluginInfo c : caches) {
userCacheConfigs.put(c.name, CacheConfig.getConfig(this, "cache", c.attributes, null));
}
}
this.userCacheConfigs = Collections.unmodifiableMap(userCacheConfigs);
updateHandlerInfo = loadUpdatehandlerInfo(); updateHandlerInfo = loadUpdatehandlerInfo();
multipartUploadLimitKB = getInt( multipartUploadLimitKB = getInt(
@ -317,6 +337,7 @@ public class SolrConfig extends Config implements MapSerializable {
.add(new SolrPluginInfo(TransformerFactory.class, "transformer", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK)) .add(new SolrPluginInfo(TransformerFactory.class, "transformer", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
.add(new SolrPluginInfo(SearchComponent.class, "searchComponent", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK)) .add(new SolrPluginInfo(SearchComponent.class, "searchComponent", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
.add(new SolrPluginInfo(UpdateRequestProcessorFactory.class, "updateProcessor", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK)) .add(new SolrPluginInfo(UpdateRequestProcessorFactory.class, "updateProcessor", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
.add(new SolrPluginInfo(SolrCache.class, "cache", REQUIRE_NAME, REQUIRE_CLASS, MULTI_OK))
// TODO: WTF is up with queryConverter??? // TODO: WTF is up with queryConverter???
// it apparently *only* works as a singleton? - SOLR-4304 // it apparently *only* works as a singleton? - SOLR-4304
// and even then -- only if there is a single SpellCheckComponent // and even then -- only if there is a single SpellCheckComponent
@ -457,7 +478,7 @@ public class SolrConfig extends Config implements MapSerializable {
public final CacheConfig queryResultCacheConfig; public final CacheConfig queryResultCacheConfig;
public final CacheConfig documentCacheConfig; public final CacheConfig documentCacheConfig;
public final CacheConfig fieldValueCacheConfig; public final CacheConfig fieldValueCacheConfig;
public final CacheConfig[] userCacheConfigs; public final Map<String, CacheConfig> userCacheConfigs;
// SolrIndexSearcher - more... // SolrIndexSearcher - more...
public final boolean useFilterForSortedQuery; public final boolean useFilterForSortedQuery;
public final int queryResultWindowSize; public final int queryResultWindowSize;

View File

@ -81,6 +81,7 @@ import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.Utils; import org.apache.solr.common.util.Utils;
import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.handler.IndexFetcher; import org.apache.solr.handler.IndexFetcher;
import org.apache.solr.handler.ReplicationHandler; import org.apache.solr.handler.ReplicationHandler;
import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.RequestHandlerBase;
@ -184,6 +185,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
private final Map<String,UpdateRequestProcessorChain> updateProcessorChains; private final Map<String,UpdateRequestProcessorChain> updateProcessorChains;
private final Map<String, SolrInfoMBean> infoRegistry; private final Map<String, SolrInfoMBean> infoRegistry;
private final IndexDeletionPolicyWrapper solrDelPolicy; private final IndexDeletionPolicyWrapper solrDelPolicy;
private final SolrSnapshotMetaDataManager snapshotMgr;
private final DirectoryFactory directoryFactory; private final DirectoryFactory directoryFactory;
private IndexReaderFactory indexReaderFactory; private IndexReaderFactory indexReaderFactory;
private final Codec codec; private final Codec codec;
@ -414,7 +416,19 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
} else { } else {
delPolicy = new SolrDeletionPolicy(); delPolicy = new SolrDeletionPolicy();
} }
return new IndexDeletionPolicyWrapper(delPolicy);
return new IndexDeletionPolicyWrapper(delPolicy, snapshotMgr);
}
private SolrSnapshotMetaDataManager initSnapshotMetaDataManager() {
try {
String dirName = getDataDir() + SolrSnapshotMetaDataManager.SNAPSHOT_METADATA_DIR + "/";
Directory snapshotDir = directoryFactory.get(dirName, DirContext.DEFAULT,
getSolrConfig().indexConfig.lockType);
return new SolrSnapshotMetaDataManager(this, snapshotDir);
} catch (IOException e) {
throw new IllegalStateException(e);
}
} }
private void initListeners() { private void initListeners() {
@ -739,6 +753,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
initListeners(); initListeners();
this.snapshotMgr = initSnapshotMetaDataManager();
this.solrDelPolicy = initDeletionPolicy(delPolicy); this.solrDelPolicy = initDeletionPolicy(delPolicy);
this.codec = initCodec(solrConfig, this.schema); this.codec = initCodec(solrConfig, this.schema);
@ -1242,6 +1257,17 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
} }
} }
// Close the snapshots meta-data directory.
Directory snapshotsDir = snapshotMgr.getSnapshotsDir();
try {
this.directoryFactory.release(snapshotsDir);
} catch (Throwable e) {
SolrException.log(log,e);
if (e instanceof Error) {
throw (Error) e;
}
}
if (coreStateClosed) { if (coreStateClosed) {
try { try {
@ -2343,6 +2369,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
return solrDelPolicy; return solrDelPolicy;
} }
/**
* @return A reference to the {@linkplain SolrSnapshotMetaDataManager}
* managing the persistent snapshots for this Solr core.
*/
public SolrSnapshotMetaDataManager getSnapshotMetaDataManager() {
return snapshotMgr;
}
public ReentrantLock getRuleExpiryLock() { public ReentrantLock getRuleExpiryLock() {
return ruleExpiryLock; return ruleExpiryLock;
} }

View File

@ -32,6 +32,7 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.NoLockFactory; import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Constants;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory;
@ -59,10 +60,22 @@ public class LocalFileSystemRepository implements BackupRepository {
@Override @Override
public URI createURI(String... pathComponents) { public URI createURI(String... pathComponents) {
Preconditions.checkArgument(pathComponents.length > 0); Preconditions.checkArgument(pathComponents.length > 0);
Path result = Paths.get(pathComponents[0]);
String basePath = Preconditions.checkNotNull(pathComponents[0]);
// Note the URI.getPath() invocation on Windows platform generates an invalid URI.
// Refer to http://stackoverflow.com/questions/9834776/java-nio-file-path-issue
// Since the caller may have used this method to generate the string representation
// for the pathComponents, we implement a work-around specifically for Windows platform
// to remove the leading '/' character.
if (Constants.WINDOWS) {
basePath = basePath.replaceFirst("^/(.:/)", "$1");
}
Path result = Paths.get(basePath);
for (int i = 1; i < pathComponents.length; i++) { for (int i = 1; i < pathComponents.length; i++) {
result = result.resolve(pathComponents[i]); result = result.resolve(pathComponents[i]);
} }
return result.toUri(); return result.toUri();
} }
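A small illustration of what the Windows work-around does (a hedged sketch; the sample path is made up):

String fromUri = "/C:/solr/backups";                  // leading slash as produced by URI.getPath() on Windows
String fixed = fromUri.replaceFirst("^/(.:/)", "$1"); // the leading '/' is stripped, yielding "C:/solr/backups"

On other platforms the branch is skipped and the first path component is used verbatim.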

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class provides functionality required to handle the data files corresponding to Solr snapshots.
*/
public class SolrSnapshotManager {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
* This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
*
* @param dir The index directory storing the snapshot.
* @param snapshots The configured snapshots; files they reference are never deleted.
* @param gen The generation number of the {@linkplain IndexCommit} to be deleted.
* @throws IOException in case of I/O errors.
*/
public static void deleteIndexFiles ( Directory dir, Collection<SnapshotMetaData> snapshots, long gen ) throws IOException {
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
for (IndexCommit ic : commits) {
if (ic.getGeneration() == gen) {
deleteIndexFiles(dir,refCounts, ic);
break;
}
}
}
/**
* This method deletes all files not corresponding to a configured snapshot in the specified index directory.
*
* @param dir The index directory to clean up.
* @param snapshots The configured snapshots; files they reference are retained.
* @throws IOException in case of I/O errors.
*/
public static void deleteNonSnapshotIndexFiles (Directory dir, Collection<SnapshotMetaData> snapshots) throws IOException {
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
Set<Long> snapshotGenNumbers = snapshots.stream()
.map(SnapshotMetaData::getGenerationNumber)
.collect(Collectors.toSet());
for (IndexCommit ic : commits) {
if (!snapshotGenNumbers.contains(ic.getGeneration())) {
deleteIndexFiles(dir,refCounts, ic);
}
}
}
/**
* This method computes a reference count for the index files by taking into consideration
* (a) configured snapshots and (b) file sharing between two or more {@linkplain IndexCommit} instances.
*
* @param snapshots A collection of user configured snapshots
* @param commits A list of {@linkplain IndexCommit} instances
* @return A map containing the reference count for each index file referred to in one of the {@linkplain IndexCommit} instances.
* @throws IOException in case of I/O error.
*/
@VisibleForTesting
static Map<String, Integer> buildRefCounts (Collection<SnapshotMetaData> snapshots, List<IndexCommit> commits) throws IOException {
Map<String, Integer> result = new HashMap<>();
Map<Long, IndexCommit> commitsByGen = commits.stream().collect(
Collectors.toMap(IndexCommit::getGeneration, Function.identity()));
for(SnapshotMetaData md : snapshots) {
IndexCommit ic = commitsByGen.get(md.getGenerationNumber());
if (ic != null) {
Collection<String> fileNames = ic.getFileNames();
for(String fileName : fileNames) {
int refCount = result.getOrDefault(fileName, 0);
result.put(fileName, refCount+1);
}
}
}
return result;
}
/**
* This method deletes the index files associated with the specified <code>indexCommit</code> provided they
* are not referred to by some other {@linkplain IndexCommit}.
*
* @param dir The index directory containing the {@linkplain IndexCommit} to be deleted.
* @param refCounts A map containing reference counts for each file associated with every {@linkplain IndexCommit}
* in the specified directory.
* @param indexCommit The {@linkplain IndexCommit} whose files need to be deleted.
* @throws IOException in case of I/O errors.
*/
private static void deleteIndexFiles ( Directory dir, Map<String, Integer> refCounts, IndexCommit indexCommit ) throws IOException {
log.info("Deleting index files for index commit with generation {} in directory {}", indexCommit.getGeneration(), dir);
for (String fileName : indexCommit.getFileNames()) {
try {
// Ensure that a file being deleted is not referred by some other commit.
int ref = refCounts.getOrDefault(fileName, 0);
log.debug("Reference count for file {} is {}", fileName, ref);
if (ref == 0) {
dir.deleteFile(fileName);
}
} catch (IOException e) {
log.warn("Unable to delete file {} in directory {} due to exception {}", fileName, dir, e.getMessage());
}
}
}
}
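// A rough, self-contained illustration of the reference counting used by buildRefCounts and
// deleteIndexFiles above (plain collections only; file names and generations are invented):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class SnapshotRefCountSketch {
  public static void main(String[] args) {
    // Files named by two hypothetical snapshotted commits (generations 5 and 7).
    List<String> snapshotGen5 = Arrays.asList("_0.cfs", "_0.si", "segments_5");
    List<String> snapshotGen7 = Arrays.asList("_0.cfs", "_0.si", "_1.cfs", "_1.si", "segments_7");

    Map<String, Integer> refCounts = new HashMap<>();
    for (List<String> commitFiles : Arrays.asList(snapshotGen5, snapshotGen7)) {
      for (String file : commitFiles) {
        refCounts.put(file, refCounts.getOrDefault(file, 0) + 1);
      }
    }
    // e.g. {_0.cfs=2, _0.si=2, _1.cfs=1, _1.si=1, segments_5=1, segments_7=1} (order may vary)
    System.out.println(refCounts);

    // Deleting a non-snapshotted commit, say generation 6 with files {_0.cfs, _0.si, _2.cfs,
    // _2.si, segments_6}, only removes the files whose count here is 0 (_2.cfs, _2.si and
    // segments_6); everything still referenced by a snapshot stays on disk.
  }
}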

View File

@ -0,0 +1,416 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Collectors;
import com.google.common.base.Preconditions;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class is responsible for managing the persistent snapshot meta-data for the Solr indexes. The
* persistent snapshots are implemented by relying on the Lucene {@linkplain IndexDeletionPolicy}
* abstraction to configure a specific {@linkplain IndexCommit} to be retained. The
* {@linkplain IndexDeletionPolicyWrapper} in Solr uses this class to create/delete the Solr index
* snapshots.
*/
public class SolrSnapshotMetaDataManager {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String SNAPSHOT_METADATA_DIR = "snapshot_metadata";
/**
* A class defining the meta-data for a specific snapshot.
*/
public static class SnapshotMetaData {
private String name;
private String indexDirPath;
private long generationNumber;
public SnapshotMetaData(String name, String indexDirPath, long generationNumber) {
super();
this.name = name;
this.indexDirPath = indexDirPath;
this.generationNumber = generationNumber;
}
public String getName() {
return name;
}
public String getIndexDirPath() {
return indexDirPath;
}
public long getGenerationNumber() {
return generationNumber;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("SnapshotMetaData[name=");
builder.append(name);
builder.append(", indexDirPath=");
builder.append(indexDirPath);
builder.append(", generation=");
builder.append(generationNumber);
builder.append("]");
return builder.toString();
}
}
/** Prefix used for the save file. */
public static final String SNAPSHOTS_PREFIX = "snapshots_";
private static final int VERSION_START = 0;
private static final int VERSION_CURRENT = VERSION_START;
private static final String CODEC_NAME = "solr-snapshots";
// The generation number to use for the next snapshots_N meta-data file write
private long nextWriteGen;
private final Directory dir;
/** Used to map snapshot name to snapshot meta-data. */
protected final Map<String,SnapshotMetaData> nameToDetailsMapping = new LinkedHashMap<>();
/** Used to figure out the *current* index data directory path */
private final SolrCore solrCore;
/**
* A constructor.
*
* @param dir The directory where the snapshot meta-data should be stored. Enables updating
* the existing meta-data.
* @throws IOException in case of errors.
*/
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir) throws IOException {
this(solrCore, dir, OpenMode.CREATE_OR_APPEND);
}
/**
* A constructor.
*
* @param dir The directory where the snapshot meta-data is stored.
* @param mode CREATE If previous meta-data should be erased.
* APPEND If previous meta-data should be read and updated.
* CREATE_OR_APPEND Creates a new meta-data structure if one does not exist;
* updates the existing structure if one exists.
* @throws IOException in case of errors.
*/
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir, OpenMode mode) throws IOException {
this.solrCore = solrCore;
this.dir = dir;
if (mode == OpenMode.CREATE) {
deleteSnapshotMetadataFiles();
}
loadFromSnapshotMetadataFile();
if (mode == OpenMode.APPEND && nextWriteGen == 0) {
throw new IllegalStateException("no snapshots stored in this directory");
}
}
/**
* @return The snapshot meta-data directory
*/
public Directory getSnapshotsDir() {
return dir;
}
/**
* This method creates a new snapshot meta-data entry.
*
* @param name The name of the snapshot.
* @param indexDirPath The directory path where the index files are stored.
* @param gen The generation number for the {@linkplain IndexCommit} being snapshotted.
* @throws IOException in case of I/O errors.
*/
public synchronized void snapshot(String name, String indexDirPath, long gen) throws IOException {
Preconditions.checkNotNull(name);
log.info("Creating the snapshot named {} for core {} associated with index commit with generation {} in directory {}"
, name, solrCore.getName(), gen, indexDirPath);
if(nameToDetailsMapping.containsKey(name)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "A snapshot with name " + name + " already exists");
}
SnapshotMetaData d = new SnapshotMetaData(name, indexDirPath, gen);
nameToDetailsMapping.put(name, d);
boolean success = false;
try {
persist();
success = true;
} finally {
if (!success) {
try {
release(name);
} catch (Exception e) {
// Suppress so we keep throwing original exception
}
}
}
}
/**
* This method deletes a previously created snapshot (if any).
*
* @param name The name of the snapshot to be deleted.
* @return The snapshot meta-data if a snapshot with the given name exists.
* @throws IOException in case of I/O error
*/
public synchronized Optional<SnapshotMetaData> release(String name) throws IOException {
log.info("Deleting the snapshot named {} for core {}", name, solrCore.getName());
SnapshotMetaData result = nameToDetailsMapping.remove(Preconditions.checkNotNull(name));
if(result != null) {
boolean success = false;
try {
persist();
success = true;
} finally {
if (!success) {
nameToDetailsMapping.put(name, result);
}
}
}
return Optional.ofNullable(result);
}
/**
* This method returns whether a snapshot has been created for the specified generation number in
* the *current* index directory.
*
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
* @return true if such a snapshot has been created,
* false otherwise.
*/
public synchronized boolean isSnapshotted(long genNumber) {
return !nameToDetailsMapping.isEmpty() && isSnapshotted(solrCore.getIndexDir(), genNumber);
}
/**
* This method returns whether a snapshot has been created for the specified generation number in
* the specified index directory.
*
* @param indexDirPath The path of the index directory to be checked.
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
* @return true if such a snapshot has been created,
* false otherwise.
*/
public synchronized boolean isSnapshotted(String indexDirPath, long genNumber) {
return !nameToDetailsMapping.isEmpty()
&& nameToDetailsMapping.values().stream()
.anyMatch(entry -> entry.getIndexDirPath().equals(indexDirPath) && entry.getGenerationNumber() == genNumber);
}
/**
* This method returns the snapshot meta-data for the specified name (if it exists).
*
* @param name The name of the snapshot
* @return The snapshot meta-data if exists.
*/
public synchronized Optional<SnapshotMetaData> getSnapshotMetaData(String name) {
return Optional.ofNullable(nameToDetailsMapping.get(name));
}
/**
* @return A list of snapshots created so far.
*/
public synchronized List<String> listSnapshots() {
// We create a copy for thread safety.
return new ArrayList<>(nameToDetailsMapping.keySet());
}
/**
* This method returns a list of snapshots created in a specified index directory.
*
* @param indexDirPath The index directory path.
* @return a list of snapshots stored in the specified directory.
*/
public synchronized Collection<SnapshotMetaData> listSnapshotsInIndexDir(String indexDirPath) {
return nameToDetailsMapping.values().stream()
.filter(entry -> indexDirPath.equals(entry.getIndexDirPath()))
.collect(Collectors.toList());
}
/**
* This method returns the {@linkplain IndexCommit} associated with the specified
* <code>commitName</code>. A snapshot with the specified <code>commitName</code> must
* be created before invoking this method.
*
* @param commitName The name of persisted commit
* @return the {@linkplain IndexCommit}
* @throws IOException in case of I/O error.
*/
public Optional<IndexCommit> getIndexCommitByName(String commitName) throws IOException {
Optional<IndexCommit> result = Optional.empty();
Optional<SnapshotMetaData> metaData = getSnapshotMetaData(commitName);
if (metaData.isPresent()) {
String indexDirPath = metaData.get().getIndexDirPath();
long gen = metaData.get().getGenerationNumber();
Directory d = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
try {
result = DirectoryReader.listCommits(d)
.stream()
.filter(ic -> ic.getGeneration() == gen)
.findAny();
if (!result.isPresent()) {
log.warn("Unable to find commit with generation {} in the directory {}", gen, indexDirPath);
}
} finally {
solrCore.getDirectoryFactory().release(d);
}
} else {
log.warn("Commit with name {} is not persisted for core {}", commitName, solrCore.getName());
}
return result;
}
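  // Worth noting for the lookup above: the snapshot meta-data records only a name, an index
  // directory path and a commit generation; the IndexCommit itself is re-discovered on demand
  // by listing the commits in that directory and matching on the generation.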
private synchronized void persist() throws IOException {
String fileName = SNAPSHOTS_PREFIX + nextWriteGen;
IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT);
boolean success = false;
try {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(nameToDetailsMapping.size());
for(Entry<String,SnapshotMetaData> ent : nameToDetailsMapping.entrySet()) {
out.writeString(ent.getKey());
out.writeString(ent.getValue().getIndexDirPath());
out.writeVLong(ent.getValue().getGenerationNumber());
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(out);
IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
} else {
IOUtils.close(out);
}
}
dir.sync(Collections.singletonList(fileName));
if (nextWriteGen > 0) {
String lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen-1);
// exception OK: likely it didn't exist
IOUtils.deleteFilesIgnoringExceptions(dir, lastSaveFile);
}
nextWriteGen++;
}
private synchronized void deleteSnapshotMetadataFiles() throws IOException {
for(String file : dir.listAll()) {
if (file.startsWith(SNAPSHOTS_PREFIX)) {
dir.deleteFile(file);
}
}
}
/**
* Reads the snapshot meta-data information from the given {@link Directory}.
*/
private synchronized void loadFromSnapshotMetadataFile() throws IOException {
log.info("Loading from snapshot metadata file...");
long genLoaded = -1;
IOException ioe = null;
List<String> snapshotFiles = new ArrayList<>();
for(String file : dir.listAll()) {
if (file.startsWith(SNAPSHOTS_PREFIX)) {
long gen = Long.parseLong(file.substring(SNAPSHOTS_PREFIX.length()));
if (genLoaded == -1 || gen > genLoaded) {
snapshotFiles.add(file);
Map<String, SnapshotMetaData> snapshotMetaDataMapping = new HashMap<>();
IndexInput in = dir.openInput(file, IOContext.DEFAULT);
try {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
int count = in.readVInt();
for(int i=0;i<count;i++) {
String name = in.readString();
String indexDirPath = in.readString();
long commitGen = in.readVLong();
snapshotMetaDataMapping.put(name, new SnapshotMetaData(name, indexDirPath, commitGen));
}
} catch (IOException ioe2) {
// Save first exception & throw in the end
if (ioe == null) {
ioe = ioe2;
}
} finally {
in.close();
}
genLoaded = gen;
nameToDetailsMapping.clear();
nameToDetailsMapping.putAll(snapshotMetaDataMapping);
}
}
}
if (genLoaded == -1) {
// Nothing was loaded...
if (ioe != null) {
// ... not for lack of trying:
throw ioe;
}
} else {
if (snapshotFiles.size() > 1) {
// Remove any broken / old snapshot files:
String curFileName = SNAPSHOTS_PREFIX + genLoaded;
for(String file : snapshotFiles) {
if (!curFileName.equals(file)) {
IOUtils.deleteFilesIgnoringExceptions(dir, file);
}
}
}
nextWriteGen = 1+genLoaded;
}
}
}
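// A small round-trip sketch of the snapshots_<gen> layout written by persist() and read back by
// loadFromSnapshotMetadataFile() above. It uses a Lucene 6.x RAMDirectory purely for brevity;
// the snapshot name, path and generation are invented.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class SnapshotMetadataFormatSketch {
  public static void main(String[] args) throws Exception {
    try (RAMDirectory dir = new RAMDirectory()) {
      // Write: codec header, entry count, then (name, indexDirPath, generation) per snapshot.
      try (IndexOutput out = dir.createOutput("snapshots_0", IOContext.DEFAULT)) {
        CodecUtil.writeHeader(out, "solr-snapshots", 0);
        out.writeVInt(1);
        out.writeString("weekly-backup");
        out.writeString("/var/solr/data/collection1/index");
        out.writeVLong(42L);
      }

      // Read it back the same way the manager does on startup.
      Map<String, Long> loaded = new LinkedHashMap<>();
      try (IndexInput in = dir.openInput("snapshots_0", IOContext.DEFAULT)) {
        CodecUtil.checkHeader(in, "solr-snapshots", 0, 0);
        int count = in.readVInt();
        for (int i = 0; i < count; i++) {
          String name = in.readString();
          String indexDirPath = in.readString();
          long generation = in.readVLong();
          loaded.put(name + " @ " + indexDirPath, generation);
        }
      }
      System.out.println(loaded); // {weekly-backup @ /var/solr/data/collection1/index=42}
    }
  }
}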

View File

@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Core classes for Solr's persistent snapshots functionality
*/
package org.apache.solr.core.snapshots;

View File

@ -160,7 +160,7 @@ public class BlobHandler extends RequestHandlerBase implements PluginInfoInitial
} else { } else {
String q = "blobName:{0}"; String q = "blobName:{0}";
if (version != -1) q = "id:{0}/{1}"; if (version != -1) q = "id:{0}/{1}";
QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), "lucene", req); QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true))); final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
if (docs.totalHits > 0) { if (docs.totalHits > 0) {
rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() { rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {

View File

@ -121,6 +121,11 @@ public class CdcrParams {
*/ */
public final static String COUNTER_DELETES = "deletes"; public final static String COUNTER_DELETES = "deletes";
/**
* Counter for Bootstrap operations *
*/
public final static String COUNTER_BOOTSTRAP = "bootstraps";
/** /**
* A list of errors per target collection * * A list of errors per target collection *
*/ */
@ -165,7 +170,10 @@ public class CdcrParams {
LASTPROCESSEDVERSION, LASTPROCESSEDVERSION,
QUEUES, QUEUES,
OPS, OPS,
ERRORS; ERRORS,
BOOTSTRAP,
BOOTSTRAP_STATUS,
CANCEL_BOOTSTRAP;
public static CdcrAction get(String p) { public static CdcrAction get(String p) {
if (p != null) { if (p != null) {

View File

@ -119,7 +119,7 @@ public class CdcrReplicator implements Runnable {
// we might have read a single commit operation and reached the end of the update logs // we might have read a single commit operation and reached the end of the update logs
logReader.forwardSeek(subReader); logReader.forwardSeek(subReader);
log.debug("Forwarded {} updates to target {}", counter, state.getTargetCollection()); log.info("Forwarded {} updates to target {}", counter, state.getTargetCollection());
} catch (Exception e) { } catch (Exception e) {
// report error and update error stats // report error and update error stats
this.handleException(e); this.handleException(e);
@ -150,13 +150,13 @@ public class CdcrReplicator implements Runnable {
if (e instanceof CdcrReplicatorException) { if (e instanceof CdcrReplicatorException) {
UpdateRequest req = ((CdcrReplicatorException) e).req; UpdateRequest req = ((CdcrReplicatorException) e).req;
UpdateResponse rsp = ((CdcrReplicatorException) e).rsp; UpdateResponse rsp = ((CdcrReplicatorException) e).rsp;
log.warn("Failed to forward update request {}. Got response {}", req, rsp); log.warn("Failed to forward update request {} to target: {}. Got response {}", req, state.getTargetCollection(), rsp);
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST); state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
} else if (e instanceof CloudSolrClient.RouteException) { } else if (e instanceof CloudSolrClient.RouteException) {
log.warn("Failed to forward update request", e); log.warn("Failed to forward update request to target: " + state.getTargetCollection(), e);
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST); state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
} else { } else {
log.warn("Failed to forward update request", e); log.warn("Failed to forward update request to target: " + state.getTargetCollection(), e);
state.reportError(CdcrReplicatorState.ErrorType.INTERNAL); state.reportError(CdcrReplicatorState.ErrorType.INTERNAL);
} }
} }

View File

@ -16,29 +16,49 @@
*/ */
package org.apache.solr.handler; package org.apache.solr.handler;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.http.client.HttpClient;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder; import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SolrjNamedThreadFactory;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.update.CdcrUpdateLog; import org.apache.solr.update.CdcrUpdateLog;
import org.apache.solr.util.TimeOut;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_STATUS;
class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver { class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
private static final int MAX_BOOTSTRAP_ATTEMPTS = 5;
private static final int BOOTSTRAP_RETRY_DELAY_MS = 2000;
// 6 hours is hopefully long enough for most indexes
private static final long BOOTSTRAP_TIMEOUT_SECONDS = 6L * 3600L;
private List<CdcrReplicatorState> replicatorStates; private List<CdcrReplicatorState> replicatorStates;
private final CdcrReplicatorScheduler scheduler; private final CdcrReplicatorScheduler scheduler;
@ -48,6 +68,9 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
private SolrCore core; private SolrCore core;
private String path; private String path;
private ExecutorService bootstrapExecutor;
private volatile BootstrapStatusRunnable bootstrapStatusRunnable;
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
CdcrReplicatorManager(final SolrCore core, String path, CdcrReplicatorManager(final SolrCore core, String path,
@ -104,12 +127,20 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
@Override @Override
public synchronized void stateUpdate() { public synchronized void stateUpdate() {
if (leaderStateManager.amILeader() && processStateManager.getState().equals(CdcrParams.ProcessState.STARTED)) { if (leaderStateManager.amILeader() && processStateManager.getState().equals(CdcrParams.ProcessState.STARTED)) {
if (replicatorStates.size() > 0) {
this.bootstrapExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(replicatorStates.size(),
new SolrjNamedThreadFactory("cdcr-bootstrap-status"));
}
this.initLogReaders(); this.initLogReaders();
this.scheduler.start(); this.scheduler.start();
return; return;
} }
this.scheduler.shutdown(); this.scheduler.shutdown();
if (bootstrapExecutor != null) {
IOUtils.closeQuietly(bootstrapStatusRunnable);
ExecutorUtil.shutdownAndAwaitTermination(bootstrapExecutor);
}
this.closeLogReaders(); this.closeLogReaders();
} }
@ -117,7 +148,7 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
return replicatorStates; return replicatorStates;
} }
void initLogReaders() { private void initLogReaders() {
String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName(); String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
String shard = core.getCoreDescriptor().getCloudDescriptor().getShardId(); String shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
CdcrUpdateLog ulog = (CdcrUpdateLog) core.getUpdateHandler().getUpdateLog(); CdcrUpdateLog ulog = (CdcrUpdateLog) core.getUpdateHandler().getUpdateLog();
@ -129,8 +160,23 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
log.info("Create new update log reader for target {} with checkpoint {} @ {}:{}", state.getTargetCollection(), log.info("Create new update log reader for target {} with checkpoint {} @ {}:{}", state.getTargetCollection(),
checkpoint, collectionName, shard); checkpoint, collectionName, shard);
CdcrUpdateLog.CdcrLogReader reader = ulog.newLogReader(); CdcrUpdateLog.CdcrLogReader reader = ulog.newLogReader();
reader.seek(checkpoint); boolean seek = reader.seek(checkpoint);
state.init(reader); state.init(reader);
if (!seek) {
// targetVersion is lower than the oldest known entry.
// In this scenario, it probably means that there is a gap in the updates log.
// the best we can do here is to bootstrap the target leader by replicating the full index
final String targetCollection = state.getTargetCollection();
state.setBootstrapInProgress(true);
log.info("Attempting to bootstrap target collection: {}, shard: {}", targetCollection, shard);
bootstrapStatusRunnable = new BootstrapStatusRunnable(core, state);
log.info("Submitting bootstrap task to executor");
try {
bootstrapExecutor.submit(bootstrapStatusRunnable);
} catch (Exception e) {
log.error("Unable to submit bootstrap call to executor", e);
}
}
} catch (IOException | SolrServerException | SolrException e) { } catch (IOException | SolrServerException | SolrException e) {
log.warn("Unable to instantiate the log reader for target collection " + state.getTargetCollection(), e); log.warn("Unable to instantiate the log reader for target collection " + state.getTargetCollection(), e);
} catch (InterruptedException e) { } catch (InterruptedException e) {
@ -164,11 +210,203 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
*/ */
void shutdown() { void shutdown() {
this.scheduler.shutdown(); this.scheduler.shutdown();
if (bootstrapExecutor != null) {
IOUtils.closeQuietly(bootstrapStatusRunnable);
ExecutorUtil.shutdownAndAwaitTermination(bootstrapExecutor);
}
for (CdcrReplicatorState state : replicatorStates) { for (CdcrReplicatorState state : replicatorStates) {
state.shutdown(); state.shutdown();
} }
replicatorStates.clear(); replicatorStates.clear();
} }
private class BootstrapStatusRunnable implements Runnable, Closeable {
private final CdcrReplicatorState state;
private final String targetCollection;
private final String shard;
private final String collectionName;
private final CdcrUpdateLog ulog;
private final String myCoreUrl;
private volatile boolean closed = false;
BootstrapStatusRunnable(SolrCore core, CdcrReplicatorState state) {
this.collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
this.shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
this.ulog = (CdcrUpdateLog) core.getUpdateHandler().getUpdateLog();
this.state = state;
this.targetCollection = state.getTargetCollection();
String baseUrl = core.getCoreDescriptor().getCoreContainer().getZkController().getBaseUrl();
this.myCoreUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, core.getName());
}
@Override
public void close() throws IOException {
closed = true;
try {
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
String leaderCoreUrl = leader.getCoreUrl();
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
sendCdcrCommand(client, CdcrParams.CdcrAction.CANCEL_BOOTSTRAP);
} catch (SolrServerException e) {
log.error("Error sending cancel bootstrap message to target collection: {} shard: {} leader: {}",
targetCollection, shard, leaderCoreUrl);
}
} catch (InterruptedException e) {
log.error("Interrupted while closing BootstrapStatusRunnable", e);
Thread.currentThread().interrupt();
}
}
@Override
public void run() {
int retries = 1;
boolean success = false;
try {
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
TimeOut timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS);
while (!timeOut.hasTimedOut()) {
if (closed) {
log.warn("Cancelling waiting for bootstrap on target: {} shard: {} to complete", targetCollection, shard);
state.setBootstrapInProgress(false);
break;
}
BootstrapStatus status = getBootstrapStatus();
if (status == BootstrapStatus.RUNNING) {
try {
log.info("CDCR bootstrap running for {} seconds, sleeping for {} ms",
BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS), BOOTSTRAP_RETRY_DELAY_MS);
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
} else if (status == BootstrapStatus.COMPLETED) {
log.info("CDCR bootstrap successful in {} seconds", BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
long checkpoint = CdcrReplicatorManager.this.getCheckpoint(state);
log.info("Create new update log reader for target {} with checkpoint {} @ {}:{}", state.getTargetCollection(),
checkpoint, collectionName, shard);
CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader();
reader1.seek(checkpoint);
success = true;
break;
} else if (status == BootstrapStatus.FAILED) {
log.warn("CDCR bootstrap failed in {} seconds", BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
// let's retry a fixed number of times before giving up
if (retries >= MAX_BOOTSTRAP_ATTEMPTS) {
log.error("Unable to bootstrap the target collection: {}, shard: {} even after {} retries", targetCollection, shard, retries);
break;
} else {
log.info("Retry: {} - Attempting to bootstrap target collection: {} shard: {}", retries, targetCollection, shard);
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS); // reset the timer
retries++;
}
} else if (status == BootstrapStatus.NOTFOUND) {
// the leader of the target shard may have changed and therefore there is no record of the
// bootstrap process so we must retry the operation
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
retries = 1;
timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS); // reset the timer
} else if (status == BootstrapStatus.UNKNOWN) {
// we were not able to query the status on the remote end
// so just sleep for a bit and try again
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);
}
}
} catch (InterruptedException e) {
log.info("Bootstrap thread interrupted");
state.reportError(CdcrReplicatorState.ErrorType.INTERNAL);
Thread.currentThread().interrupt();
} catch (IOException | SolrServerException | SolrException e) {
log.error("Unable to bootstrap the target collection " + targetCollection + " shard: " + shard, e);
state.reportError(CdcrReplicatorState.ErrorType.BAD_REQUEST);
} finally {
if (success) {
log.info("Bootstrap successful, giving the go-ahead to replicator");
state.setBootstrapInProgress(false);
}
}
}
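    // Rough shape of the polling loop above: re-send BOOTSTRAP until the target leader answers
    // SUBMITTED, then poll BOOTSTRAP_STATUS every BOOTSTRAP_RETRY_DELAY_MS. RUNNING keeps
    // waiting, COMPLETED re-seats the log reader at the new checkpoint, FAILED re-submits up to
    // MAX_BOOTSTRAP_ATTEMPTS times, NOTFOUND re-submits because the target leader may have
    // changed, and UNKNOWN just sleeps and asks again, all bounded by BOOTSTRAP_TIMEOUT_SECONDS.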
private BootstrapStatus sendBootstrapCommand() throws InterruptedException {
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
String leaderCoreUrl = leader.getCoreUrl();
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
log.info("Attempting to bootstrap target collection: {} shard: {} leader: {}", targetCollection, shard, leaderCoreUrl);
try {
NamedList response = sendCdcrCommand(client, CdcrParams.CdcrAction.BOOTSTRAP, ReplicationHandler.MASTER_URL, myCoreUrl);
log.debug("CDCR Bootstrap response: {}", response);
String status = response.get(RESPONSE_STATUS).toString();
return BootstrapStatus.valueOf(status.toUpperCase(Locale.ROOT));
} catch (Exception e) {
log.error("Exception submitting bootstrap request", e);
return BootstrapStatus.UNKNOWN;
}
} catch (IOException e) {
log.error("There shouldn't be an IOException while closing but there was!", e);
}
return BootstrapStatus.UNKNOWN;
}
private BootstrapStatus getBootstrapStatus() throws InterruptedException {
try {
Replica leader = state.getClient().getZkStateReader().getLeaderRetry(targetCollection, shard, 30000); // assume same shard exists on target
String leaderCoreUrl = leader.getCoreUrl();
HttpClient httpClient = state.getClient().getLbClient().getHttpClient();
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderCoreUrl).withHttpClient(httpClient).build()) {
NamedList response = sendCdcrCommand(client, CdcrParams.CdcrAction.BOOTSTRAP_STATUS);
String status = (String) response.get(RESPONSE_STATUS);
BootstrapStatus bootstrapStatus = BootstrapStatus.valueOf(status.toUpperCase(Locale.ROOT));
if (bootstrapStatus == BootstrapStatus.RUNNING) {
return BootstrapStatus.RUNNING;
} else if (bootstrapStatus == BootstrapStatus.COMPLETED) {
return BootstrapStatus.COMPLETED;
} else if (bootstrapStatus == BootstrapStatus.FAILED) {
return BootstrapStatus.FAILED;
} else if (bootstrapStatus == BootstrapStatus.NOTFOUND) {
log.warn("Bootstrap process was not found on target collection: {} shard: {}, leader: {}", targetCollection, shard, leaderCoreUrl);
return BootstrapStatus.NOTFOUND;
} else if (bootstrapStatus == BootstrapStatus.CANCELLED) {
return BootstrapStatus.CANCELLED;
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unknown status: " + status + " returned by BOOTSTRAP_STATUS command");
}
}
} catch (Exception e) {
log.error("Exception during bootstrap status request", e);
return BootstrapStatus.UNKNOWN;
}
}
}
private NamedList sendCdcrCommand(SolrClient client, CdcrParams.CdcrAction action, String... params) throws SolrServerException, IOException {
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set(CommonParams.QT, "/cdcr");
solrParams.set(CommonParams.ACTION, action.toString());
for (int i = 0; i < params.length - 1; i+=2) {
solrParams.set(params[i], params[i + 1]);
}
SolrRequest request = new QueryRequest(solrParams);
return client.request(request);
}
private enum BootstrapStatus {
SUBMITTED,
RUNNING,
COMPLETED,
FAILED,
NOTFOUND,
CANCELLED,
UNKNOWN
}
} }

View File

@ -77,7 +77,11 @@ class CdcrReplicatorScheduler {
CdcrReplicatorState state = statesQueue.poll(); CdcrReplicatorState state = statesQueue.poll();
assert state != null; // Should never happen assert state != null; // Should never happen
try { try {
if (!state.isBootstrapInProgress()) {
new CdcrReplicator(state, batchSize).run(); new CdcrReplicator(state, batchSize).run();
} else {
log.debug("Replicator state is bootstrapping, skipping replication for target collection {}", state.getTargetCollection());
}
} finally { } finally {
statesQueue.offer(state); statesQueue.offer(state);
} }

View File

@ -27,6 +27,8 @@ import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.update.CdcrUpdateLog; import org.apache.solr.update.CdcrUpdateLog;
@ -53,6 +55,9 @@ class CdcrReplicatorState {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final AtomicBoolean bootstrapInProgress = new AtomicBoolean(false);
private final AtomicInteger numBootstraps = new AtomicInteger();
CdcrReplicatorState(final String targetCollection, final String zkHost, final CloudSolrClient targetClient) { CdcrReplicatorState(final String targetCollection, final String zkHost, final CloudSolrClient targetClient) {
this.targetCollection = targetCollection; this.targetCollection = targetCollection;
this.targetClient = targetClient; this.targetClient = targetClient;
@ -164,6 +169,24 @@ class CdcrReplicatorState {
return this.benchmarkTimer; return this.benchmarkTimer;
} }
/**
* @return true if a bootstrap operation is in progress, false otherwise
*/
boolean isBootstrapInProgress() {
return bootstrapInProgress.get();
}
void setBootstrapInProgress(boolean inProgress) {
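    // Increment the bootstrap counter whenever the flag transitions away from "in progress";
    // the compareAndSet avoids double-counting repeated calls that pass false.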
if (bootstrapInProgress.compareAndSet(true, false)) {
numBootstraps.incrementAndGet();
}
bootstrapInProgress.set(inProgress);
}
public int getNumBootstraps() {
return numBootstraps.get();
}
enum ErrorType { enum ErrorType {
INTERNAL, INTERNAL,
BAD_REQUEST; BAD_REQUEST;

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.solr.handler; package org.apache.solr.handler;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
@ -24,14 +25,20 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
@ -41,21 +48,33 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CloseHook; import org.apache.solr.core.CloseHook;
import org.apache.solr.core.PluginBag; import org.apache.solr.core.PluginBag;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler; import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.CdcrUpdateLog; import org.apache.solr.update.CdcrUpdateLog;
import org.apache.solr.update.UpdateLog; import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.VersionInfo;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.admin.CoreAdminHandler.COMPLETED;
import static org.apache.solr.handler.admin.CoreAdminHandler.FAILED;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_MESSAGE;
import static org.apache.solr.handler.admin.CoreAdminHandler.RESPONSE_STATUS;
import static org.apache.solr.handler.admin.CoreAdminHandler.RUNNING;
/** /**
* <p> * <p>
* This request handler implements the CDCR API and is responsible of the execution of the * This request handler implements the CDCR API and is responsible of the execution of the
@ -199,6 +218,18 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
this.handleErrorsAction(req, rsp); this.handleErrorsAction(req, rsp);
break; break;
} }
case BOOTSTRAP: {
this.handleBootstrapAction(req, rsp);
break;
}
case BOOTSTRAP_STATUS: {
this.handleBootstrapStatus(req, rsp);
break;
}
case CANCEL_BOOTSTRAP: {
this.handleCancelBootstrap(req, rsp);
break;
}
default: { default: {
throw new RuntimeException("Unknown action: " + action); throw new RuntimeException("Unknown action: " + action);
} }
@ -409,10 +440,20 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
} }
UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
VersionInfo versionInfo = ulog.getVersionInfo();
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) { try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
List<Long> versions = recentUpdates.getVersions(1); long maxVersionFromRecent = recentUpdates.getMaxRecentVersion();
long lastVersion = versions.isEmpty() ? -1 : Math.abs(versions.get(0)); long maxVersionFromIndex = versionInfo.getMaxVersionFromIndex(req.getSearcher());
rsp.add(CdcrParams.CHECKPOINT, lastVersion); log.info("Found maxVersionFromRecent {} maxVersionFromIndex {}", maxVersionFromRecent, maxVersionFromIndex);
// there is no race with ongoing bootstrap because we don't expect any updates to come from the source
long maxVersion = Math.max(maxVersionFromIndex, maxVersionFromRecent);
if (maxVersion == 0L) {
maxVersion = -1;
}
rsp.add(CdcrParams.CHECKPOINT, maxVersion);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Action '" + CdcrParams.CdcrAction.SHARDCHECKPOINT +
"' could not read max version");
} }
} }
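  // Illustrative (invented) values for the checkpoint selection above: with
  // maxVersionFromRecent = 1563970000000000512 (newest entry still in the update log) and
  // maxVersionFromIndex = 1563969999999999488 (largest _version_ found in the index), the
  // reported checkpoint is their max, 1563970000000000512; a pristine core, where both are 0,
  // keeps reporting -1 as before.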
@ -574,6 +615,192 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
rsp.add(CdcrParams.ERRORS, hosts); rsp.add(CdcrParams.ERRORS, hosts);
} }
private AtomicBoolean running = new AtomicBoolean();
private volatile Future<Boolean> bootstrapFuture;
private volatile BootstrapCallable bootstrapCallable;
private void handleBootstrapAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
String shard = core.getCoreDescriptor().getCloudDescriptor().getShardId();
if (!leaderStateManager.amILeader()) {
log.warn("Action {} sent to non-leader replica @ {}:{}", CdcrParams.CdcrAction.BOOTSTRAP, collectionName, shard);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Action " + CdcrParams.CdcrAction.BOOTSTRAP +
" sent to non-leader replica");
}
Runnable runnable = () -> {
Lock recoveryLock = req.getCore().getSolrCoreState().getRecoveryLock();
boolean locked = recoveryLock.tryLock();
try {
if (!locked) {
handleCancelBootstrap(req, rsp);
} else if (leaderStateManager.amILeader()) {
running.set(true);
String masterUrl = req.getParams().get(ReplicationHandler.MASTER_URL);
bootstrapCallable = new BootstrapCallable(masterUrl, core);
bootstrapFuture = core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getRecoveryExecutor().submit(bootstrapCallable);
try {
bootstrapFuture.get();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.warn("Bootstrap was interrupted", e);
} catch (ExecutionException e) {
log.error("Bootstrap operation failed", e);
}
} else {
log.error("Action {} sent to non-leader replica @ {}:{}. Aborting bootstrap.", CdcrParams.CdcrAction.BOOTSTRAP, collectionName, shard);
}
} finally {
if (locked) {
running.set(false);
recoveryLock.unlock();
}
}
};
try {
core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getUpdateExecutor().submit(runnable);
rsp.add(RESPONSE_STATUS, "submitted");
} catch (RejectedExecutionException ree) {
// no problem, we're probably shutting down
rsp.add(RESPONSE_STATUS, "failed");
}
}
private void handleCancelBootstrap(SolrQueryRequest req, SolrQueryResponse rsp) {
BootstrapCallable callable = this.bootstrapCallable;
IOUtils.closeQuietly(callable);
rsp.add(RESPONSE_STATUS, "cancelled");
}
private void handleBootstrapStatus(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
if (running.get()) {
rsp.add(RESPONSE_STATUS, RUNNING);
return;
}
Future<Boolean> future = bootstrapFuture;
BootstrapCallable callable = this.bootstrapCallable;
if (future == null) {
rsp.add(RESPONSE_STATUS, "notfound");
rsp.add(RESPONSE_MESSAGE, "No bootstrap found in running, completed or failed states");
} else if (future.isCancelled() || callable.isClosed()) {
rsp.add(RESPONSE_STATUS, "cancelled");
} else if (future.isDone()) {
// could be a normal termination or an exception
try {
Boolean result = future.get();
if (result) {
rsp.add(RESPONSE_STATUS, COMPLETED);
} else {
rsp.add(RESPONSE_STATUS, FAILED);
}
} catch (InterruptedException e) {
// should not happen?
} catch (ExecutionException e) {
rsp.add(RESPONSE_STATUS, FAILED);
rsp.add(RESPONSE, e);
} catch (CancellationException ce) {
rsp.add(RESPONSE_STATUS, FAILED);
rsp.add(RESPONSE_MESSAGE, "Bootstrap was cancelled");
}
} else {
rsp.add(RESPONSE_STATUS, RUNNING);
}
}
private static class BootstrapCallable implements Callable<Boolean>, Closeable {
private final String masterUrl;
private final SolrCore core;
private volatile boolean closed = false;
BootstrapCallable(String masterUrl, SolrCore core) {
this.masterUrl = masterUrl;
this.core = core;
}
@Override
public void close() throws IOException {
closed = true;
SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
ReplicationHandler replicationHandler = (ReplicationHandler) handler;
replicationHandler.abortFetch();
}
public boolean isClosed() {
return closed;
}
@Override
public Boolean call() throws Exception {
boolean success = false;
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
// we start buffering updates as a safeguard however we do not expect
// to receive any updates from the source during bootstrap
ulog.bufferUpdates();
try {
commitOnLeader(masterUrl);
// use rep handler directly, so we can do this sync rather than async
SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
ReplicationHandler replicationHandler = (ReplicationHandler) handler;
if (replicationHandler == null) {
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
"Skipping recovery, no " + ReplicationHandler.PATH + " handler found");
}
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set(ReplicationHandler.MASTER_URL, masterUrl);
// we do not want the raw tlog files from the source
solrParams.set(ReplicationHandler.TLOG_FILES, false);
success = replicationHandler.doFetch(solrParams, false);
// this is required because this callable can race with HttpSolrCall#destroy
// which clears the request info.
// Applying buffered updates fails without the following line because LogReplayer
// also tries to set request info and fails with AssertionError
SolrRequestInfo.clearRequestInfo();
Future<UpdateLog.RecoveryInfo> future = ulog.applyBufferedUpdates();
if (future == null) {
// no replay needed
log.info("No replay needed.");
} else {
log.info("Replaying buffered documents.");
// wait for replay
UpdateLog.RecoveryInfo report = future.get();
if (report.failed) {
SolrException.log(log, "Replay failed");
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Replay failed");
}
}
return success;
} finally {
if (closed || !success) {
// we cannot apply the buffer in this case because it will introduce newer versions in the
// update log and then the source cluster will get those versions via collectioncheckpoint
// causing the versions in between to be completely missed
boolean dropped = ulog.dropBufferedUpdates();
assert dropped;
}
}
}
private void commitOnLeader(String leaderUrl) throws SolrServerException,
IOException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderUrl).build()) {
client.setConnectionTimeout(30000);
UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams());
ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(
client);
}
}
}
}
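  // Condensed view of the bootstrap sequence above: buffer incoming updates as a safety net,
  // force a fresh commit on the source leader, pull the whole index through the local
  // /replication handler (tlog files excluded), then either replay whatever was buffered or,
  // on failure/cancel, drop the buffer so no out-of-order versions leak back to the source
  // via COLLECTIONCHECKPOINT.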
@Override @Override
public String getDescription() { public String getDescription() {
return "Manage Cross Data Center Replication"; return "Manage Cross Data Center Replication";

View File

@ -82,6 +82,9 @@ import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper; import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.apache.solr.handler.ReplicationHandler.*; import org.apache.solr.handler.ReplicationHandler.*;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
@ -468,9 +471,18 @@ public class IndexFetcher {
// let the system know we are changing dir's and the old one // let the system know we are changing dir's and the old one
// may be closed // may be closed
if (indexDir != null) { if (indexDir != null) {
LOG.info("removing old index directory " + indexDir);
solrCore.getDirectoryFactory().doneWithDirectory(indexDir); solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
SolrSnapshotMetaDataManager snapshotsMgr = solrCore.getSnapshotMetaDataManager();
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
// Delete the old index directory only if no snapshot exists in that directory.
if(snapshots.isEmpty()) {
LOG.info("removing old index directory " + indexDir);
solrCore.getDirectoryFactory().remove(indexDir); solrCore.getDirectoryFactory().remove(indexDir);
} else {
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
}
} }
} }
@ -738,14 +750,14 @@ public class IndexFetcher {
} }
private void openNewSearcherAndUpdateCommitPoint() throws IOException { private void openNewSearcherAndUpdateCommitPoint() throws IOException {
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
new ModifiableSolrParams());
RefCounted<SolrIndexSearcher> searcher = null; RefCounted<SolrIndexSearcher> searcher = null;
IndexCommit commitPoint; IndexCommit commitPoint;
// must get the latest solrCore object because the one we have might be closed because of a reload
// todo stop keeping solrCore around
SolrCore core = solrCore.getCoreDescriptor().getCoreContainer().getCore(solrCore.getName());
try { try {
Future[] waitSearcher = new Future[1]; Future[] waitSearcher = new Future[1];
searcher = solrCore.getSearcher(true, true, waitSearcher, true); searcher = core.getSearcher(true, true, waitSearcher, true);
if (waitSearcher[0] != null) { if (waitSearcher[0] != null) {
try { try {
waitSearcher[0].get(); waitSearcher[0].get();
@ -755,10 +767,10 @@ public class IndexFetcher {
} }
commitPoint = searcher.get().getIndexReader().getIndexCommit(); commitPoint = searcher.get().getIndexReader().getIndexCommit();
} finally { } finally {
req.close();
if (searcher != null) { if (searcher != null) {
searcher.decref(); searcher.decref();
} }
core.close();
} }
// update the commit point in replication handler // update the commit point in replication handler

View File

@ -125,7 +125,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
filters = new ArrayList<>(); filters = new ArrayList<>();
for (String fq : fqs) { for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0) { if (fq != null && fq.trim().length() != 0) {
QParser fqp = QParser.getParser(fq, null, req); QParser fqp = QParser.getParser(fq, req);
filters.add(fqp.getQuery()); filters.add(fqp.getQuery());
} }
} }
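This is the first of several places in this change set (ExpandComponent, QueryComponent, RealTimeGetComponent, SpellCheckComponent, SimpleFacets and ChildDocTransformerFactory below) that switch to the new two-argument QParser.getParser overload, which simply drops the explicit null default-parser argument. A minimal sketch of the pattern, assuming a live SolrQueryRequest named req and a filter string fq taken from the request:

    // before: QParser fqp = QParser.getParser(fq, null, req);
    QParser fqp = QParser.getParser(fq, req);   // uses the default parser type
    Query filter = fqp.getQuery();              // may throw SyntaxError on malformed input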
@ -87,6 +87,7 @@ import org.apache.solr.core.SolrDeletionPolicy;
import org.apache.solr.core.SolrEventListener; import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.LocalFileSystemRepository; import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
@ -299,9 +300,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
rsp.add("message","No slave configured"); rsp.add("message","No slave configured");
} }
} else if (command.equalsIgnoreCase(CMD_ABORT_FETCH)) { } else if (command.equalsIgnoreCase(CMD_ABORT_FETCH)) {
IndexFetcher fetcher = currentIndexFetcher; if (abortFetch()){
if (fetcher != null){
fetcher.abortFetch();
rsp.add(STATUS, OK_STATUS); rsp.add(STATUS, OK_STATUS);
} else { } else {
rsp.add(STATUS,ERR_STATUS); rsp.add(STATUS,ERR_STATUS);
@ -320,6 +319,16 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} }
} }
public boolean abortFetch() {
IndexFetcher fetcher = currentIndexFetcher;
if (fetcher != null){
fetcher.abortFetch();
return true;
} else {
return false;
}
}
private void deleteSnapshot(ModifiableSolrParams params) { private void deleteSnapshot(ModifiableSolrParams params) {
String name = params.get(NAME); String name = params.get(NAME);
if(name == null) { if(name == null) {
@ -512,12 +521,25 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
numberToKeep = Integer.MAX_VALUE; numberToKeep = Integer.MAX_VALUE;
} }
IndexCommit indexCommit = null;
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
if (commitName != null) {
SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
if(commit.isPresent()) {
indexCommit = commit.get();
} else {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to find an index commit with name " + commitName +
" for core " + core.getName());
}
} else {
IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy(); IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
IndexCommit indexCommit = delPolicy.getLatestCommit(); indexCommit = delPolicy.getLatestCommit();
if (indexCommit == null) { if (indexCommit == null) {
indexCommit = req.getSearcher().getIndexReader().getIndexCommit(); indexCommit = req.getSearcher().getIndexReader().getIndexCommit();
} }
}
String location = params.get(CoreAdminParams.BACKUP_LOCATION); String location = params.get(CoreAdminParams.BACKUP_LOCATION);
String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY); String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
@ -539,7 +561,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
} }
// small race here before the commit point is saved // small race here before the commit point is saved
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME)); SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME), commitName);
snapShooter.validateCreateSnapshot(); snapShooter.validateCreateSnapshot();
snapShooter.createSnapAsync(indexCommit, numberToKeep, (nl) -> snapShootDetails = nl); snapShooter.createSnapAsync(indexCommit, numberToKeep, (nl) -> snapShootDetails = nl);
@ -644,7 +666,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
rsp.add(CMD_GET_FILE_LIST, result); rsp.add(CMD_GET_FILE_LIST, result);
// fetch list of tlog files only if cdcr is activated // fetch list of tlog files only if cdcr is activated
if (core.getUpdateHandler().getUpdateLog() != null && core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) { if (solrParams.getBool(TLOG_FILES, true) && core.getUpdateHandler().getUpdateLog() != null
&& core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
try { try {
List<Map<String, Object>> tlogfiles = getTlogFileList(commit); List<Map<String, Object>> tlogfiles = getTlogFileList(commit);
LOG.info("Adding tlog files to list: " + tlogfiles); LOG.info("Adding tlog files to list: " + tlogfiles);
@ -16,13 +16,17 @@
*/ */
package org.apache.solr.handler; package org.apache.solr.handler;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.concurrent.atomic.LongAdder;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.PluginBag; import org.apache.solr.core.PluginBag;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler; import org.apache.solr.request.SolrRequestHandler;
@ -35,10 +39,6 @@ import org.apache.solr.util.stats.TimerContext;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.concurrent.atomic.AtomicLong;
import static org.apache.solr.core.RequestParams.USEPARAM; import static org.apache.solr.core.RequestParams.USEPARAM;
/** /**
@ -53,10 +53,10 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
protected boolean httpCaching = true; protected boolean httpCaching = true;
// Statistics // Statistics
private final AtomicLong numRequests = new AtomicLong(); private final LongAdder numRequests = new LongAdder();
private final AtomicLong numServerErrors = new AtomicLong(); private final LongAdder numServerErrors = new LongAdder();
private final AtomicLong numClientErrors = new AtomicLong(); private final LongAdder numClientErrors = new LongAdder();
private final AtomicLong numTimeouts = new AtomicLong(); private final LongAdder numTimeouts = new LongAdder();
private final Timer requestTimes = new Timer(); private final Timer requestTimes = new Timer();
private final long handlerStart; private final long handlerStart;
@ -144,7 +144,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
@Override @Override
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) { public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests.incrementAndGet(); numRequests.increment();
TimerContext timer = requestTimes.time(); TimerContext timer = requestTimes.time();
try { try {
if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM)); if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM));
@ -158,7 +158,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
Object partialResults = header.get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY); Object partialResults = header.get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY);
boolean timedOut = partialResults == null ? false : (Boolean)partialResults; boolean timedOut = partialResults == null ? false : (Boolean)partialResults;
if( timedOut ) { if( timedOut ) {
numTimeouts.incrementAndGet(); numTimeouts.increment();
rsp.setHttpCaching(false); rsp.setHttpCaching(false);
} }
} }
@ -185,9 +185,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
SolrException.log(log, e); SolrException.log(log, e);
if (isServerError) { if (isServerError) {
numServerErrors.incrementAndGet(); numServerErrors.increment();
} else { } else {
numClientErrors.incrementAndGet(); numClientErrors.increment();
} }
} }
} }
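Replacing the AtomicLong statistics fields with java.util.concurrent.atomic.LongAdder is purely an optimization: these counters are incremented on every request from many threads and read only when statistics are reported, which is exactly the write-mostly pattern LongAdder is designed for. A small self-contained illustration (not Solr code):

    import java.util.concurrent.atomic.LongAdder;
    import java.util.stream.IntStream;

    public class CounterDemo {
      public static void main(String[] args) {
        LongAdder numRequests = new LongAdder();
        // many threads increment without contending on a single memory location ...
        IntStream.range(0, 1_000).parallel().forEach(i -> numRequests.increment());
        // ... and the occasional statistics read sums the per-cell values
        System.out.println(numRequests.sum());   // 1000
      }
    }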
@ -19,6 +19,7 @@ package org.apache.solr.handler;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.net.URI; import java.net.URI;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.Locale; import java.util.Locale;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
@ -32,6 +33,9 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -63,6 +67,7 @@ public class RestoreCore implements Callable<Boolean> {
String restoreIndexName = "restore." + dateFormat.format(new Date()); String restoreIndexName = "restore." + dateFormat.format(new Date());
String restoreIndexPath = core.getDataDir() + restoreIndexName; String restoreIndexPath = core.getDataDir() + restoreIndexName;
String indexDirPath = core.getIndexDir();
Directory restoreIndexDir = null; Directory restoreIndexDir = null;
Directory indexDir = null; Directory indexDir = null;
try { try {
@ -71,7 +76,7 @@ public class RestoreCore implements Callable<Boolean> {
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Prefer local copy. //Prefer local copy.
indexDir = core.getDirectoryFactory().get(core.getIndexDir(), indexDir = core.getDirectoryFactory().get(indexDirPath,
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType); DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Move all files from backupDir to restoreIndexDir //Move all files from backupDir to restoreIndexDir
@ -130,7 +135,16 @@ public class RestoreCore implements Callable<Boolean> {
} }
if (success) { if (success) {
core.getDirectoryFactory().doneWithDirectory(indexDir); core.getDirectoryFactory().doneWithDirectory(indexDir);
SolrSnapshotMetaDataManager snapshotsMgr = core.getSnapshotMetaDataManager();
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
// Delete the old index directory only if no snapshot exists in that directory.
if (snapshots.isEmpty()) {
core.getDirectoryFactory().remove(indexDir); core.getDirectoryFactory().remove(indexDir);
} else {
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
}
} }
return true; return true;
@ -185,7 +185,7 @@ public class SchemaHandler extends RequestHandlerBase implements SolrCoreAware,
if (parts.get(0).isEmpty()) parts.remove(0); if (parts.get(0).isEmpty()) parts.remove(0);
if (parts.size() > 1 && level2.containsKey(parts.get(1))) { if (parts.size() > 1 && level2.containsKey(parts.get(1))) {
String realName = parts.get(1); String realName = parts.get(1);
String fieldName = IndexSchema.SchemaProps.nameMapping.get(realName); String fieldName = IndexSchema.nameMapping.get(realName);
String pathParam = level2.get(realName); String pathParam = level2.get(realName);
if (parts.size() > 2) { if (parts.size() > 2) {
@ -26,12 +26,14 @@ import java.util.Collections;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Optional;
import java.util.function.Consumer; import java.util.function.Consumer;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper; import org.apache.solr.core.IndexDeletionPolicyWrapper;
@ -39,6 +41,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.BackupRepository.PathType; import org.apache.solr.core.backup.repository.BackupRepository.PathType;
import org.apache.solr.core.backup.repository.LocalFileSystemRepository; import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -59,6 +62,7 @@ public class SnapShooter {
private URI baseSnapDirPath = null; private URI baseSnapDirPath = null;
private URI snapshotDirPath = null; private URI snapshotDirPath = null;
private BackupRepository backupRepo = null; private BackupRepository backupRepo = null;
private String commitName; // can be null
@Deprecated @Deprecated
public SnapShooter(SolrCore core, String location, String snapshotName) { public SnapShooter(SolrCore core, String location, String snapshotName) {
@ -71,14 +75,14 @@ public class SnapShooter {
} else { } else {
snapDirStr = core.getCoreDescriptor().getInstanceDir().resolve(location).normalize().toString(); snapDirStr = core.getCoreDescriptor().getInstanceDir().resolve(location).normalize().toString();
} }
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName); initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName, null);
} }
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) { public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
initialize(backupRepo, core, location, snapshotName); initialize(backupRepo, core, location, snapshotName, commitName);
} }
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) { private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
this.solrCore = Preconditions.checkNotNull(core); this.solrCore = Preconditions.checkNotNull(core);
this.backupRepo = Preconditions.checkNotNull(backupRepo); this.backupRepo = Preconditions.checkNotNull(backupRepo);
this.baseSnapDirPath = backupRepo.createURI(Preconditions.checkNotNull(location)).normalize(); this.baseSnapDirPath = backupRepo.createURI(Preconditions.checkNotNull(location)).normalize();
@ -90,6 +94,7 @@ public class SnapShooter {
directoryName = "snapshot." + fmt.format(new Date()); directoryName = "snapshot." + fmt.format(new Date());
} }
this.snapshotDirPath = backupRepo.createURI(location, directoryName); this.snapshotDirPath = backupRepo.createURI(location, directoryName);
this.commitName = commitName;
} }
public BackupRepository getBackupRepository() { public BackupRepository getBackupRepository() {
@ -145,10 +150,19 @@ public class SnapShooter {
} }
public NamedList createSnapshot() throws Exception { public NamedList createSnapshot() throws Exception {
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
RefCounted<SolrIndexSearcher> searcher = solrCore.getSearcher(); RefCounted<SolrIndexSearcher> searcher = solrCore.getSearcher();
try { try {
if (commitName != null) {
SolrSnapshotMetaDataManager snapshotMgr = solrCore.getSnapshotMetaDataManager();
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
if(commit.isPresent()) {
return createSnapshot(commit.get());
}
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to find an index commit with name " + commitName +
" for core " + solrCore.getName());
} else {
//TODO should we try solrCore.getDeletionPolicy().getLatestCommit() first? //TODO should we try solrCore.getDeletionPolicy().getLatestCommit() first?
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
IndexCommit indexCommit = searcher.get().getIndexReader().getIndexCommit(); IndexCommit indexCommit = searcher.get().getIndexReader().getIndexCommit();
deletionPolicy.saveCommitPoint(indexCommit.getGeneration()); deletionPolicy.saveCommitPoint(indexCommit.getGeneration());
try { try {
@ -156,6 +170,7 @@ public class SnapShooter {
} finally { } finally {
deletionPolicy.releaseCommitPoint(indexCommit.getGeneration()); deletionPolicy.releaseCommitPoint(indexCommit.getGeneration());
} }
}
} finally { } finally {
searcher.decref(); searcher.decref();
} }
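The extra commitName constructor argument lets SnapShooter snapshot a specific named commit (resolved through SolrSnapshotMetaDataManager) instead of the latest one; passing null keeps the old behaviour, which is what the deprecated local-filesystem constructor does. A hedged fragment showing the caller side, with repo, core, location, snapshotName and commitName assumed to be set up as in ReplicationHandler above:

    SnapShooter snapShooter = new SnapShooter(repo, core, location, snapshotName, commitName);
    snapShooter.validateCreateSnapshot();
    NamedList details = snapShooter.createSnapshot();   // throws if commitName cannot be resolved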
@ -122,6 +122,8 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("intersect", IntersectStream.class) .withFunctionName("intersect", IntersectStream.class)
.withFunctionName("complement", ComplementStream.class) .withFunctionName("complement", ComplementStream.class)
.withFunctionName("sort", SortStream.class) .withFunctionName("sort", SortStream.class)
.withFunctionName("train", TextLogitStream.class)
.withFunctionName("features", FeaturesSelectionStream.class)
.withFunctionName("daemon", DaemonStream.class) .withFunctionName("daemon", DaemonStream.class)
.withFunctionName("shortestPath", ShortestPathStream.class) .withFunctionName("shortestPath", ShortestPathStream.class)
.withFunctionName("gatherNodes", GatherNodesStream.class) .withFunctionName("gatherNodes", GatherNodesStream.class)
@ -714,10 +714,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION)); String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
if (location == null) { if (location == null) {
//Refresh the cluster property file to make sure the value set for location is the latest //Refresh the cluster property file to make sure the value set for location is the latest
h.coreContainer.getZkController().getZkStateReader().forceUpdateClusterProperties();
// Check if the location is specified in the cluster property. // Check if the location is specified in the cluster property.
location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null); location = new ClusterProperties(h.coreContainer.getZkController().getZkClient()).getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
if (location == null) { if (location == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query" throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ " parameter or as a default repository property or as a cluster property."); + " parameter or as a default repository property or as a cluster property.");
@ -755,10 +753,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION)); String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
if (location == null) { if (location == null) {
//Refresh the cluster property file to make sure the value set for location is the latest //Refresh the cluster property file to make sure the value set for location is the latest
h.coreContainer.getZkController().getZkStateReader().forceUpdateClusterProperties();
// Check if the location is specified in the cluster property. // Check if the location is specified in the cluster property.
location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", null); location = new ClusterProperties(h.coreContainer.getZkController().getZkClient()).getClusterProperty("location", null);
if (location == null) { if (location == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query" throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ " parameter or as a default repository property or as a cluster property."); + " parameter or as a default repository property or as a cluster property.");
@ -34,6 +34,7 @@ import java.util.concurrent.Future;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
@ -59,9 +60,13 @@ import org.apache.solr.core.CachingDirectoryFactory;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.apache.solr.handler.RestoreCore; import org.apache.solr.handler.RestoreCore;
import org.apache.solr.handler.SnapShooter; import org.apache.solr.handler.SnapShooter;
import org.apache.solr.handler.admin.CoreAdminHandler.CoreAdminOp; import org.apache.solr.handler.admin.CoreAdminHandler.CoreAdminOp;
@ -794,20 +799,24 @@ enum CoreAdminOperation implements CoreAdminOp {
+ " parameter or as a default repository property"); + " parameter or as a default repository property");
} }
// An optional parameter to describe the snapshot to be backed up. If this // An optional parameter to describe the snapshot to be backed up. If this
// parameter is not supplied, the latest index commit is backed up. // parameter is not supplied, the latest index commit is backed up.
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
try (SolrCore core = it.handler.coreContainer.getCore(cname)) { try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
SnapShooter snapShooter = new SnapShooter(repository, core, location, name); SnapShooter snapShooter = new SnapShooter(repository, core, location, name, commitName);
// validateCreateSnapshot will create parent dirs instead of throw; that choice is dubious. // validateCreateSnapshot will create parent dirs instead of throw; that choice is dubious.
// But we want to throw. One reason is that // But we want to throw. One reason is that
// this dir really should, in fact must, already exist here if triggered via a collection backup on a shared // this dir really should, in fact must, already exist here if triggered via a collection backup on a shared
// file system. Otherwise, perhaps the FS location isn't shared -- we want an error. // file system. Otherwise, perhaps the FS location isn't shared -- we want an error.
if (!snapShooter.getBackupRepository().exists(snapShooter.getLocation())) { if (!snapShooter.getBackupRepository().exists(snapShooter.getLocation())) {
throw new SolrException(ErrorCode.BAD_REQUEST, throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Directory to contain snapshots doesn't exist: " + snapShooter.getLocation()); "Directory to contain snapshots doesn't exist: " + snapShooter.getLocation());
} }
snapShooter.validateCreateSnapshot(); snapShooter.validateCreateSnapshot();
snapShooter.createSnapshot(); snapShooter.createSnapshot();
} catch (Exception e) { } catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Failed to backup core=" + cname + " because " + e, e); "Failed to backup core=" + cname + " because " + e, e);
} }
}), }),
@ -845,6 +854,92 @@ enum CoreAdminOperation implements CoreAdminOp {
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to restore core=" + core.getName()); throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to restore core=" + core.getName());
} }
} }
}),
CREATESNAPSHOT_OP(CREATESNAPSHOT, it -> {
CoreContainer cc = it.handler.getCoreContainer();
final SolrParams params = it.req.getParams();
String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
String cname = params.required().get(CoreAdminParams.CORE);
try (SolrCore core = cc.getCore(cname)) {
if (core == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
}
String indexDirPath = core.getIndexDir();
IndexCommit ic = core.getDeletionPolicy().getLatestCommit();
if (ic == null) {
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
ic = searcher.get().getIndexReader().getIndexCommit();
} finally {
searcher.decref();
}
}
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
mgr.snapshot(commitName, indexDirPath, ic.getGeneration());
it.rsp.add("core", core.getName());
it.rsp.add("commitName", commitName);
it.rsp.add("indexDirPath", indexDirPath);
it.rsp.add("generation", ic.getGeneration());
}
}),
DELETESNAPSHOT_OP(DELETESNAPSHOT, it -> {
CoreContainer cc = it.handler.getCoreContainer();
final SolrParams params = it.req.getParams();
String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
String cname = params.required().get(CoreAdminParams.CORE);
try (SolrCore core = cc.getCore(cname)) {
if (core == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
}
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
Optional<SnapshotMetaData> metadata = mgr.release(commitName);
if (metadata.isPresent()) {
long gen = metadata.get().getGenerationNumber();
String indexDirPath = metadata.get().getIndexDirPath();
// If the directory storing the snapshot is not the same as the *current* core
// index directory, then delete the files corresponding to this snapshot.
// Otherwise we leave the index files related to snapshot as is (assuming the
// underlying Solr IndexDeletionPolicy will clean them up appropriately).
if (!indexDirPath.equals(core.getIndexDir())) {
Directory d = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
try {
SolrSnapshotManager.deleteIndexFiles(d, mgr.listSnapshotsInIndexDir(indexDirPath), gen);
} finally {
core.getDirectoryFactory().release(d);
}
}
}
}
}),
LISTSNAPSHOTS_OP(LISTSNAPSHOTS, it -> {
CoreContainer cc = it.handler.getCoreContainer();
final SolrParams params = it.req.getParams();
String cname = params.required().get(CoreAdminParams.CORE);
try ( SolrCore core = cc.getCore(cname) ) {
if (core == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
}
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
NamedList result = new NamedList();
for (String name : mgr.listSnapshots()) {
Optional<SnapshotMetaData> metadata = mgr.getSnapshotMetaData(name);
if ( metadata.isPresent() ) {
NamedList<String> props = new NamedList<>();
props.add("generation", String.valueOf(metadata.get().getGenerationNumber()));
props.add("indexDirPath", metadata.get().getIndexDirPath());
result.add(name, props);
}
}
it.rsp.add("snapshots", result);
}
}); });
final CoreAdminParams.CoreAdminAction action; final CoreAdminParams.CoreAdminAction action;
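The three new core-admin operations expose per-core snapshot management over the standard admin endpoint. A hedged example of how they might be invoked, assuming the usual /solr/admin/cores path and that CoreAdminParams.CORE and CoreAdminParams.COMMIT_NAME map to the core and commitName request parameters (the core and snapshot names here are made up):

    /solr/admin/cores?action=CREATESNAPSHOT&core=techproducts&commitName=nightly
    /solr/admin/cores?action=LISTSNAPSHOTS&core=techproducts
    /solr/admin/cores?action=DELETESNAPSHOT&core=techproducts&commitName=nightly

CREATESNAPSHOT records the current commit generation under the given name, LISTSNAPSHOTS returns each name with its generation and indexDirPath, and DELETESNAPSHOT releases the name, deleting the snapshot's index files only when they live outside the core's current index directory.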
@ -176,7 +176,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
query = rb.getQuery(); query = rb.getQuery();
} else { } else {
try { try {
QParser parser = QParser.getParser(qs, null, req); QParser parser = QParser.getParser(qs, req);
query = parser.getQuery(); query = parser.getQuery();
} catch (Exception e) { } catch (Exception e) {
throw new IOException(e); throw new IOException(e);
@ -198,7 +198,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
try { try {
for (String fq : fqs) { for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) { if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, null, req); QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery()); newFilters.add(fqp.getQuery());
} }
} }
@ -202,7 +202,7 @@ public class QueryComponent extends SearchComponent
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters); filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) { for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) { if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, null, req); QParser fqp = QParser.getParser(fq, req);
filters.add(fqp.getQuery()); filters.add(fqp.getQuery());
} }
} }
@ -158,7 +158,7 @@ public class RealTimeGetComponent extends SearchComponent
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters); filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) { for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) { if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, null, req); QParser fqp = QParser.getParser(fq, req);
filters.add(fqp.getQuery()); filters.add(fqp.getQuery());
} }
} }
@ -249,7 +249,8 @@ public class RealTimeGetComponent extends SearchComponent
docid = segid + ctx.docBase; docid = segid + ctx.docBase;
if (rb.getFilters() != null) { if (rb.getFilters() != null) {
for (Query q : rb.getFilters()) { for (Query raw : rb.getFilters()) {
Query q = raw.rewrite(searcher.getIndexReader());
Scorer scorer = searcher.createWeight(q, false, 1f).scorer(ctx); Scorer scorer = searcher.createWeight(q, false, 1f).scorer(ctx);
if (scorer == null || segid != scorer.iterator().advance(segid)) { if (scorer == null || segid != scorer.iterator().advance(segid)) {
// filter doesn't match. // filter doesn't match.
@ -448,7 +449,7 @@ public class RealTimeGetComponent extends SearchComponent
ZkController zkController = rb.req.getCore().getCoreDescriptor().getCoreContainer().getZkController(); ZkController zkController = rb.req.getCore().getCoreDescriptor().getCoreContainer().getZkController();
// if shards=... then use that // if shards=... then use that
if (zkController != null && params.get("shards") == null) { if (zkController != null && params.get(ShardParams.SHARDS) == null) {
CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor(); CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
String collection = cloudDescriptor.getCollectionName(); String collection = cloudDescriptor.getCollectionName();
@ -470,32 +471,18 @@ public class RealTimeGetComponent extends SearchComponent
for (Map.Entry<String,List<String>> entry : sliceToId.entrySet()) { for (Map.Entry<String,List<String>> entry : sliceToId.entrySet()) {
String shard = entry.getKey(); String shard = entry.getKey();
String shardIdList = StrUtils.join(entry.getValue(), ',');
ShardRequest sreq = new ShardRequest(); ShardRequest sreq = createShardRequest(rb, entry.getValue());
sreq.purpose = 1;
// sreq.shards = new String[]{shard}; // TODO: would be nice if this would work... // sreq.shards = new String[]{shard}; // TODO: would be nice if this would work...
sreq.shards = sliceToShards(rb, collection, shard); sreq.shards = sliceToShards(rb, collection, shard);
sreq.actualShards = sreq.shards; sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq); rb.addRequest(this, sreq);
} }
} else { } else {
String shardIdList = StrUtils.join(reqIds.allIds, ','); ShardRequest sreq = createShardRequest(rb, reqIds.allIds);
ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
sreq.shards = null; // ALL sreq.shards = null; // ALL
sreq.actualShards = sreq.shards; sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq); rb.addRequest(this, sreq);
} }
@ -503,6 +490,28 @@ public class RealTimeGetComponent extends SearchComponent
return ResponseBuilder.STAGE_DONE; return ResponseBuilder.STAGE_DONE;
} }
/**
* Helper method for creating a new ShardRequest for the specified ids, based on the params
* specified for the current request. The new ShardRequest does not yet know anything about
* which shard/slice it will be sent to.
*/
private ShardRequest createShardRequest(final ResponseBuilder rb, final List<String> ids) {
final ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
sreq.params = new ModifiableSolrParams(rb.req.getParams());
// TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set(ShardParams.SHARDS_QT,"/get");
sreq.params.set("distrib",false);
sreq.params.remove(ShardParams.SHARDS);
sreq.params.remove("id");
sreq.params.remove("ids");
sreq.params.set("ids", StrUtils.join(ids, ','));
return sreq;
}
private String[] sliceToShards(ResponseBuilder rb, String collection, String slice) { private String[] sliceToShards(ResponseBuilder rb, String collection, String slice) {
String lookup = collection + '_' + slice; // seems either form may be filled in rb.slices? String lookup = collection + '_' + slice; // seems either form may be filled in rb.slices?
@ -60,7 +60,6 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocSet; import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.AbstractLuceneSpellChecker; import org.apache.solr.spelling.AbstractLuceneSpellChecker;
@ -242,7 +241,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
try { try {
if (maxResultsFilterQueryString != null) { if (maxResultsFilterQueryString != null) {
// Get the default Lucene query parser // Get the default Lucene query parser
QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE, rb.req); QParser parser = QParser.getParser(maxResultsFilterQueryString, rb.req);
DocSet s = searcher.getDocSet(parser.getQuery()); DocSet s = searcher.getDocSet(parser.getQuery());
maxResultsByFilters = s.size(); maxResultsByFilters = s.size();
} else { } else {
@ -321,7 +321,7 @@ public class SimpleFacets {
public void getFacetQueryCount(ParsedParams parsed, NamedList<Integer> res) throws SyntaxError, IOException { public void getFacetQueryCount(ParsedParams parsed, NamedList<Integer> res) throws SyntaxError, IOException {
// TODO: slight optimization would prevent double-parsing of any localParams // TODO: slight optimization would prevent double-parsing of any localParams
// TODO: SOLR-7753 // TODO: SOLR-7753
Query qobj = QParser.getParser(parsed.facetValue, null, req).getQuery(); Query qobj = QParser.getParser(parsed.facetValue, req).getQuery();
if (qobj == null) { if (qobj == null) {
res.add(parsed.key, 0); res.add(parsed.key, 0);
@ -77,7 +77,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
BitSetProducer parentsFilter = null; BitSetProducer parentsFilter = null;
try { try {
Query parentFilterQuery = QParser.getParser( parentFilter, null, req).getQuery(); Query parentFilterQuery = QParser.getParser( parentFilter, req).getQuery();
parentsFilter = new QueryBitSetProducer(new QueryWrapperFilter(parentFilterQuery)); parentsFilter = new QueryBitSetProducer(new QueryWrapperFilter(parentFilterQuery));
} catch (SyntaxError syntaxError) { } catch (SyntaxError syntaxError) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct parent filter query" ); throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct parent filter query" );
@ -86,7 +86,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
Query childFilterQuery = null; Query childFilterQuery = null;
if(childFilter != null) { if(childFilter != null) {
try { try {
childFilterQuery = QParser.getParser( childFilter, null, req).getQuery(); childFilterQuery = QParser.getParser( childFilter, req).getQuery();
} catch (SyntaxError syntaxError) { } catch (SyntaxError syntaxError) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct child filter query" ); throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct child filter query" );
} }
@ -76,6 +76,15 @@ import org.apache.solr.search.TermsQParserPlugin;
* its native parameters like <code>collection, shards</code> for subquery, e.g.<br> * its native parameters like <code>collection, shards</code> for subquery, e.g.<br>
* <code>q=*:*&amp;fl=*,foo:[subquery]&amp;foo.q=cloud&amp;foo.collection=departments</code> * <code>q=*:*&amp;fl=*,foo:[subquery]&amp;foo.q=cloud&amp;foo.collection=departments</code>
* *
* <h3>When used in Real Time Get</h3>
* <p>
* When used in the context of a Real Time Get, the <i>values</i> from each document that are used
* in the subquery are the "real time" values (possibly from the transaction log), but the query
* itself is still executed against the currently open searcher. Note that this means if a
* document is updated but not yet committed, an RTG request for that document that uses
* <code>[subquery]</code> could include the older (committed) version of that document,
* with different field values, in the subquery results.
* </p>
*/ */
public class SubQueryAugmenterFactory extends TransformerFactory{ public class SubQueryAugmenterFactory extends TransformerFactory{
@ -304,6 +313,14 @@ class SubQueryAugmenter extends DocTransformer {
return name; return name;
} }
/**
* Returns false -- this transformer does use an IndexSearcher, but it does not (necessarily) need
* the searcher from the ResultContext of the document being returned. Instead we use the current
* "live" searcher for the specified core.
*/
@Override
public boolean needsSolrIndexSearcher() { return false; }
@Override @Override
public void transform(SolrDocument doc, int docid, float score) { public void transform(SolrDocument doc, int docid, float score) {
@ -1500,10 +1500,12 @@ public class IndexSchema {
(v1, v2) -> v2, (v1, v2) -> v2,
LinkedHashMap::new)); LinkedHashMap::new));
} }
public static Map<String,String> nameMapping = Collections.unmodifiableMap(Stream.of(Handler.values())
.collect(Collectors.toMap(Handler::getNameLower , Handler::getRealName)));
} }
public static Map<String,String> nameMapping = Collections.unmodifiableMap(Stream.of(SchemaProps.Handler.values())
.collect(Collectors.toMap(SchemaProps.Handler::getNameLower , SchemaProps.Handler::getRealName)));
public Map<String, Object> getNamedPropertyValues(String name, SolrParams params) { public Map<String, Object> getNamedPropertyValues(String name, SolrParams params) {
return new SchemaProps(name, params, this).toMap(); return new SchemaProps(name, params, this).toMap();