Merge remote-tracking branch 'origin/branch_6x' into branch_6x
commit 74653e9dbf
@@ -40,6 +40,13 @@ New Features
Polygon instances from a standard GeoJSON string (Robert Muir, Mike
McCandless)

* LUCENE-7395: PerFieldSimilarityWrapper requires a default similarity
for calculating query norm and coordination factor in Lucene 6.x.
Lucene 7 will no longer have those factors. (Uwe Schindler, Sascha Markus)

* SOLR-9279: Queries module: new ComparisonBoolFunction base class
(Doug Turnbull via David Smiley)

Bug Fixes

* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
@@ -57,6 +64,10 @@ Bug Fixes
* LUCENE-7391: Fix performance regression in MemoryIndex's fields() introduced
in Lucene 6. (Steve Mason via David Smiley)

* LUCENE-7395, SOLR-9315: Fix PerFieldSimilarityWrapper to also delegate query
norm and coordination factor using a default similarity added as ctor param.
(Uwe Schindler, Sascha Markus)

Improvements

* LUCENE-7323: Compound file writing now verifies the incoming
@@ -110,10 +121,9 @@ Improvements

* LUCENE-7385: Improve/fix assert messages in SpanScorer. (David Smiley)

* LUCENE-7390: Improve performance of indexing points by allowing the
codec to use transient heap in proportion to IndexWriter's RAM
buffer, instead of a fixed 16.0 MB. A custom codec can still
override the buffer size itself. (Mike McCandless)
* LUCENE-7393: Add ICUTokenizer option to parse Myanmar text as syllables instead of words,
because the ICU word-breaking algorithm has some issues. This allows for the previous
tokenization used before Lucene 5. (AM, Robert Muir)

Optimizations

@@ -129,6 +139,9 @@ Optimizations
* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
(Adrien Grand)

* LUCENE-7396, LUCENE-7399: Faster flush of points.
(Adrien Grand, Mike McCandless)

Other

* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
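As a quick illustration of the LUCENE-7393 entry above, the syllable mode is selected through the two-argument DefaultICUTokenizerConfig that this commit introduces; a minimal sketch (the analyzer variable name is arbitrary), passing myanmarAsWords=false:

    Analyzer syllables = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // cjkAsWords=true keeps the default CJK handling; myanmarAsWords=false
        // switches Myanmar text to the syllable break rules shipped below.
        Tokenizer tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(true, false));
        return new TokenStreamComponents(tokenizer);
      }
    };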
@@ -402,6 +402,7 @@ public class MinHashFilter extends TokenFilter {
}

/** Returns the MurmurHash3_x64_128 hash, placing the result in "out". */
@SuppressWarnings("fallthrough") // the huge switch is designed to use fall through into cases!
static void murmurhash3_x64_128(byte[] key, int offset, int len, int seed, LongPair out) {
// The original algorithm does have a 32 bit unsigned seed.
// We have to mask to match the behavior of the unsigned types and prevent sign extension.
lucene/analysis/icu/src/data/uax29/MyanmarSyllable.rbbi (new file, 50 lines)
@@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Parses Myanmar text, with syllable as token.
#

$Cons = [[:Other_Letter:]&[:Myanmar:]];
$Virama = [\u1039];
$Asat = [\u103A];

$WordJoin = [:Line_Break=Word_Joiner:];

#
# default numerical definitions
#
$Extend = [\p{Word_Break = Extend}];
$Format = [\p{Word_Break = Format}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
$MidNumEx = $MidNum ($Extend | $Format)*;
$NumericEx = $Numeric ($Extend | $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;

$ConsEx = $Cons ($Extend | $Format)*;
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;

!!forward;
$MyanmarJoinedSyllableEx {200};

# default numeric rules
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};
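While experimenting, rule text like the above can also be compiled on the fly with ICU4J before being baked into MyanmarSyllable.brk; a hedged sketch, assuming ICU4J's RuleBasedBreakIterator(String) rule compiler and that the .rbbi file sits in the working directory:

    RuleBasedBreakIterator bi = new RuleBasedBreakIterator(
        new String(Files.readAllBytes(Paths.get("MyanmarSyllable.rbbi")), StandardCharsets.UTF_8));
    bi.setText("သက်ဝင်လှုပ်ရှားစေပြီး");
    for (int start = bi.first(), end = bi.next();
         end != RuleBasedBreakIterator.DONE;
         start = end, end = bi.next()) {
      System.out.println(start + "-" + end);  // one [start, end) span per syllable
    }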
@@ -63,9 +63,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
// the same as ROOT, except no dictionary segmentation for cjk
private static final BreakIterator defaultBreakIterator =
readBreakIterator("Default.brk");
private static final BreakIterator myanmarSyllableIterator =
readBreakIterator("MyanmarSyllable.brk");

// TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
private final boolean cjkAsWords;
private final boolean myanmarAsWords;

/**
* Creates a new config. This object is lightweight, but the first
@@ -74,9 +77,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
* otherwise text will be segmented according to UAX#29 defaults.
* If this is true, all Han+Hiragana+Katakana words will be tagged as
* IDEOGRAPHIC.
* @param myanmarAsWords true if Myanmar text should undergo dictionary-based segmentation,
* otherwise it will be tokenized as syllables.
*/
public DefaultICUTokenizerConfig(boolean cjkAsWords) {
public DefaultICUTokenizerConfig(boolean cjkAsWords, boolean myanmarAsWords) {
this.cjkAsWords = cjkAsWords;
this.myanmarAsWords = myanmarAsWords;
}

@Override
@@ -88,6 +94,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
public BreakIterator getBreakIterator(int script) {
switch(script) {
case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone();
case UScript.MYANMAR:
if (myanmarAsWords) {
return (BreakIterator)defaultBreakIterator.clone();
} else {
return (BreakIterator)myanmarSyllableIterator.clone();
}
default: return (BreakIterator)defaultBreakIterator.clone();
}
}
@@ -68,7 +68,7 @@ public final class ICUTokenizer extends Tokenizer {
* @see DefaultICUTokenizerConfig
*/
public ICUTokenizer() {
this(new DefaultICUTokenizerConfig(true));
this(new DefaultICUTokenizerConfig(true, true));
}

/**
@@ -79,6 +79,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
private final Map<Integer,String> tailored;
private ICUTokenizerConfig config;
private final boolean cjkAsWords;
private final boolean myanmarAsWords;

/** Creates a new ICUTokenizerFactory */
public ICUTokenizerFactory(Map<String,String> args) {
@@ -95,6 +96,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
}
}
cjkAsWords = getBoolean(args, "cjkAsWords", true);
myanmarAsWords = getBoolean(args, "myanmarAsWords", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -104,7 +106,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
public void inform(ResourceLoader loader) throws IOException {
assert tailored != null : "init must be called first!";
if (tailored.isEmpty()) {
config = new DefaultICUTokenizerConfig(cjkAsWords);
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
} else {
final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
@@ -112,7 +114,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
String resourcePath = entry.getValue();
breakers[code] = parseRules(resourcePath, loader);
}
config = new DefaultICUTokenizerConfig(cjkAsWords) {
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {

@Override
public BreakIterator getBreakIterator(int script) {
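For completeness, the new factory argument can also be exercised directly from code (in a Solr schema it would be the myanmarAsWords attribute on the tokenizer element); a sketch, assuming a classpath resource loader and no tailored rulefiles:

    Map<String,String> args = new HashMap<>();
    args.put("cjkAsWords", "true");
    args.put("myanmarAsWords", "false");   // syllable tokenization for Myanmar
    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(ICUTokenizerFactory.class));
    Tokenizer tokenizer = factory.create();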
Binary file not shown.
@@ -42,7 +42,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
tokenizer.setReader(new StringReader(input));
assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
@@ -53,7 +53,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
sb.append('a');
}
String input = sb.toString();
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
tokenizer.setReader(new StringReader(input));
char token[] = new char[4096];
Arrays.fill(token, 'a');
@@ -75,7 +75,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
TokenFilter filter = new ICUNormalizer2Filter(tokenizer);
return new TokenStreamComponents(tokenizer, filter);
}
@@ -34,7 +34,7 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true)));
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true, true)));
}
};
}
@@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.icu.segmentation;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;

/** Test tokenizing Myanmar text into syllables */
public class TestMyanmarSyllable extends BaseTokenStreamTestCase {

Analyzer a;

@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, false));
return new TokenStreamComponents(tokenizer);
}
};
}

@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}

/** as opposed to dictionary break of သက်ဝင်|လှုပ်ရှား|စေ|ပြီး */
public void testBasics() throws Exception {
assertAnalyzesTo(a, "သက်ဝင်လှုပ်ရှားစေပြီး", new String[] { "သက်", "ဝင်", "လှုပ်", "ရှား", "စေ", "ပြီး" });
}

// simple tests from "A Rule-based Syllable Segmentation of Myanmar Text"
// * http://www.aclweb.org/anthology/I08-3010
// (see also the presentation: http://gii2.nagaokaut.ac.jp/gii/media/share/20080901-ZMM%20Presentation.pdf)
// The words are fake, we just test the categories.
// note that currently our algorithm is not sophisticated enough to handle some of the special cases!

/** consonant */
public void testC() throws Exception {
assertAnalyzesTo(a, "ကက", new String[] { "က", "က" });
}

/** consonant + sign */
public void testCF() throws Exception {
assertAnalyzesTo(a, "ကံကံ", new String[] { "ကံ", "ကံ" });
}

/** consonant + consonant + asat */
public void testCCA() throws Exception {
assertAnalyzesTo(a, "ကင်ကင်", new String[] { "ကင်", "ကင်" });
}

/** consonant + consonant + asat + sign */
public void testCCAF() throws Exception {
assertAnalyzesTo(a, "ကင်းကင်း", new String[] { "ကင်း", "ကင်း" });
}

/** consonant + vowel */
public void testCV() throws Exception {
assertAnalyzesTo(a, "ကာကာ", new String[] { "ကာ", "ကာ" });
}

/** consonant + vowel + sign */
public void testCVF() throws Exception {
assertAnalyzesTo(a, "ကားကား", new String[] { "ကား", "ကား" });
}

/** consonant + vowel + vowel + asat */
public void testCVVA() throws Exception {
assertAnalyzesTo(a, "ကော်ကော်", new String[] { "ကော်", "ကော်" });
}

/** consonant + vowel + vowel + consonant + asat */
public void testCVVCA() throws Exception {
assertAnalyzesTo(a, "ကောင်ကောင်", new String[] { "ကောင်", "ကောင်" });
}

/** consonant + vowel + vowel + consonant + asat + sign */
public void testCVVCAF() throws Exception {
assertAnalyzesTo(a, "ကောင်းကောင်း", new String[] { "ကောင်း", "ကောင်း" });
}

/** consonant + medial */
public void testCM() throws Exception {
assertAnalyzesTo(a, "ကျကျ", new String[] { "ကျ", "ကျ" });
}

/** consonant + medial + sign */
public void testCMF() throws Exception {
assertAnalyzesTo(a, "ကျံကျံ", new String[] { "ကျံ", "ကျံ" });
}

/** consonant + medial + consonant + asat */
public void testCMCA() throws Exception {
assertAnalyzesTo(a, "ကျင်ကျင်", new String[] { "ကျင်", "ကျင်" });
}

/** consonant + medial + consonant + asat + sign */
public void testCMCAF() throws Exception {
assertAnalyzesTo(a, "ကျင်းကျင်း", new String[] { "ကျင်း", "ကျင်း" });
}

/** consonant + medial + vowel */
public void testCMV() throws Exception {
assertAnalyzesTo(a, "ကျာကျာ", new String[] { "ကျာ", "ကျာ" });
}

/** consonant + medial + vowel + sign */
public void testCMVF() throws Exception {
assertAnalyzesTo(a, "ကျားကျား", new String[] { "ကျား", "ကျား" });
}

/** consonant + medial + vowel + vowel + asat */
public void testCMVVA() throws Exception {
assertAnalyzesTo(a, "ကျော်ကျော်", new String[] { "ကျော်", "ကျော်" });
}

/** consonant + medial + vowel + vowel + consonant + asat */
public void testCMVVCA() throws Exception {
assertAnalyzesTo(a, "ကြောင်ကြောင်", new String[] { "ကြောင်", "ကြောင်"});
}

/** consonant + medial + vowel + vowel + consonant + asat + sign */
public void testCMVVCAF() throws Exception {
assertAnalyzesTo(a, "ကြောင်းကြောင်း", new String[] { "ကြောင်း", "ကြောင်း"});
}

/** independent vowel */
public void testI() throws Exception {
assertAnalyzesTo(a, "ဪဪ", new String[] { "ဪ", "ဪ" });
}

/** independent vowel */
public void testE() throws Exception {
assertAnalyzesTo(a, "ဣဣ", new String[] { "ဣ", "ဣ" });
}
}
@@ -46,7 +46,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
TokenStream result = new CJKBigramFilter(source);
return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET));
}
@@ -60,7 +60,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
analyzer2 = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
// we put this before the CJKBigramFilter, because the normalization might combine
// some halfwidth katakana forms, which will affect the bigramming.
TokenStream result = new ICUNormalizer2Filter(source);
@@ -68,7 +68,7 @@ class SimpleTextPointsWriter extends PointsWriter {
}

@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {

boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);

@@ -79,7 +79,7 @@ class SimpleTextPointsWriter extends PointsWriter {
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointNumBytes(),
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
maxMBSortInHeap,
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
values.size(fieldInfo.name),
singleValuePerDoc) {
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;

import org.apache.lucene.util.BytesRef;

/** {@link PointsReader} whose order of points can be changed.
* This class is useful for codecs to optimize flush.
* @lucene.internal */
public abstract class MutablePointsReader extends PointsReader {

/** Sole constructor. */
protected MutablePointsReader() {}

/** Set {@code packedValue} with a reference to the packed bytes of the i-th value. */
public abstract void getValue(int i, BytesRef packedValue);

/** Get the k-th byte of the i-th value. */
public abstract byte getByteAt(int i, int k);

/** Return the doc ID of the i-th value. */
public abstract int getDocID(int i);

/** Swap the i-th and j-th values. */
public abstract void swap(int i, int j);

}
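The contract is easiest to see on a toy holder that is deliberately not tied to the codec API (hypothetical class, for illustration only): swap reorders values and doc IDs together so a writer can sort points in place, while getValue hands out a reference rather than a copy.

    final class PointBuffer {                    // illustration, not a Lucene class
      final byte[][] packedValues;               // one packed value per point
      final int[] docIDs;

      PointBuffer(byte[][] packedValues, int[] docIDs) {
        this.packedValues = packedValues;
        this.docIDs = docIDs;
      }

      void getValue(int i, BytesRef out) {       // expose a reference, no copying
        out.bytes = packedValues[i];
        out.offset = 0;
        out.length = packedValues[i].length;
      }

      byte getByteAt(int i, int k) { return packedValues[i][k]; }

      int getDocID(int i) { return docIDs[i]; }

      void swap(int i, int j) {                  // keep value and doc ID aligned
        byte[] v = packedValues[i]; packedValues[i] = packedValues[j]; packedValues[j] = v;
        int d = docIDs[i]; docIDs[i] = docIDs[j]; docIDs[j] = d;
      }
    }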
@@ -22,7 +22,6 @@ import java.io.IOException;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.bkd.BKDWriter;

/** Abstract API to write points
*
@@ -35,9 +34,8 @@ public abstract class PointsWriter implements Closeable {
protected PointsWriter() {
}

/** Write all values contained in the provided reader. {@code maxMBSortInHeap} is the maximum
* transient heap that can be used to sort values, before spilling to disk for offline sorting */
public abstract void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException;
/** Write all values contained in the provided reader */
public abstract void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException;

/** Default naive merge implementation for one field: it just re-indexes all the values
* from the incoming segment. The default codec overrides this for 1D fields and uses
@@ -147,10 +145,7 @@ public abstract class PointsWriter implements Closeable {
public int getDocCount(String fieldName) {
return finalDocCount;
}
},
// TODO: also let merging of > 1D fields tap into IW's indexing buffer size, somehow (1D fields do an optimized merge sort
// and don't need heap)
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
});
}

/** Default merge implementation to merge incoming points readers by visiting all their points and
@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Map;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.FieldInfo;
@@ -39,9 +40,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;

/** Writes dimensional values
*
* @lucene.experimental */
/** Writes dimensional values */
public class Lucene60PointsWriter extends PointsWriter implements Closeable {

/** Output used to write the BKD tree data file */
@@ -52,13 +51,15 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {

final SegmentWriteState writeState;
final int maxPointsInLeafNode;
final double maxMBSortInHeap;
private boolean finished;

/** Full constructor */
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode) throws IOException {
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
assert writeState.fieldInfos.hasPointValues();
this.writeState = writeState;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxMBSortInHeap = maxMBSortInHeap;
String dataFileName = IndexFileNames.segmentFileName(writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene60PointsFormat.DATA_EXTENSION);
@@ -80,11 +81,11 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {

/** Uses the defaults values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap} (16.0) */
public Lucene60PointsWriter(SegmentWriteState writeState) throws IOException {
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}

@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {

boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);

@@ -98,6 +99,14 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
values.size(fieldInfo.name),
singleValuePerDoc)) {

if (values instanceof MutablePointsReader) {
final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointsReader) values);
if (fp != -1) {
indexFPs.put(fieldInfo.name, fp);
}
return;
}

values.intersect(fieldInfo.name, new IntersectVisitor() {
@Override
public void visit(int docID) {
@@ -173,8 +182,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
// NOTE: not used, since BKDWriter.merge does a merge sort:
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
maxMBSortInHeap,
totMaxSize,
singleValuePerDoc)) {
List<BKDReader> bkdReaders = new ArrayList<>();
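Since writeField no longer takes a heap budget, a codec that wants a non-default sort buffer passes it at construction time instead; a rough sketch using the 6.x codec extension points (the codec name and the 2048/64.0 values are arbitrary):

    Codec custom = new FilterCodec("CustomPointsCodec", Codec.getDefault()) {
      @Override
      public PointsFormat pointsFormat() {
        return new PointsFormat() {
          @Override
          public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
            // 2048 points per leaf and 64 MB of transient sort heap instead of the defaults
            return new Lucene60PointsWriter(state, 2048, 64.0);
          }
          @Override
          public PointsReader fieldsReader(SegmentReadState state) throws IOException {
            return new Lucene60PointsReader(state);
          }
        };
      }
    };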
@@ -257,7 +257,7 @@ public class Field implements IndexableField {
/**
* The value of the field as a String, or null. If null, the Reader value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
* binaryValue() must be set.
*/
@Override
public String stringValue() {
@@ -271,7 +271,7 @@ public class Field implements IndexableField {
/**
* The value of the field as a Reader, or null. If null, the String value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
* binaryValue() must be set.
*/
@Override
public Reader readerValue() {
@@ -420,7 +420,7 @@ public class Field implements IndexableField {
/**
* Expert: sets the token stream to be used for indexing and causes
* isIndexed() and isTokenized() to return true. May be combined with stored
* values from stringValue() or getBinaryValue()
* values from stringValue() or binaryValue()
*/
public void setTokenStream(TokenStream tokenStream) {
if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {
@@ -153,7 +153,7 @@ class DocumentsWriterPerThread {
final Allocator byteBlockAllocator;
final IntBlockPool.Allocator intBlockAllocator;
private final AtomicLong pendingNumDocs;
final LiveIndexWriterConfig indexWriterConfig;
private final LiveIndexWriterConfig indexWriterConfig;
private final boolean enableTestPoints;
private final IndexWriter indexWriter;
@@ -762,7 +762,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* {@link #getConfig()}.
*
* <p>
* <b>NOTE:</b> after this writer is created, the given configuration instance
* <b>NOTE:</b> after ths writer is created, the given configuration instance
* cannot be passed to another writer.
*
* @param d
@@ -168,14 +168,9 @@ public class LiveIndexWriterConfig {

/**
* Determines the amount of RAM that may be used for buffering added documents
* and deletions before beginning to flush them to the Directory. For
* faster indexing performance it's best to use as large a RAM buffer as you can.
* <p>
* Note that this setting is not a hard limit on memory usage during indexing, as
* transient and non-trivial memory well beyond this buffer size may be used,
* for example due to segment merges or writing points to new segments.
* For application stability the available memory in the JVM
* should be significantly larger than the RAM buffer used for indexing.
* and deletions before they are flushed to the Directory. Generally for
* faster indexing performance it's best to flush by RAM usage instead of
* document count and use as large a RAM buffer as you can.
* <p>
* When this is set, the writer will flush whenever buffered documents and
* deletions use this much RAM. Pass in
@@ -183,6 +178,14 @@ public class LiveIndexWriterConfig {
* due to RAM usage. Note that if flushing by document count is also enabled,
* then the flush will be triggered by whichever comes first.
* <p>
* The maximum RAM limit is inherently determined by the JVMs available
* memory. Yet, an {@link IndexWriter} session can consume a significantly
* larger amount of memory than the given RAM limit since this limit is just
* an indicator when to flush memory resident documents to the Directory.
* Flushes are likely happen concurrently while other threads adding documents
* to the writer. For application stability the available memory in the JVM
* should be significantly larger than the RAM buffer used for indexing.
* <p>
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
* approximate. Specifically, if you delete by Query, Lucene currently has no
* way to measure the RAM usage of individual Queries so the accounting will
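The buffer the javadoc above talks about is configured once on IndexWriterConfig; a small sketch (the 256 MB figure and the index path are arbitrary):

    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setRAMBufferSizeMB(256.0);                                 // flush once ~256 MB of docs/deletes are buffered
    iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // flush by RAM only, not by doc count
    IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("/tmp/idx")), iwc);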
@@ -18,13 +18,13 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.bkd.BKDWriter;

/** Buffers up pending byte[][] value(s) per doc, then flushes when segment flushes. */
class PointValuesWriter {
@@ -35,8 +35,7 @@ class PointValuesWriter {
private int numPoints;
private int numDocs;
private int lastDocID = -1;
private final byte[] packedValue;
private final LiveIndexWriterConfig indexWriterConfig;
private final int packedBytesLength;

public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
@@ -44,8 +43,7 @@ class PointValuesWriter {
this.bytes = new ByteBlockPool(docWriter.byteBlockAllocator);
docIDs = new int[16];
iwBytesUsed.addAndGet(16 * Integer.BYTES);
packedValue = new byte[fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()];
indexWriterConfig = docWriter.indexWriterConfig;
packedBytesLength = fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes();
}

// TODO: if exactly the same value is added to exactly the same doc, should we dedup?
@@ -53,9 +51,10 @@ class PointValuesWriter {
if (value == null) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": point value must not be null");
}
if (value.length != fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()) {
if (value.length != packedBytesLength) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": this field's value has length=" + value.length + " but should be " + (fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()));
}

if (docIDs.length == numPoints) {
docIDs = ArrayUtil.grow(docIDs, numPoints+1);
iwBytesUsed.addAndGet((docIDs.length - numPoints) * Integer.BYTES);
@@ -66,68 +65,111 @@ class PointValuesWriter {
numDocs++;
lastDocID = docID;
}

numPoints++;
}

public void flush(SegmentWriteState state, PointsWriter writer) throws IOException {
PointsReader reader = new MutablePointsReader() {

writer.writeField(fieldInfo,
new PointsReader() {
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
for(int i=0;i<numPoints;i++) {
bytes.readBytes(packedValue.length * i, packedValue, 0, packedValue.length);
visitor.visit(docIDs[i], packedValue);
}
}
final int[] ords = new int[numPoints];
{
for (int i = 0; i < numPoints; ++i) {
ords[i] = i;
}
}

@Override
public void checkIntegrity() {
throw new UnsupportedOperationException();
}
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
final BytesRef scratch = new BytesRef();
final byte[] packedValue = new byte[packedBytesLength];
for(int i=0;i<numPoints;i++) {
getValue(i, scratch);
assert scratch.length == packedValue.length;
System.arraycopy(scratch.bytes, scratch.offset, packedValue, 0, packedBytesLength);
visitor.visit(getDocID(i), packedValue);
}
}

@Override
public long ramBytesUsed() {
return 0L;
}
@Override
public void checkIntegrity() {
throw new UnsupportedOperationException();
}

@Override
public void close() {
}
@Override
public long ramBytesUsed() {
return 0L;
}

@Override
public byte[] getMinPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}
@Override
public void close() {
}

@Override
public byte[] getMaxPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMinPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}

@Override
public int getNumDimensions(String fieldName) {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue(String fieldName) {
throw new UnsupportedOperationException();
}

@Override
public int getBytesPerDimension(String fieldName) {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions(String fieldName) {
throw new UnsupportedOperationException();
}

@Override
public long size(String fieldName) {
return numPoints;
}
@Override
public int getBytesPerDimension(String fieldName) {
throw new UnsupportedOperationException();
}

@Override
public int getDocCount(String fieldName) {
return numDocs;
}
},
Math.max(indexWriterConfig.getRAMBufferSizeMB()/8.0, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP));
@Override
public long size(String fieldName) {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
return numPoints;
}

@Override
public int getDocCount(String fieldName) {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
return numDocs;
}

@Override
public void swap(int i, int j) {
int tmp = ords[i];
ords[i] = ords[j];
ords[j] = tmp;
}

@Override
public int getDocID(int i) {
return docIDs[ords[i]];
}

@Override
public void getValue(int i, BytesRef packedValue) {
final long offset = (long) packedBytesLength * ords[i];
packedValue.length = packedBytesLength;
bytes.setRawBytesRef(packedValue, offset);
}

@Override
public byte getByteAt(int i, int k) {
final long offset = (long) packedBytesLength * ords[i] + k;
return bytes.readByte(offset);
}
};

writer.writeField(fieldInfo, reader);
}
}
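Note that the MutablePointsReader above never moves the packed bytes: swap only permutes the ords indirection array, and getValue/getDocID dereference through it. The same idea in isolation, in plain Java:

    String[] values = {"c", "a", "b"};   // stands in for the bytes in the ByteBlockPool (never moved)
    int[] ords = {0, 1, 2};              // the only thing the sorter permutes

    int tmp = ords[0]; ords[0] = ords[1]; ords[1] = tmp;   // swap(0, 1)

    System.out.println(values[ords[0]]);  // getValue(0) through the indirection: prints "a"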
@@ -29,16 +29,54 @@ import org.apache.lucene.search.TermStatistics;
* <p>
* Subclasses should implement {@link #get(String)} to return an appropriate
* Similarity (for example, using field-specific parameter values) for the field.
* <p>
* For Lucene 6, you should pass a default similarity that is used for all non
* field-specific methods. From Lucene 7 on, this is no longer required.
*
* @lucene.experimental
*/
public abstract class PerFieldSimilarityWrapper extends Similarity {

/** Default similarity used for query norm and coordination factors. */
protected final Similarity defaultSim;

/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
* Constructor taking a default similarity for all non-field specific calculations.
* @param defaultSim is used for all non field-specific calculations, like
* {@link #queryNorm(float)} and {@link #coord(int, int)}.
*/
public PerFieldSimilarityWrapper() {}
public PerFieldSimilarityWrapper(Similarity defaultSim) {
this.defaultSim = defaultSim;
}

/**
* Backwards compatibility constructor for 6.x series that creates a per-field
* similarity where all non field-specific methods return a constant (1).
* <p>
* From Lucene 7 on, this will get the default again, because coordination
* factors and query normalization will be removed.
* @deprecated specify a default similarity for non field-specific calculations.
*/
@Deprecated
public PerFieldSimilarityWrapper() {
// a fake similarity that is only used to return the default of 1 for queryNorm and coord.
this(new Similarity() {
@Override
public long computeNorm(FieldInvertState state) {
throw new AssertionError();
}

@Override
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new AssertionError();
}

@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
throw new AssertionError();
}
});
}

@Override
public final long computeNorm(FieldInvertState state) {
@@ -59,6 +97,16 @@ public abstract class PerFieldSimilarityWrapper extends Similarity {
return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context);
}

@Override
public final float coord(int overlap, int maxOverlap) {
return defaultSim.coord(overlap, maxOverlap);
}

@Override
public final float queryNorm(float valueForNormalization) {
return defaultSim.queryNorm(valueForNormalization);
}

/**
* Returns a {@link Similarity} for scoring a field.
*/
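A minimal 6.x-style subclass using the new constructor, with the stock BM25/classic similarities as an example; the similarity passed to super() answers queryNorm and coord, while get() chooses the per-field model:

    Similarity perField = new PerFieldSimilarityWrapper(new BM25Similarity()) {
      @Override
      public Similarity get(String field) {
        // score "title" with classic TF-IDF, everything else with BM25
        return "title".equals(field) ? new ClassicSimilarity() : new BM25Similarity();
      }
    };
    searcher.setSimilarity(perField);   // searcher: an existing IndexSearcher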
@@ -459,69 +459,26 @@ public final class ArrayUtil {
* greater than or equal to it.
* This runs in linear time on average and in {@code n log(n)} time in the
* worst case.*/
public static <T> void select(T[] arr, int from, int to, int k, Comparator<T> comparator) {
if (k < from) {
throw new IllegalArgumentException("k must be >= from");
}
if (k >= to) {
throw new IllegalArgumentException("k must be < to");
}
final int maxDepth = 2 * MathUtil.log(to - from, 2);
quickSelect(arr, from, to, k, comparator, maxDepth);
public static <T> void select(T[] arr, int from, int to, int k, Comparator<? super T> comparator) {
new IntroSelector() {

T pivot;

@Override
protected void swap(int i, int j) {
ArrayUtil.swap(arr, i, j);
}

@Override
protected void setPivot(int i) {
pivot = arr[i];
}

@Override
protected int comparePivot(int j) {
return comparator.compare(pivot, arr[j]);
}
}.select(from, to, k);
}

private static <T> void quickSelect(T[] arr, int from, int to, int k, Comparator<T> comparator, int maxDepth) {
assert from <= k;
assert k < to;
if (to - from == 1) {
return;
}
if (--maxDepth < 0) {
Arrays.sort(arr, from, to, comparator);
return;
}

final int mid = (from + to) >>> 1;
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
swap(arr, from, to - 1);
}
if (comparator.compare(arr[to - 1], arr[mid]) > 0) {
swap(arr, to - 1, mid);
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
swap(arr, from, to - 1);
}
}

T pivot = arr[to - 1];

int left = from + 1;
int right = to - 2;

for (;;) {
while (comparator.compare(pivot, arr[left]) > 0) {
++left;
}

while (left < right && comparator.compare(pivot, arr[right]) <= 0) {
--right;
}

if (left < right) {
swap(arr, left, right);
--right;
} else {
break;
}
}
swap(arr, left, to - 1);

if (left == k) {
return;
} else if (left < k) {
quickSelect(arr, left + 1, to, k, comparator, maxDepth);
} else {
quickSelect(arr, from, left, k, comparator, maxDepth);
}
}
}
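After this change ArrayUtil.select is a thin adapter over IntroSelector; a typical call partitions the array around the k-th smallest element without fully sorting it:

    Integer[] data = {7, 1, 9, 4, 3};
    int k = data.length / 2;   // median position once ordered
    ArrayUtil.select(data, 0, data.length, k, Comparator.<Integer>naturalOrder());
    System.out.println(data[k]);   // 4 — the k-th smallest value now sits at index k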
@@ -378,5 +378,34 @@ public final class ByteBlockPool {
}
} while (true);
}

/**
* Set the given {@link BytesRef} so that its content is equal to the
* {@code ref.length} bytes starting at {@code offset}. Most of the time this
* method will set pointers to internal data-structures. However, in case a
* value crosses a boundary, a fresh copy will be returned.
* On the contrary to {@link #setBytesRef(BytesRef, int)}, this does not
* expect the length to be encoded with the data.
*/
public void setRawBytesRef(BytesRef ref, final long offset) {
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
int pos = (int) (offset & BYTE_BLOCK_MASK);
if (pos + ref.length <= BYTE_BLOCK_SIZE) {
ref.bytes = buffers[bufferIndex];
ref.offset = pos;
} else {
ref.bytes = new byte[ref.length];
ref.offset = 0;
readBytes(offset, ref.bytes, 0, ref.length);
}
}

/** Read a single byte at the given {@code offset}. */
public byte readByte(long offset) {
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
int pos = (int) (offset & BYTE_BLOCK_MASK);
byte[] buffer = buffers[bufferIndex];
return buffer[pos];
}
}
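A small round-trip for the new accessors, assuming the pool's existing append API; the offset bookkeeping here is simplified to a single value written at offset 0:

    ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
    byte[] payload = {1, 2, 3, 4};
    pool.append(new BytesRef(payload));   // bytes now live at pool offset 0

    BytesRef view = new BytesRef();
    view.length = payload.length;         // caller supplies the length up front
    pool.setRawBytesRef(view, 0L);        // usually just points into the pool's buffer
    byte first = pool.readByte(0L);       // single-byte random access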
@@ -33,8 +33,8 @@ public abstract class InPlaceMergeSorter extends Sorter {
}

void mergeSort(int from, int to) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
} else {
final int mid = (from + to) >>> 1;
mergeSort(from, mid);
lucene/core/src/java/org/apache/lucene/util/IntroSelector.java (new file, 128 lines)
@@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;

import java.util.Comparator;

/** Implementation of the quick select algorithm.
* <p>It uses the median of the first, middle and last values as a pivot and
* falls back to a heap sort when the number of recursion levels exceeds
* {@code 2 lg(n)}, as a consequence it runs in linear time on average and in
* {@code n log(n)} time in the worst case.</p>
* @lucene.internal */
public abstract class IntroSelector extends Selector {

@Override
public final void select(int from, int to, int k) {
checkArgs(from, to, k);
final int maxDepth = 2 * MathUtil.log(to - from, 2);
quickSelect(from, to, k, maxDepth);
}

// heap sort
// TODO: use median of median instead to have linear worst-case rather than
// n*log(n)
void slowSelect(int from, int to, int k) {
new Sorter() {

@Override
protected void swap(int i, int j) {
IntroSelector.this.swap(i, j);
}

@Override
protected int compare(int i, int j) {
return IntroSelector.this.compare(i, j);
}

public void sort(int from, int to) {
heapSort(from, to);
}
}.sort(from, to);
}

private void quickSelect(int from, int to, int k, int maxDepth) {
assert from <= k;
assert k < to;
if (to - from == 1) {
return;
}
if (--maxDepth < 0) {
slowSelect(from, to, k);
return;
}

final int mid = (from + to) >>> 1;
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
if (compare(from, to - 1) > 0) {
swap(from, to - 1);
}
if (compare(to - 1, mid) > 0) {
swap(to - 1, mid);
if (compare(from, to - 1) > 0) {
swap(from, to - 1);
}
}

setPivot(to - 1);

int left = from + 1;
int right = to - 2;

for (;;) {
while (comparePivot(left) > 0) {
++left;
}

while (left < right && comparePivot(right) <= 0) {
--right;
}

if (left < right) {
swap(left, right);
--right;
} else {
break;
}
}
swap(left, to - 1);

if (left == k) {
return;
} else if (left < k) {
quickSelect(left + 1, to, k, maxDepth);
} else {
quickSelect(from, left, k, maxDepth);
}
}

/** Compare entries found in slots <code>i</code> and <code>j</code>.
* The contract for the returned value is the same as
* {@link Comparator#compare(Object, Object)}. */
protected int compare(int i, int j) {
setPivot(i);
return comparePivot(j);
}

/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
protected abstract void setPivot(int i);

/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
protected abstract int comparePivot(int j);
}
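Implementing the three hooks over a plain int[] is enough to get a working selector; a quick sketch (array contents and k are arbitrary):

    int[] a = {5, 2, 8, 1, 9};
    int k = 2;
    new IntroSelector() {
      int pivot;

      @Override
      protected void swap(int i, int j) {
        int tmp = a[i]; a[i] = a[j]; a[j] = tmp;
      }

      @Override
      protected void setPivot(int i) {
        pivot = a[i];
      }

      @Override
      protected int comparePivot(int j) {
        return Integer.compare(pivot, a[j]);
      }
    }.select(0, a.length, k);
    // a[k] now holds the k-th smallest value (5 here), with smaller values before it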
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.util;


/**
* {@link Sorter} implementation based on a variant of the quicksort algorithm
* called <a href="http://en.wikipedia.org/wiki/Introsort">introsort</a>: when
@@ -38,8 +37,8 @@ public abstract class IntroSorter extends Sorter {
}

void quicksort(int from, int to, int maxDepth) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
return;
} else if (--maxDepth < 0) {
heapSort(from, to);
@@ -84,11 +83,18 @@ public abstract class IntroSorter extends Sorter {
quicksort(left + 1, to, maxDepth);
}

/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
// Don't rely on the slow default impl of setPivot/comparePivot since
// quicksort relies on these methods to be fast for good performance

@Override
protected abstract void setPivot(int i);

/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
@Override
protected abstract int comparePivot(int j);

@Override
protected int compare(int i, int j) {
setPivot(i);
return comparePivot(j);
}
}
@ -38,6 +38,7 @@ public abstract class MSBRadixSorter extends Sorter {
|
||||
// we store one histogram per recursion level
|
||||
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
|
||||
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
|
||||
private final int[] commonPrefix;
|
||||
|
||||
private final int maxLength;
|
||||
|
||||
@ -47,6 +48,7 @@ public abstract class MSBRadixSorter extends Sorter {
|
||||
*/
|
||||
protected MSBRadixSorter(int maxLength) {
|
||||
this.maxLength = maxLength;
|
||||
this.commonPrefix = new int[Math.min(24, maxLength)];
|
||||
}
|
||||
|
||||
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
|
||||
@ -116,14 +118,14 @@ public abstract class MSBRadixSorter extends Sorter {
|
||||
@Override
|
||||
public void sort(int from, int to) {
|
||||
checkRange(from, to);
|
||||
sort(from, to, 0);
|
||||
sort(from, to, 0, 0);
|
||||
}
|
||||
|
||||
private void sort(int from, int to, int k) {
|
||||
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
|
||||
private void sort(int from, int to, int k, int l) {
|
||||
if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
|
||||
introSort(from, to, k);
|
||||
} else {
|
||||
radixSort(from, to, k);
|
||||
radixSort(from, to, k, l);
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,28 +133,30 @@ public abstract class MSBRadixSorter extends Sorter {
|
||||
getFallbackSorter(k).sort(from, to);
|
||||
}
|
||||
|
||||
private void radixSort(int from, int to, int k) {
|
||||
int[] histogram = histograms[k];
|
||||
/**
|
||||
* @param k the character number to compare
|
||||
* @param l the level of recursion
|
||||
*/
|
||||
private void radixSort(int from, int to, int k, int l) {
|
||||
int[] histogram = histograms[l];
|
||||
if (histogram == null) {
|
||||
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
|
||||
histogram = histograms[l] = new int[HISTOGRAM_SIZE];
|
||||
} else {
|
||||
Arrays.fill(histogram, 0);
|
||||
}
|
||||
|
||||
buildHistogram(from, to, k, histogram);
|
||||
|
||||
// short-circuit: if all keys have the same byte at offset k, then recurse directly
|
||||
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
|
||||
if (histogram[i] == to - from) {
|
||||
// everything is in the same bucket, recurse
|
||||
if (i > 0) {
|
||||
sort(from, to, k + 1);
|
||||
}
|
||||
return;
|
||||
} else if (histogram[i] != 0) {
|
||||
break;
|
||||
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, k, histogram);
|
||||
if (commonPrefixLength > 0) {
|
||||
// if there are no more chars to compare or if all entries fell into the
|
||||
// first bucket (which means strings are shorter than k) then we are done
|
||||
// otherwise recurse
|
||||
if (k + commonPrefixLength < maxLength
|
||||
&& histogram[0] < to - from) {
|
||||
radixSort(from, to, k + commonPrefixLength, l);
|
||||
}
|
||||
return;
|
||||
}
|
||||
assert assertHistogram(commonPrefixLength, histogram);
|
||||
|
||||
int[] startOffsets = histogram;
|
||||
int[] endOffsets = this.endOffsets;
|
||||
@ -167,24 +171,83 @@ public abstract class MSBRadixSorter extends Sorter {
|
||||
int h = endOffsets[i];
|
||||
final int bucketLen = h - prev;
|
||||
if (bucketLen > 1) {
|
||||
sort(from + prev, from + h, k + 1);
|
||||
sort(from + prev, from + h, k + 1, l + 1);
|
||||
}
|
||||
prev = h;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// only used from assert
|
||||
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
|
||||
int numberOfUniqueBytes = 0;
|
||||
for (int freq : histogram) {
|
||||
if (freq > 0) {
|
||||
numberOfUniqueBytes++;
|
||||
}
|
||||
}
|
||||
if (numberOfUniqueBytes == 1) {
|
||||
assert commonPrefixLength >= 1;
|
||||
} else {
|
||||
assert commonPrefixLength == 0 : commonPrefixLength;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
|
||||
private int getBucket(int i, int k) {
|
||||
return byteAt(i, k) + 1;
|
||||
}
|
||||
|
||||
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
|
||||
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
|
||||
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
|
||||
* and return a common prefix length for all visited values.
|
||||
* @see #buildHistogram */
|
||||
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
|
||||
final int[] commonPrefix = this.commonPrefix;
|
||||
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(from, k + j);
|
||||
commonPrefix[j] = b;
|
||||
if (b == -1) {
|
||||
commonPrefixLength = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int i;
|
||||
outer: for (i = from + 1; i < to; ++i) {
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(i, k + j);
|
||||
if (b != commonPrefix[j]) {
|
||||
commonPrefixLength = j;
|
||||
if (commonPrefixLength == 0) { // we have no common prefix
|
||||
histogram[commonPrefix[0] + 1] = i - from;
|
||||
histogram[b + 1] = 1;
|
||||
break outer;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i < to) {
|
||||
// the loop got broken because there is no common prefix
|
||||
assert commonPrefixLength == 0;
|
||||
buildHistogram(i + 1, to, k, histogram);
|
||||
} else {
|
||||
assert commonPrefixLength > 0;
|
||||
histogram[commonPrefix[0] + 1] = to - from;
|
||||
}
|
||||
|
||||
return commonPrefixLength;
|
||||
}
|
||||
|
||||
/** Build an histogram of the k-th characters of values occurring between
|
||||
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
|
||||
private void buildHistogram(int from, int to, int k, int[] histogram) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
histogram[getBucket(i, k)]++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Accumulate values of the histogram so that it does not store counts but
|
||||
|
278
lucene/core/src/java/org/apache/lucene/util/RadixSelector.java
Normal file
278
lucene/core/src/java/org/apache/lucene/util/RadixSelector.java
Normal file
@ -0,0 +1,278 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/** Radix selector.
|
||||
* <p>This implementation works similarly to a MSB radix sort except that it
|
||||
* only recurses into the sub partition that contains the desired value.
|
||||
* @lucene.internal */
|
||||
public abstract class RadixSelector extends Selector {
|
||||
|
||||
// after that many levels of recursion we fall back to introselect anyway
|
||||
// this is used as a protection against the fact that radix sort performs
|
||||
// worse when there are long common prefixes (probably because of cache
|
||||
// locality)
|
||||
private static final int LEVEL_THRESHOLD = 8;
|
||||
// size of histograms: 256 + 1 to indicate that the string is finished
|
||||
private static final int HISTOGRAM_SIZE = 257;
|
||||
// ranges narrower than this will be handled by the fallback selector (introselect)
|
||||
private static final int LENGTH_THRESHOLD = 100;
|
||||
|
||||
// a single histogram, reused across recursion levels since we only recurse into one bucket
|
||||
private final int[] histogram = new int[HISTOGRAM_SIZE];
|
||||
private final int[] commonPrefix;
|
||||
|
||||
private final int maxLength;
|
||||
|
||||
/**
|
||||
* Sole constructor.
|
||||
* @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown.
|
||||
*/
|
||||
protected RadixSelector(int maxLength) {
|
||||
this.maxLength = maxLength;
|
||||
this.commonPrefix = new int[Math.min(24, maxLength)];
|
||||
}
|
||||
|
||||
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
|
||||
* its length is less than or equal to {@code k}. This may only be called
|
||||
* with a value of {@code k} between {@code 0} included and
|
||||
* {@code maxLength} excluded. */
|
||||
protected abstract int byteAt(int i, int k);
|
||||
|
||||
/** Get a fall-back selector which may assume that the first {@code d} bytes
|
||||
* of all compared strings are equal. This fallback selector is used when
|
||||
* the range becomes narrow or when the maximum level of recursion has
|
||||
* been exceeded. */
|
||||
protected Selector getFallbackSelector(int d) {
|
||||
return new IntroSelector() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
RadixSelector.this.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
for (int o = d; o < maxLength; ++o) {
|
||||
final int b1 = byteAt(i, o);
|
||||
final int b2 = byteAt(j, o);
|
||||
if (b1 != b2) {
|
||||
return b1 - b2;
|
||||
} else if (b1 == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot.setLength(0);
|
||||
for (int o = d; o < maxLength; ++o) {
|
||||
final int b = byteAt(i, o);
|
||||
if (b == -1) {
|
||||
break;
|
||||
}
|
||||
pivot.append((byte) b);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
for (int o = 0; o < pivot.length(); ++o) {
|
||||
final int b1 = pivot.byteAt(o) & 0xff;
|
||||
final int b2 = byteAt(j, d + o);
|
||||
if (b1 != b2) {
|
||||
return b1 - b2;
|
||||
}
|
||||
}
|
||||
if (d + pivot.length() == maxLength) {
|
||||
return 0;
|
||||
}
|
||||
return -1 - byteAt(j, d + pivot.length());
|
||||
}
|
||||
|
||||
private final BytesRefBuilder pivot = new BytesRefBuilder();
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void select(int from, int to, int k) {
|
||||
checkArgs(from, to, k);
|
||||
select(from, to, k, 0, 0);
|
||||
}
|
||||
|
||||
private void select(int from, int to, int k, int d, int l) {
|
||||
if (to - from <= LENGTH_THRESHOLD || d >= LEVEL_THRESHOLD) {
|
||||
getFallbackSelector(d).select(from, to, k);
|
||||
} else {
|
||||
radixSelect(from, to, k, d, l);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param d the character number to compare
|
||||
* @param l the level of recursion
|
||||
*/
|
||||
private void radixSelect(int from, int to, int k, int d, int l) {
|
||||
final int[] histogram = this.histogram;
|
||||
Arrays.fill(histogram, 0);
|
||||
|
||||
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, d, histogram);
|
||||
if (commonPrefixLength > 0) {
|
||||
// if there are no more chars to compare or if all entries fell into the
|
||||
// first bucket (which means strings are shorter than d) then we are done
|
||||
// otherwise recurse
|
||||
if (d + commonPrefixLength < maxLength
|
||||
&& histogram[0] < to - from) {
|
||||
radixSelect(from, to, k, d + commonPrefixLength, l);
|
||||
}
|
||||
return;
|
||||
}
|
||||
assert assertHistogram(commonPrefixLength, histogram);
|
||||
|
||||
int bucketFrom = from;
|
||||
for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) {
|
||||
final int bucketTo = bucketFrom + histogram[bucket];
|
||||
|
||||
if (bucketTo > k) {
|
||||
partition(from, to, bucket, bucketFrom, bucketTo, d);
|
||||
|
||||
if (bucket != 0 && d + 1 < maxLength) {
|
||||
// all elements in bucket 0 are equal so we only need to recurse if bucket != 0
|
||||
select(bucketFrom, bucketTo, k, d + 1, l + 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
bucketFrom = bucketTo;
|
||||
}
|
||||
throw new AssertionError("Unreachable code");
|
||||
}
|
||||
|
||||
// only used from assert
|
||||
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
|
||||
int numberOfUniqueBytes = 0;
|
||||
for (int freq : histogram) {
|
||||
if (freq > 0) {
|
||||
numberOfUniqueBytes++;
|
||||
}
|
||||
}
|
||||
if (numberOfUniqueBytes == 1) {
|
||||
assert commonPrefixLength >= 1;
|
||||
} else {
|
||||
assert commonPrefixLength == 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Return the bucket for the k-th byte of entry {@code i}, between 0 (used when the entry has no k-th byte) and {@link #HISTOGRAM_SIZE} (exclusive). */
|
||||
private int getBucket(int i, int k) {
|
||||
return byteAt(i, k) + 1;
|
||||
}
|
||||
|
||||
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
|
||||
* and return a common prefix length for all visited values.
|
||||
* @see #buildHistogram */
|
||||
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
|
||||
final int[] commonPrefix = this.commonPrefix;
|
||||
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(from, k + j);
|
||||
commonPrefix[j] = b;
|
||||
if (b == -1) {
|
||||
commonPrefixLength = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int i;
|
||||
outer: for (i = from + 1; i < to; ++i) {
|
||||
for (int j = 0; j < commonPrefixLength; ++j) {
|
||||
final int b = byteAt(i, k + j);
|
||||
if (b != commonPrefix[j]) {
|
||||
commonPrefixLength = j;
|
||||
if (commonPrefixLength == 0) { // we have no common prefix
|
||||
histogram[commonPrefix[0] + 1] = i - from;
|
||||
histogram[b + 1] = 1;
|
||||
break outer;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i < to) {
|
||||
// the loop got broken because there is no common prefix
|
||||
assert commonPrefixLength == 0;
|
||||
buildHistogram(i + 1, to, k, histogram);
|
||||
} else {
|
||||
assert commonPrefixLength > 0;
|
||||
histogram[commonPrefix[0] + 1] = to - from;
|
||||
}
|
||||
|
||||
return commonPrefixLength;
|
||||
}
|
||||
|
||||
/** Build a histogram of the k-th characters of values occurring between
|
||||
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
|
||||
private void buildHistogram(int from, int to, int k, int[] histogram) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
histogram[getBucket(i, k)]++;
|
||||
}
|
||||
}
|
||||
|
||||
/** Reorder elements so that all of them that fall into {@code bucket} are
|
||||
* between offsets {@code bucketFrom} and {@code bucketTo}. */
|
||||
private void partition(int from, int to, int bucket, int bucketFrom, int bucketTo, int d) {
|
||||
int left = from;
|
||||
int right = to - 1;
|
||||
|
||||
int slot = bucketFrom;
|
||||
|
||||
for (;;) {
|
||||
int leftBucket = getBucket(left, d);
|
||||
int rightBucket = getBucket(right, d);
|
||||
|
||||
while (leftBucket <= bucket && left < bucketFrom) {
|
||||
if (leftBucket == bucket) {
|
||||
swap(left, slot++);
|
||||
} else {
|
||||
++left;
|
||||
}
|
||||
leftBucket = getBucket(left, d);
|
||||
}
|
||||
|
||||
while (rightBucket >= bucket && right >= bucketTo) {
|
||||
if (rightBucket == bucket) {
|
||||
swap(right, slot++);
|
||||
} else {
|
||||
--right;
|
||||
}
|
||||
rightBucket = getBucket(right, d);
|
||||
}
|
||||
|
||||
if (left < bucketFrom && right >= bucketTo) {
|
||||
swap(left++, right--);
|
||||
} else {
|
||||
assert left == bucketFrom;
|
||||
assert right == bucketTo - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
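As a usage illustration only (not part of this commit), the sketch below drives the class above to put the median of a small set of byte[] keys into its sorted position without sorting the whole array; the sample keys, the class name and the assumed 16-byte maximum length are made up.

import java.util.Arrays;
import org.apache.lucene.util.RadixSelector;

public class RadixSelectorSketch {
  public static void main(String[] args) {
    byte[][] keys = { {0x42}, {0x10, 0x7f}, {0x42, 0x01}, {0x05}, {0x33, 0x33, 0x33} };
    int k = keys.length / 2; // only this slot needs to end up in sorted position
    new RadixSelector(16) { // 16 = assumed maximum key length
      @Override
      protected int byteAt(int i, int j) {
        return j < keys[i].length ? Byte.toUnsignedInt(keys[i][j]) : -1;
      }
      @Override
      protected void swap(int i, int j) {
        byte[] tmp = keys[i]; keys[i] = keys[j]; keys[j] = tmp;
      }
    }.select(0, keys.length, k);
    // keys[k] is now the median; earlier entries compare <= it, later entries compare >= it
    System.out.println(Arrays.toString(keys[k])); // [51, 51, 51]
  }
}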
41
lucene/core/src/java/org/apache/lucene/util/Selector.java
Normal file
41
lucene/core/src/java/org/apache/lucene/util/Selector.java
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
/** An implementation of a selection algorithm, i.e. computing the k-th greatest
|
||||
* value from a collection. */
|
||||
public abstract class Selector {
|
||||
|
||||
/** Reorder elements so that the element at position {@code k} is the same
|
||||
* as if all elements were sorted and all other elements are partitioned
|
||||
* around it: {@code [from, k)} only contains elements that are less than
|
||||
* or equal to it and {@code (k, to)} only contains elements that
|
||||
* are greater than or equal to it. */
|
||||
public abstract void select(int from, int to, int k);
|
||||
|
||||
void checkArgs(int from, int to, int k) {
|
||||
if (k < from) {
|
||||
throw new IllegalArgumentException("k must be >= from");
|
||||
}
|
||||
if (k >= to) {
|
||||
throw new IllegalArgumentException("k must be < to");
|
||||
}
|
||||
}
|
||||
|
||||
/** Swap values at slots <code>i</code> and <code>j</code>. */
|
||||
protected abstract void swap(int i, int j);
|
||||
}
|
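A small, assumption-only sketch of the contract described above, using IntroSelector (the comparison-based implementation exercised by the new TestIntroSelector later in this commit); the data, class name and choice of k are arbitrary.

import java.util.Arrays;
import org.apache.lucene.util.IntroSelector;

public class SelectorContractSketch {
  public static void main(String[] args) {
    Integer[] values = {7, 3, 9, 1, 5, 8, 2};
    int k = 3; // after select, values[3] holds what a full sort would have put there
    new IntroSelector() {
      Integer pivot;
      @Override
      protected void swap(int i, int j) {
        Integer tmp = values[i]; values[i] = values[j]; values[j] = tmp;
      }
      @Override
      protected void setPivot(int i) {
        pivot = values[i];
      }
      @Override
      protected int comparePivot(int j) {
        return pivot.compareTo(values[j]);
      }
    }.select(0, values.length, k);
    // [0, k) only holds values <= values[k] and (k, length) only holds values >= values[k]
    System.out.println(Arrays.toString(values) + ", values[k] = " + values[k]); // values[k] == 5
  }
}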
@ -23,7 +23,7 @@ import java.util.Comparator;
|
||||
* @lucene.internal */
|
||||
public abstract class Sorter {
|
||||
|
||||
static final int INSERTION_SORT_THRESHOLD = 20;
|
||||
static final int BINARY_SORT_THRESHOLD = 20;
|
||||
|
||||
/** Sole constructor, used for inheritance. */
|
||||
protected Sorter() {}
|
||||
@ -36,6 +36,20 @@ public abstract class Sorter {
|
||||
/** Swap values at slots <code>i</code> and <code>j</code>. */
|
||||
protected abstract void swap(int i, int j);
|
||||
|
||||
private int pivotIndex;
|
||||
|
||||
/** Save the value at slot <code>i</code> so that it can later be used as a
|
||||
* pivot, see {@link #comparePivot(int)}. */
|
||||
protected void setPivot(int i) {
|
||||
pivotIndex = i;
|
||||
}
|
||||
|
||||
/** Compare the pivot with the slot at <code>j</code>, similarly to
|
||||
* {@link #compare(int, int) compare(i, j)}. */
|
||||
protected int comparePivot(int j) {
|
||||
return compare(pivotIndex, j);
|
||||
}
|
||||
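The sketch below (not from the patch, sample data and class name assumed) shows the intended use of this pivot API: the pivot value is captured once by setPivot and then compared against many slots via comparePivot, which is what the binarySort change further down switches to instead of re-reading slot i on every probe. It mirrors the anonymous IntroSorter instances added elsewhere in this commit.

import java.util.Arrays;
import org.apache.lucene.util.IntroSorter;

public class PivotSorterSketch {
  public static void main(String[] args) {
    String[] terms = {"delta", "alpha", "charlie", "bravo"};
    new IntroSorter() {
      String pivot;
      @Override
      protected void swap(int i, int j) {
        String tmp = terms[i]; terms[i] = terms[j]; terms[j] = tmp;
      }
      @Override
      protected void setPivot(int i) {
        pivot = terms[i]; // saved once per partitioning step ...
      }
      @Override
      protected int comparePivot(int j) {
        return pivot.compareTo(terms[j]); // ... then compared against many slots
      }
    }.sort(0, terms.length);
    System.out.println(Arrays.toString(terms)); // [alpha, bravo, charlie, delta]
  }
}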
|
||||
/** Sort the slice which starts at <code>from</code> (inclusive) and ends at
|
||||
* <code>to</code> (exclusive). */
|
||||
public abstract void sort(int from, int to);
|
||||
@ -163,54 +177,41 @@ public abstract class Sorter {
|
||||
}
|
||||
}
|
||||
|
||||
void insertionSort(int from, int to) {
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
for (int j = i; j > from; --j) {
|
||||
if (compare(j - 1, j) > 0) {
|
||||
swap(j - 1, j);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A binary sort implementation. This performs {@code O(n*log(n))} comparisons
|
||||
* and {@code O(n^2)} swaps. It is typically used by more sophisticated
|
||||
* implementations as a fall-back when the number of items to sort has become
|
||||
* less than {@value #BINARY_SORT_THRESHOLD}.
|
||||
*/
|
||||
void binarySort(int from, int to) {
|
||||
binarySort(from, to, from + 1);
|
||||
}
|
||||
|
||||
void binarySort(int from, int to, int i) {
|
||||
for ( ; i < to; ++i) {
|
||||
setPivot(i);
|
||||
int l = from;
|
||||
int h = i - 1;
|
||||
while (l <= h) {
|
||||
final int mid = (l + h) >>> 1;
|
||||
final int cmp = compare(i, mid);
|
||||
final int cmp = comparePivot(mid);
|
||||
if (cmp < 0) {
|
||||
h = mid - 1;
|
||||
} else {
|
||||
l = mid + 1;
|
||||
}
|
||||
}
|
||||
switch (i - l) {
|
||||
case 2:
|
||||
swap(l + 1, l + 2);
|
||||
swap(l, l + 1);
|
||||
break;
|
||||
case 1:
|
||||
swap(l, l + 1);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
for (int j = i; j > l; --j) {
|
||||
swap(j - 1, j);
|
||||
}
|
||||
break;
|
||||
for (int j = i; j > l; --j) {
|
||||
swap(j - 1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Use heap sort to sort items between {@code from} inclusive and {@code to}
|
||||
* exclusive. This runs in {@code O(n*log(n))} and is used as a fall-back by
|
||||
* {@link IntroSorter}.
|
||||
*/
|
||||
void heapSort(int from, int to) {
|
||||
if (to - from <= 1) {
|
||||
return;
|
||||
|
@ -25,6 +25,7 @@ import java.util.List;
|
||||
import java.util.function.IntFunction;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.PointValues.Relation;
|
||||
@ -67,7 +68,7 @@ import org.apache.lucene.util.StringHelper;
|
||||
* <p>
|
||||
* See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
|
||||
*
|
||||
* <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>,
|
||||
* <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>,
|
||||
* and then uses up to the specified {@code maxMBSortInHeap} heap space for writing.
|
||||
*
|
||||
* <p>
|
||||
@ -111,7 +112,8 @@ public class BKDWriter implements Closeable {
|
||||
final byte[] scratchDiff;
|
||||
final byte[] scratch1;
|
||||
final byte[] scratch2;
|
||||
final BytesRef scratchBytesRef = new BytesRef();
|
||||
final BytesRef scratchBytesRef1 = new BytesRef();
|
||||
final BytesRef scratchBytesRef2 = new BytesRef();
|
||||
final int[] commonPrefixLengths;
|
||||
|
||||
protected final FixedBitSet docsSeen;
|
||||
@ -140,10 +142,10 @@ public class BKDWriter implements Closeable {
|
||||
/** True if every document has at most one value. We specialize this case by not bothering to store the ord since it's redundant with docID. */
|
||||
protected final boolean singleValuePerDoc;
|
||||
|
||||
/** How much heap OfflineSorter is allowed to use */
|
||||
/** How much heap OfflineSorter is allowed to use */
|
||||
protected final OfflineSorter.BufferSize offlineSorterBufferMB;
|
||||
|
||||
/** Maximum number of intermediate temp files OfflineSorter is allowed to use */
|
||||
/** Maximum number of intermediate temp files OfflineSorter is allowed to use */
|
||||
protected final int offlineSorterMaxTempFiles;
|
||||
|
||||
private final int maxDoc;
|
||||
@ -173,7 +175,6 @@ public class BKDWriter implements Closeable {
|
||||
packedBytesLength = numDims * bytesPerDim;
|
||||
|
||||
scratchDiff = new byte[bytesPerDim];
|
||||
scratchBytesRef.length = packedBytesLength;
|
||||
scratch1 = new byte[packedBytesLength];
|
||||
scratch2 = new byte[packedBytesLength];
|
||||
commonPrefixLengths = new int[numDims];
|
||||
@ -204,7 +205,7 @@ public class BKDWriter implements Closeable {
|
||||
// all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X
|
||||
// what that level would consume, so we multiply by 0.5 to convert from
|
||||
// bytes to points here. Each dimension has its own sorted partition, so
|
||||
// we must divide by numDims as well.
|
||||
// we must divide by numDims as well.
|
||||
|
||||
maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
|
||||
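A worked instance of the budget formula above, with assumed inputs only (16 MB budget, 28 bytes per point, 2 dimensions); the real values come from IndexWriter's RAM buffer and the codec, and the class name is illustrative.

public class MaxPointsSortInHeapSketch {
  public static void main(String[] args) {
    double maxMBSortInHeap = 16.0; // assumed heap budget in MB
    int bytesPerDoc = 28;          // assumed bytes per point (packed value + docID/ord bookkeeping)
    int numDims = 2;
    // halve the budget for the recursive halves, then spread it across the per-dimension partitions
    int maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
    System.out.println(maxPointsSortInHeap); // 149796 points may be sorted in heap
  }
}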
|
||||
@ -381,7 +382,7 @@ public class BKDWriter implements Closeable {
|
||||
} else {
|
||||
mappedDocID = docMap.get(oldDocID);
|
||||
}
|
||||
|
||||
|
||||
if (mappedDocID != -1) {
|
||||
// Not deleted!
|
||||
docID = mappedDocID;
|
||||
@ -416,15 +417,25 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
|
||||
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
|
||||
* dimensional values were deleted. */
|
||||
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
|
||||
if (numDims != 1) {
|
||||
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
|
||||
/** Write a field from a {@link MutablePointsReader}. This way of writing
|
||||
* points is faster than regular writes with {@link BKDWriter#add} since
|
||||
* there is opportunity for reordering points before writing them to
|
||||
* disk. This method does not use transient disk in order to reorder points.
|
||||
*/
|
||||
public long writeField(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
|
||||
if (numDims == 1) {
|
||||
return writeField1Dim(out, fieldName, reader);
|
||||
} else {
|
||||
return writeFieldNDims(out, fieldName, reader);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* In the 2+D case, we recursively pick the split dimension, compute the
|
||||
* median value and partition other values around it. */
|
||||
private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
|
||||
if (pointCount != 0) {
|
||||
throw new IllegalStateException("cannot mix add and merge");
|
||||
throw new IllegalStateException("cannot mix add and writeField");
|
||||
}
|
||||
|
||||
// Catch user silliness:
|
||||
@ -435,6 +446,81 @@ public class BKDWriter implements Closeable {
|
||||
// Mark that we already finished:
|
||||
heapPointWriter = null;
|
||||
|
||||
long countPerLeaf = pointCount = reader.size(fieldName);
|
||||
long innerNodeCount = 1;
|
||||
|
||||
while (countPerLeaf > maxPointsInLeafNode) {
|
||||
countPerLeaf = (countPerLeaf+1)/2;
|
||||
innerNodeCount *= 2;
|
||||
}
|
||||
|
||||
int numLeaves = Math.toIntExact(innerNodeCount);
|
||||
|
||||
checkMaxLeafNodeCount(numLeaves);
|
||||
|
||||
final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)];
|
||||
final long[] leafBlockFPs = new long[numLeaves];
|
||||
|
||||
// compute the min/max for this slice
|
||||
Arrays.fill(minPackedValue, (byte) 0xff);
|
||||
Arrays.fill(maxPackedValue, (byte) 0);
|
||||
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
|
||||
reader.getValue(i, scratchBytesRef1);
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset) < 0) {
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
|
||||
}
|
||||
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset) > 0) {
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
|
||||
}
|
||||
}
|
||||
|
||||
docsSeen.set(reader.getDocID(i));
|
||||
}
|
||||
|
||||
build(1, numLeaves, reader, 0, Math.toIntExact(pointCount), out,
|
||||
minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
|
||||
new int[maxPointsInLeafNode]);
|
||||
|
||||
long indexFP = out.getFilePointer();
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues);
|
||||
return indexFP;
|
||||
}
|
||||
|
||||
|
||||
/* In the 1D case, we can simply sort points in ascending order and use the
|
||||
* same writing logic as we use at merge time. */
|
||||
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
|
||||
MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size(fieldName)));
|
||||
|
||||
final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
|
||||
|
||||
reader.intersect(fieldName, new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) throws IOException {
|
||||
oneDimWriter.add(packedValue, docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) throws IOException {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
});
|
||||
|
||||
return oneDimWriter.finish();
|
||||
}
|
||||
|
||||
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
|
||||
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
|
||||
* dimensional values were deleted. */
|
||||
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
|
||||
assert docMaps == null || readers.size() == docMaps.size();
|
||||
|
||||
BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());
|
||||
@ -453,72 +539,14 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
if (queue.size() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int leafCount = 0;
|
||||
List<Long> leafBlockFPs = new ArrayList<>();
|
||||
List<byte[]> leafBlockStartValues = new ArrayList<>();
|
||||
|
||||
// Target halfway between min and max allowed for the leaf:
|
||||
int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode);
|
||||
//System.out.println("POINTS PER: " + pointsPerLeafBlock);
|
||||
|
||||
byte[] lastPackedValue = new byte[bytesPerDim];
|
||||
byte[] firstPackedValue = new byte[bytesPerDim];
|
||||
long valueCount = 0;
|
||||
|
||||
// Buffer up each leaf block's docs and values
|
||||
int[] leafBlockDocIDs = new int[maxPointsInLeafNode];
|
||||
byte[][] leafBlockPackedValues = new byte[maxPointsInLeafNode][];
|
||||
for(int i=0;i<maxPointsInLeafNode;i++) {
|
||||
leafBlockPackedValues[i] = new byte[packedBytesLength];
|
||||
}
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
|
||||
|
||||
while (queue.size() != 0) {
|
||||
MergeReader reader = queue.top();
|
||||
// System.out.println("iter reader=" + reader);
|
||||
|
||||
// NOTE: doesn't work with subclasses (e.g. SimpleText!)
|
||||
int docID = reader.docID;
|
||||
leafBlockDocIDs[leafCount] = docID;
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
|
||||
docsSeen.set(docID);
|
||||
|
||||
if (valueCount == 0) {
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength);
|
||||
}
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue, 0);
|
||||
valueCount++;
|
||||
if (pointCount > totalPointCount) {
|
||||
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
|
||||
}
|
||||
|
||||
if (leafCount == 0) {
|
||||
if (leafBlockFPs.size() > 0) {
|
||||
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
|
||||
leafBlockStartValues.add(Arrays.copyOf(reader.state.scratchPackedValue, bytesPerDim));
|
||||
}
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, firstPackedValue, 0, bytesPerDim);
|
||||
} else {
|
||||
// Find per-dim common prefix:
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim * bytesPerDim;
|
||||
for(int j=0;j<commonPrefixLengths[dim];j++) {
|
||||
if (firstPackedValue[offset+j] != reader.state.scratchPackedValue[offset+j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
leafCount++;
|
||||
oneDimWriter.add(reader.state.scratchPackedValue, reader.docID);
|
||||
|
||||
if (reader.next()) {
|
||||
queue.updateTop();
|
||||
@ -526,53 +554,150 @@ public class BKDWriter implements Closeable {
|
||||
// This segment was exhausted
|
||||
queue.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// We write a block once we hit exactly the max count ... this is different from
|
||||
// when we flush a new segment, where we write between max/2 and max per leaf block,
|
||||
// so merged segments will behave differently from newly flushed segments:
|
||||
if (leafCount == pointsPerLeafBlock || queue.size() == 0) {
|
||||
leafBlockFPs.add(out.getFilePointer());
|
||||
checkMaxLeafNodeCount(leafBlockFPs.size());
|
||||
return oneDimWriter.finish();
|
||||
}
|
||||
|
||||
writeLeafBlockDocs(out, leafBlockDocIDs, 0, leafCount);
|
||||
writeCommonPrefixes(out, commonPrefixLengths, firstPackedValue);
|
||||
private class OneDimensionBKDWriter {
|
||||
|
||||
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
|
||||
final BytesRef scratch = new BytesRef();
|
||||
final IndexOutput out;
|
||||
final List<Long> leafBlockFPs = new ArrayList<>();
|
||||
final List<byte[]> leafBlockStartValues = new ArrayList<>();
|
||||
final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
|
||||
final int[] leafDocs = new int[maxPointsInLeafNode];
|
||||
long valueCount;
|
||||
int leafCount;
|
||||
|
||||
{
|
||||
scratch.length = packedBytesLength;
|
||||
scratch.offset = 0;
|
||||
}
|
||||
OneDimensionBKDWriter(IndexOutput out) {
|
||||
if (numDims != 1) {
|
||||
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
|
||||
}
|
||||
if (pointCount != 0) {
|
||||
throw new IllegalStateException("cannot mix add and merge");
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
scratch.bytes = leafBlockPackedValues[i];
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
|
||||
// Catch user silliness:
|
||||
if (heapPointWriter == null && tempInput == null) {
|
||||
throw new IllegalStateException("already finished");
|
||||
}
|
||||
|
||||
// Mark that we already finished:
|
||||
heapPointWriter = null;
|
||||
|
||||
this.out = out;
|
||||
|
||||
lastPackedValue = new byte[packedBytesLength];
|
||||
}
|
||||
|
||||
// for asserts
|
||||
final byte[] lastPackedValue;
|
||||
int lastDocID;
|
||||
|
||||
void add(byte[] packedValue, int docID) throws IOException {
|
||||
assert valueInOrder(valueCount + leafCount,
|
||||
0, lastPackedValue, packedValue, 0, docID, lastDocID);
|
||||
|
||||
System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
|
||||
leafDocs[leafCount] = docID;
|
||||
docsSeen.set(docID);
|
||||
leafCount++;
|
||||
|
||||
if (valueCount > totalPointCount) {
|
||||
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
|
||||
}
|
||||
|
||||
if (leafCount == maxPointsInLeafNode) {
|
||||
// We write a block once we hit exactly the max count ... this is different from
|
||||
// when we flush a new segment, where we write between max/2 and max per leaf block,
|
||||
// so merged segments will behave differently from newly flushed segments:
|
||||
writeLeafBlock();
|
||||
leafCount = 0;
|
||||
}
|
||||
|
||||
assert (lastDocID = docID) >= 0; // only assign when asserts are enabled
|
||||
}
|
||||
|
||||
pointCount = valueCount;
|
||||
public long finish() throws IOException {
|
||||
if (leafCount > 0) {
|
||||
writeLeafBlock();
|
||||
leafCount = 0;
|
||||
}
|
||||
|
||||
long indexFP = out.getFilePointer();
|
||||
if (valueCount == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int numInnerNodes = leafBlockStartValues.size();
|
||||
pointCount = valueCount;
|
||||
|
||||
//System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts=" + leafBlockStartValues.size());
|
||||
long indexFP = out.getFilePointer();
|
||||
|
||||
byte[] index = new byte[(1+numInnerNodes) * (1+bytesPerDim)];
|
||||
rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
|
||||
long[] arr = new long[leafBlockFPs.size()];
|
||||
for(int i=0;i<leafBlockFPs.size();i++) {
|
||||
arr[i] = leafBlockFPs.get(i);
|
||||
int numInnerNodes = leafBlockStartValues.size();
|
||||
|
||||
//System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts=" + leafBlockStartValues.size());
|
||||
|
||||
byte[] index = new byte[(1+numInnerNodes) * (1+bytesPerDim)];
|
||||
rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
|
||||
long[] arr = new long[leafBlockFPs.size()];
|
||||
for(int i=0;i<leafBlockFPs.size();i++) {
|
||||
arr[i] = leafBlockFPs.get(i);
|
||||
}
|
||||
writeIndex(out, arr, index);
|
||||
return indexFP;
|
||||
}
|
||||
writeIndex(out, arr, index);
|
||||
return indexFP;
|
||||
|
||||
private void writeLeafBlock() throws IOException {
|
||||
assert leafCount != 0;
|
||||
if (valueCount == 0) {
|
||||
System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
|
||||
}
|
||||
System.arraycopy(leafValues, (leafCount - 1) * packedBytesLength, maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
valueCount += leafCount;
|
||||
|
||||
if (leafBlockFPs.size() > 0) {
|
||||
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
|
||||
leafBlockStartValues.add(Arrays.copyOf(leafValues, packedBytesLength));
|
||||
}
|
||||
leafBlockFPs.add(out.getFilePointer());
|
||||
checkMaxLeafNodeCount(leafBlockFPs.size());
|
||||
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
// Find per-dim common prefix:
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset1 = dim * bytesPerDim;
|
||||
int offset2 = (leafCount - 1) * packedBytesLength + offset1;
|
||||
for(int j=0;j<commonPrefixLengths[dim];j++) {
|
||||
if (leafValues[offset1+j] != leafValues[offset2+j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writeLeafBlockDocs(out, leafDocs, 0, leafCount);
|
||||
writeCommonPrefixes(out, commonPrefixLengths, leafValues);
|
||||
|
||||
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
|
||||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
{
|
||||
scratch.length = packedBytesLength;
|
||||
scratch.bytes = leafValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
scratch.offset = packedBytesLength * i;
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength),
|
||||
Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
|
||||
packedValues, leafDocs, 0);
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TODO: there must be a simpler way?
|
||||
@ -686,6 +811,7 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
|
||||
private PointWriter sort(int dim) throws IOException {
|
||||
assert dim >= 0 && dim < numDims;
|
||||
|
||||
if (heapPointWriter != null) {
|
||||
|
||||
@ -937,7 +1063,7 @@ public class BKDWriter implements Closeable {
|
||||
int compressedByteOffset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
|
||||
commonPrefixLengths[sortedDim]++;
|
||||
for (int i = 0; i < count; ) {
|
||||
// do run-length compression on the byte at compressedByteOffset
|
||||
// do run-length compression on the byte at compressedByteOffset
|
||||
int runLen = runLen(packedValues, i, Math.min(i + 0xff, count), compressedByteOffset);
|
||||
assert runLen <= 0xff;
|
||||
BytesRef first = packedValues.apply(i);
|
||||
@ -1016,7 +1142,7 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
/** Called on exception, to check whether the checksum is also corrupt in this source, and add that
|
||||
/** Called on exception, to check whether the checksum is also corrupt in this source, and add that
|
||||
* information (checksum matched or didn't) as a suppressed exception. */
|
||||
private void verifyChecksum(Throwable priorException, PointWriter writer) throws IOException {
|
||||
// TODO: we could improve this, to always validate checksum as we recurse, if we shared left and
|
||||
@ -1110,6 +1236,132 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
/* Recursively reorders the provided reader and writes the bkd-tree on the fly. */
|
||||
private void build(int nodeID, int leafNodeOffset,
|
||||
MutablePointsReader reader, int from, int to,
|
||||
IndexOutput out,
|
||||
byte[] minPackedValue, byte[] maxPackedValue,
|
||||
byte[] splitPackedValues,
|
||||
long[] leafBlockFPs,
|
||||
int[] spareDocIds) throws IOException {
|
||||
|
||||
if (nodeID >= leafNodeOffset) {
|
||||
// leaf node
|
||||
final int count = to - from;
|
||||
assert count <= maxPointsInLeafNode;
|
||||
|
||||
// Compute common prefixes
|
||||
Arrays.fill(commonPrefixLengths, bytesPerDim);
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
reader.getValue(i, scratchBytesRef2);
|
||||
for (int dim=0;dim<numDims;dim++) {
|
||||
final int offset = dim * bytesPerDim;
|
||||
for(int j=0;j<commonPrefixLengths[dim];j++) {
|
||||
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[numDims];
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
if (commonPrefixLengths[dim] < bytesPerDim) {
|
||||
usedBytes[dim] = new FixedBitSet(256);
|
||||
}
|
||||
}
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
for (int dim=0;dim<numDims;dim++) {
|
||||
if (usedBytes[dim] != null) {
|
||||
byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
|
||||
usedBytes[dim].set(Byte.toUnsignedInt(b));
|
||||
}
|
||||
}
|
||||
}
|
||||
int sortedDim = 0;
|
||||
int sortedDimCardinality = Integer.MAX_VALUE;
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
if (usedBytes[dim] != null) {
|
||||
final int cardinality = usedBytes[dim].cardinality();
|
||||
if (cardinality < sortedDimCardinality) {
|
||||
sortedDim = dim;
|
||||
sortedDimCardinality = cardinality;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sort by sortedDim
|
||||
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
|
||||
reader, from, to, scratchBytesRef1, scratchBytesRef2);
|
||||
|
||||
// Save the block file pointer:
|
||||
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
|
||||
|
||||
// Write doc IDs
|
||||
int[] docIDs = spareDocIds;
|
||||
for (int i = from; i < to; ++i) {
|
||||
docIDs[i - from] = reader.getDocID(i);
|
||||
}
|
||||
writeLeafBlockDocs(out, docIDs, 0, count);
|
||||
|
||||
// Write the common prefixes:
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
|
||||
writeCommonPrefixes(out, commonPrefixLengths, scratch1);
|
||||
|
||||
// Write the full values:
|
||||
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
reader.getValue(from + i, scratchBytesRef1);
|
||||
return scratchBytesRef1;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
||||
docIDs, 0);
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
|
||||
|
||||
} else {
|
||||
// inner node
|
||||
|
||||
// compute the split dimension and partition around it
|
||||
final int splitDim = split(minPackedValue, maxPackedValue);
|
||||
final int mid = (from + to + 1) >>> 1;
|
||||
|
||||
int commonPrefixLen = bytesPerDim;
|
||||
for (int i = 0; i < bytesPerDim; ++i) {
|
||||
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
|
||||
commonPrefixLen = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
|
||||
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
|
||||
|
||||
// set the split value
|
||||
final int address = nodeID * (1+bytesPerDim);
|
||||
splitPackedValues[address] = (byte) splitDim;
|
||||
reader.getValue(mid, scratchBytesRef1);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
|
||||
|
||||
byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
|
||||
byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
|
||||
minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
|
||||
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
|
||||
maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
|
||||
|
||||
// recurse
|
||||
build(nodeID * 2, leafNodeOffset, reader, from, mid, out,
|
||||
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||
build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out,
|
||||
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||
}
|
||||
}
|
||||
|
||||
/** The array (sized numDims) of PathSlice describes the cell we have currently recursed to. */
|
||||
private void build(int nodeID, int leafNodeOffset,
|
||||
PathSlice[] slices,
|
||||
@ -1217,7 +1469,8 @@ public class BKDWriter implements Closeable {
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(count, minPackedValue, maxPackedValue, packedValues);
|
||||
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
||||
heapSource.docIDs, Math.toIntExact(source.start));
|
||||
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
|
||||
|
||||
} else {
|
||||
@ -1321,12 +1574,16 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
|
||||
// only called from assert
|
||||
private boolean valuesInOrderAndBounds(int count, byte[] minPackedValue, byte[] maxPackedValue, IntFunction<BytesRef> values) throws IOException {
|
||||
byte[] lastPackedValue = new byte[bytesPerDim];
|
||||
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue,
|
||||
IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException {
|
||||
byte[] lastPackedValue = new byte[packedBytesLength];
|
||||
int lastDoc = -1;
|
||||
for (int i=0;i<count;i++) {
|
||||
BytesRef packedValue = values.apply(i);
|
||||
assert packedValue.length == packedBytesLength;
|
||||
assert numDims != 1 || valueInOrder(i, lastPackedValue, packedValue.bytes, packedValue.offset);
|
||||
assert valueInOrder(i, sortedDim, lastPackedValue, packedValue.bytes, packedValue.offset,
|
||||
docs[docsOffset + i], lastDoc);
|
||||
lastDoc = docs[docsOffset + i];
|
||||
|
||||
// Make sure this value does in fact fall within this leaf cell:
|
||||
assert valueInBounds(packedValue, minPackedValue, maxPackedValue);
|
||||
@ -1335,11 +1592,19 @@ public class BKDWriter implements Closeable {
|
||||
}
|
||||
|
||||
// only called from assert
|
||||
private boolean valueInOrder(long ord, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset) {
|
||||
if (ord > 0 && StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, packedValueOffset) > 0) {
|
||||
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
|
||||
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset,
|
||||
int doc, int lastDoc) {
|
||||
int dimOffset = sortedDim * bytesPerDim;
|
||||
if (ord > 0) {
|
||||
int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, dimOffset, packedValue, packedValueOffset + dimOffset);
|
||||
if (cmp > 0) {
|
||||
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
|
||||
}
|
||||
if (cmp == 0 && doc < lastDoc) {
|
||||
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
|
||||
}
|
||||
}
|
||||
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, bytesPerDim);
|
||||
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.bkd;
|
||||
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntroSelector;
|
||||
import org.apache.lucene.util.IntroSorter;
|
||||
import org.apache.lucene.util.MSBRadixSorter;
|
||||
import org.apache.lucene.util.RadixSelector;
|
||||
import org.apache.lucene.util.Selector;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
final class MutablePointsReaderUtils {
|
||||
|
||||
MutablePointsReaderUtils() {}
|
||||
|
||||
/** Sort the given {@link MutablePointsReader} based on its packed value then doc ID. */
|
||||
static void sort(int maxDoc, int packedBytesLength,
|
||||
MutablePointsReader reader, int from, int to) {
|
||||
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
|
||||
new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
if (k < packedBytesLength) {
|
||||
return Byte.toUnsignedInt(reader.getByteAt(i, k));
|
||||
} else {
|
||||
final int shift = bitsPerDocId - ((k - packedBytesLength + 1) << 3);
|
||||
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
|
||||
return new IntroSorter() {
|
||||
|
||||
final BytesRef pivot = new BytesRef();
|
||||
final BytesRef scratch = new BytesRef();
|
||||
int pivotDoc;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
reader.getValue(i, pivot);
|
||||
pivotDoc = reader.getDocID(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
if (k < packedBytesLength) {
|
||||
reader.getValue(j, scratch);
|
||||
int cmp = StringHelper.compare(packedBytesLength - k, pivot.bytes, pivot.offset + k, scratch.bytes, scratch.offset + k);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return pivotDoc - reader.getDocID(j);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}.sort(from, to);
|
||||
}
|
||||
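To make the docID tie-breaker above concrete, here is a standalone sketch (all sample numbers and the class name are assumptions) of the bytes that byteAt emits for k >= packedBytesLength: the docID is appended big-endian, using only as many bytes as PackedInts.bitsRequired(maxDoc - 1) implies, so ties on the packed value are broken by docID order.

import org.apache.lucene.util.packed.PackedInts;

public class DocIdRadixKeySketch {
  public static void main(String[] args) {
    int maxDoc = 1_000_000;     // assumed segment size
    int packedBytesLength = 8;  // assumed: 1 dimension of 8 bytes
    int docID = 0xABCDE;        // assumed doc being sorted
    final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1); // 20 bits here
    int numDocIdBytes = (bitsPerDocId + 7) / 8;                   // radix key = value bytes + 3 docID bytes
    for (int k = packedBytesLength; k < packedBytesLength + numDocIdBytes; ++k) {
      final int shift = bitsPerDocId - ((k - packedBytesLength + 1) << 3);
      int b = (docID >>> Math.max(0, shift)) & 0xff;
      System.out.printf("key byte %d = 0x%02x%n", k, b); // 0xab, 0xcd, 0xde in order
    }
  }
}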
|
||||
/** Sort points on the given dimension. */
|
||||
static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
|
||||
MutablePointsReader reader, int from, int to,
|
||||
BytesRef scratch1, BytesRef scratch2) {
|
||||
|
||||
// No need for a fancy radix sort here, this is called on the leaves only so
|
||||
// there are not many values to sort
|
||||
final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
|
||||
final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
|
||||
new IntroSorter() {
|
||||
|
||||
final BytesRef pivot = scratch1;
|
||||
int pivotDoc = -1;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
reader.getValue(i, pivot);
|
||||
pivotDoc = reader.getDocID(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
reader.getValue(j, scratch2);
|
||||
int cmp = StringHelper.compare(numBytesToCompare, pivot.bytes, pivot.offset + offset, scratch2.bytes, scratch2.offset + offset);
|
||||
if (cmp == 0) {
|
||||
cmp = pivotDoc - reader.getDocID(j);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
}.sort(from, to);
|
||||
}
|
||||
|
||||
/** Partition points around {@code mid}. All values on the left must be less
|
||||
* than or equal to it and all values on the right must be greater than or
|
||||
* equal to it. */
|
||||
static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
|
||||
MutablePointsReader reader, int from, int to, int mid,
|
||||
BytesRef scratch1, BytesRef scratch2) {
|
||||
final int offset = splitDim * bytesPerDim + commonPrefixLen;
|
||||
final int cmpBytes = bytesPerDim - commonPrefixLen;
|
||||
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
|
||||
new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {
|
||||
|
||||
@Override
|
||||
protected Selector getFallbackSelector(int k) {
|
||||
return new IntroSelector() {
|
||||
|
||||
final BytesRef pivot = scratch1;
|
||||
int pivotDoc;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
reader.getValue(i, pivot);
|
||||
pivotDoc = reader.getDocID(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
if (k < cmpBytes) {
|
||||
reader.getValue(j, scratch2);
|
||||
int cmp = StringHelper.compare(cmpBytes - k, pivot.bytes, pivot.offset + offset + k, scratch2.bytes, scratch2.offset + offset + k);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return pivotDoc - reader.getDocID(j);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
reader.swap(i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
if (k < cmpBytes) {
|
||||
return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
|
||||
} else {
|
||||
final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
|
||||
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
||||
}
|
||||
}
|
||||
}.select(from, to, mid);
|
||||
}
|
||||
}
|
@ -41,8 +41,9 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
|
||||
if (random().nextBoolean()) {
|
||||
// randomize parameters
|
||||
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
|
||||
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
|
||||
}
|
||||
|
||||
// sneaky impersonation!
|
||||
@ -52,7 +53,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
|
||||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -80,11 +80,8 @@ public class TestCustomNorms extends LuceneTestCase {
|
||||
}
|
||||
|
||||
public class MySimProvider extends PerFieldSimilarityWrapper {
|
||||
Similarity delegate = new ClassicSimilarity();
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
return delegate.queryNorm(sumOfSquaredWeights);
|
||||
public MySimProvider() {
|
||||
super(new ClassicSimilarity());
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -92,14 +89,9 @@ public class TestCustomNorms extends LuceneTestCase {
|
||||
if (floatTestField.equals(field)) {
|
||||
return new FloatEncodingBoostSimilarity();
|
||||
} else {
|
||||
return delegate;
|
||||
return defaultSim;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return delegate.coord(overlap, maxOverlap);
|
||||
}
|
||||
}
|
||||
|
||||
public static class FloatEncodingBoostSimilarity extends Similarity {
|
||||
|
@ -154,12 +154,8 @@ public class TestNorms extends LuceneTestCase {
|
||||
|
||||
|
||||
public class MySimProvider extends PerFieldSimilarityWrapper {
|
||||
Similarity delegate = new ClassicSimilarity();
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
|
||||
return delegate.queryNorm(sumOfSquaredWeights);
|
||||
public MySimProvider() {
|
||||
super(new ClassicSimilarity());
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -167,14 +163,9 @@ public class TestNorms extends LuceneTestCase {
|
||||
if (byteTestField.equals(field)) {
|
||||
return new ByteEncodingBoostSimilarity();
|
||||
} else {
|
||||
return delegate;
|
||||
return defaultSim;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return delegate.coord(overlap, maxOverlap);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -71,23 +71,13 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
||||
final Similarity base = searcher1.getSimilarity(true);
|
||||
// boosting
|
||||
IndexSearcher searcher2 = newSearcher(ir, false);
|
||||
searcher2.setSimilarity(new PerFieldSimilarityWrapper() {
|
||||
searcher2.setSimilarity(new PerFieldSimilarityWrapper(base) {
|
||||
final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");
|
||||
|
||||
@Override
|
||||
public Similarity get(String field) {
|
||||
return "foo".equals(field) ? fooSim : base;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return base.coord(overlap, maxOverlap);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
return base.queryNorm(sumOfSquaredWeights);
|
||||
}
|
||||
});
|
||||
|
||||
// in this case, we searched on field "foo". first document should have 2x the score.
|
||||
|
@ -1156,8 +1156,9 @@ public class TestPointQueries extends LuceneTestCase {
|
||||
private static Codec getCodec() {
|
||||
if (Codec.getDefault().getName().equals("Lucene62")) {
|
||||
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
|
||||
double maxMBSortInHeap = 5.0 + (3*random().nextDouble());
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
|
||||
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
|
||||
}
|
||||
|
||||
return new FilterCodec("Lucene62", Codec.getDefault()) {
|
||||
@ -1166,7 +1167,7 @@ public class TestPointQueries extends LuceneTestCase {
|
||||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -89,9 +89,13 @@ public class TestSimilarityProvider extends LuceneTestCase {
|
||||
}
|
||||
|
||||
private class ExampleSimilarityProvider extends PerFieldSimilarityWrapper {
|
||||
private Similarity sim1 = new Sim1();
|
||||
private Similarity sim2 = new Sim2();
|
||||
private final Similarity sim1 = new Sim1();
|
||||
private final Similarity sim2 = new Sim2();
|
||||
|
||||
public ExampleSimilarityProvider() {
|
||||
super(new Sim1());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get(String field) {
|
||||
if (field.equals("foo")) {
|
||||
|
@ -45,7 +45,26 @@ public class TestByteBlockPool extends LuceneTestCase {
|
||||
for (BytesRef expected : list) {
|
||||
ref.grow(expected.length);
|
||||
ref.setLength(expected.length);
|
||||
pool.readBytes(position, ref.bytes(), 0, ref.length());
|
||||
switch (random().nextInt(3)) {
|
||||
case 0:
|
||||
// copy bytes
|
||||
pool.readBytes(position, ref.bytes(), 0, ref.length());
|
||||
break;
|
||||
case 1:
|
||||
// copy bytes one by one
|
||||
for (int i = 0; i < ref.length(); ++i) {
|
||||
ref.setByteAt(i, pool.readByte(position + i));
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
BytesRef scratch = new BytesRef();
|
||||
scratch.length = ref.length();
|
||||
pool.setRawBytesRef(scratch, position);
|
||||
System.arraycopy(scratch.bytes, scratch.offset, ref.bytes(), 0, ref.length());
|
||||
break;
|
||||
default:
|
||||
fail();
|
||||
}
|
||||
assertEquals(expected, ref.get());
|
||||
position += ref.length();
|
||||
}
|
||||
|
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class TestIntroSelector extends LuceneTestCase {
|
||||
|
||||
public void testSelect() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSelect(false);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSlowSelect() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSelect(true);
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSelect(boolean slow) {
|
||||
final int from = random().nextInt(5);
|
||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||
final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000);
|
||||
Integer[] arr = new Integer[from + to + random().nextInt(5)];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
arr[i] = TestUtil.nextInt(random(), 0, max);
|
||||
}
|
||||
final int k = TestUtil.nextInt(random(), from, to - 1);
|
||||
|
||||
Integer[] expected = arr.clone();
|
||||
Arrays.sort(expected, from, to);
|
||||
|
||||
Integer[] actual = arr.clone();
|
||||
IntroSelector selector = new IntroSelector() {
|
||||
|
||||
Integer pivot;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(actual, i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot = actual[i];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
return pivot.compareTo(actual[j]);
|
||||
}
|
||||
};
|
||||
if (slow) {
|
||||
selector.slowSelect(from, to, k);
|
||||
} else {
|
||||
selector.select(from, to, k);
|
||||
}
|
||||
|
||||
assertEquals(expected[k], actual[k]);
|
||||
for (int i = 0; i < actual.length; ++i) {
|
||||
if (i < from || i >= to) {
|
||||
assertSame(arr[i], actual[i]);
|
||||
} else if (i <= k) {
|
||||
assertTrue(actual[i].intValue() <= actual[k].intValue());
|
||||
} else {
|
||||
assertTrue(actual[i].intValue() >= actual[k].intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
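To make the contract of the new selector easier to see outside the randomized test above, here is a minimal sketch (not part of this patch, kept in the same package as the test): after select(from, to, k) returns, the k-th smallest element sits at index k, with smaller elements to its left and larger ones to its right. The class name is invented for the example.

package org.apache.lucene.util;

public class IntroSelectorExample {
  /** Returns the median of the given values, reordering the array in place. */
  public static int median(final int[] values) {
    final int k = values.length / 2;
    new IntroSelector() {
      int pivot;

      @Override
      protected void swap(int i, int j) {
        int tmp = values[i];
        values[i] = values[j];
        values[j] = tmp;
      }

      @Override
      protected void setPivot(int i) {
        pivot = values[i];
      }

      @Override
      protected int comparePivot(int j) {
        return Integer.compare(pivot, values[j]);
      }
    }.select(0, values.length, k);   // k-th smallest element ends up at index k
    return values[k];
  }
}
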
@ -17,6 +17,8 @@
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class TestMSBRadixSorter extends LuceneTestCase {
|
||||
|
||||
@ -41,9 +43,12 @@ public class TestMSBRadixSorter extends LuceneTestCase {
|
||||
break;
|
||||
}
|
||||
|
||||
final int finalMaxLength = maxLength;
|
||||
new MSBRadixSorter(maxLength) {
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
assertTrue(k < finalMaxLength);
|
||||
BytesRef ref = refs[i];
|
||||
if (ref.length <= k) {
|
||||
return -1;
|
||||
@ -114,4 +119,67 @@ public class TestMSBRadixSorter extends LuceneTestCase {
|
||||
testRandom(TestUtil.nextInt(random(), 1, 30), 2);
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandom2() {
|
||||
// how large our alphabet is
|
||||
int letterCount = TestUtil.nextInt(random(), 2, 10);
|
||||
|
||||
// how many substring fragments to use
|
||||
int substringCount = TestUtil.nextInt(random(), 2, 10);
|
||||
Set<BytesRef> substringsSet = new HashSet<>();
|
||||
|
||||
// how many strings to make
|
||||
int stringCount = atLeast(10000);
|
||||
|
||||
//System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + " stringCount=" + stringCount);
|
||||
while(substringsSet.size() < substringCount) {
|
||||
int length = TestUtil.nextInt(random(), 2, 10);
|
||||
byte[] bytes = new byte[length];
|
||||
for(int i=0;i<length;i++) {
|
||||
bytes[i] = (byte) random().nextInt(letterCount);
|
||||
}
|
||||
BytesRef br = new BytesRef(bytes);
|
||||
substringsSet.add(br);
|
||||
//System.out.println("add substring count=" + substringsSet.size() + ": " + br);
|
||||
}
|
||||
|
||||
BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
|
||||
double[] chance = new double[substrings.length];
|
||||
double sum = 0.0;
|
||||
for(int i=0;i<substrings.length;i++) {
|
||||
chance[i] = random().nextDouble();
|
||||
sum += chance[i];
|
||||
}
|
||||
|
||||
// give each substring a random chance of occurring:
|
||||
double accum = 0.0;
|
||||
for(int i=0;i<substrings.length;i++) {
|
||||
accum += chance[i]/sum;
|
||||
chance[i] = accum;
|
||||
}
|
||||
|
||||
Set<BytesRef> stringsSet = new HashSet<>();
|
||||
int iters = 0;
|
||||
while (stringsSet.size() < stringCount && iters < stringCount*5) {
|
||||
int count = TestUtil.nextInt(random(), 1, 5);
|
||||
BytesRefBuilder b = new BytesRefBuilder();
|
||||
for(int i=0;i<count;i++) {
|
||||
double v = random().nextDouble();
|
||||
accum = 0.0;
|
||||
for(int j=0;j<substrings.length;j++) {
|
||||
accum += chance[j];
|
||||
if (accum >= v) {
|
||||
b.append(substrings[j]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
BytesRef br = b.toBytesRef();
|
||||
stringsSet.add(br);
|
||||
//System.out.println("add string count=" + stringsSet.size() + ": " + br);
|
||||
iters++;
|
||||
}
|
||||
|
||||
test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class TestRadixSelector extends LuceneTestCase {
|
||||
|
||||
public void testSelect() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSelect();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSelect() {
|
||||
final int from = random().nextInt(5);
|
||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||
final int maxLen = TestUtil.nextInt(random(), 1, 12);
|
||||
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
|
||||
random().nextBytes(bytes);
|
||||
arr[i] = new BytesRef(bytes);
|
||||
}
|
||||
doTest(arr, from, to, maxLen);
|
||||
}
|
||||
|
||||
public void testSharedPrefixes() {
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
doTestSharedPrefixes();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSharedPrefixes() {
|
||||
final int from = random().nextInt(5);
|
||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||
final int maxLen = TestUtil.nextInt(random(), 1, 12);
|
||||
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
|
||||
random().nextBytes(bytes);
|
||||
arr[i] = new BytesRef(bytes);
|
||||
}
|
||||
final int sharedPrefixLength = Math.min(arr[0].length, TestUtil.nextInt(random(), 1, maxLen));
|
||||
for (int i = 1; i < arr.length; ++i) {
|
||||
System.arraycopy(arr[0].bytes, arr[0].offset, arr[i].bytes, arr[i].offset, Math.min(sharedPrefixLength, arr[i].length));
|
||||
}
|
||||
doTest(arr, from, to, maxLen);
|
||||
}
|
||||
|
||||
private void doTest(BytesRef[] arr, int from, int to, int maxLen) {
|
||||
final int k = TestUtil.nextInt(random(), from, to - 1);
|
||||
|
||||
BytesRef[] expected = arr.clone();
|
||||
Arrays.sort(expected, from, to);
|
||||
|
||||
BytesRef[] actual = arr.clone();
|
||||
final int enforcedMaxLen = random().nextBoolean() ? maxLen : Integer.MAX_VALUE;
|
||||
RadixSelector selector = new RadixSelector(enforcedMaxLen) {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(actual, i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
assertTrue(k < enforcedMaxLen);
|
||||
BytesRef b = actual[i];
|
||||
if (k >= b.length) {
|
||||
return -1;
|
||||
} else {
|
||||
return Byte.toUnsignedInt(b.bytes[b.offset + k]);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
selector.select(from, to, k);
|
||||
|
||||
assertEquals(expected[k], actual[k]);
|
||||
for (int i = 0; i < actual.length; ++i) {
|
||||
if (i < from || i >= to) {
|
||||
assertSame(arr[i], actual[i]);
|
||||
} else if (i <= k) {
|
||||
assertTrue(actual[i].compareTo(actual[k]) <= 0);
|
||||
} else {
|
||||
assertTrue(actual[i].compareTo(actual[k]) >= 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.bkd;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.codecs.MutablePointsReader;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestMutablePointsReaderUtils extends LuceneTestCase {
|
||||
|
||||
public void testSort() {
|
||||
for (int iter = 0; iter < 5; ++iter) {
|
||||
doTestSort();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSort() {
|
||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
|
||||
Arrays.sort(points, new Comparator<Point>() {
|
||||
@Override
|
||||
public int compare(Point o1, Point o2) {
|
||||
int cmp = o1.packedValue.compareTo(o2.packedValue);
|
||||
if (cmp == 0) {
|
||||
cmp = Integer.compare(o1.doc, o2.doc);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
assertNotSame(points, reader.points);
|
||||
assertArrayEquals(points, reader.points);
|
||||
}
|
||||
|
||||
public void testSortByDim() {
|
||||
for (int iter = 0; iter < 5; ++iter) {
|
||||
doTestSortByDim();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSortByDim() {
|
||||
final int numDims = TestUtil.nextInt(random(), 1, 8);
|
||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
|
||||
int[] commonPrefixLengths = new int[numDims];
|
||||
for (int i = 0; i < commonPrefixLengths.length; ++i) {
|
||||
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
|
||||
}
|
||||
BytesRef firstValue = points[0].packedValue;
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
int offset = dim * bytesPerDim;
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
|
||||
}
|
||||
}
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
final int sortedDim = random().nextInt(numDims);
|
||||
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
|
||||
new BytesRef(), new BytesRef());
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
final int offset = sortedDim * bytesPerDim;
|
||||
BytesRef previousValue = reader.points[i-1].packedValue;
|
||||
BytesRef currentValue = reader.points[i].packedValue;
|
||||
int cmp = StringHelper.compare(bytesPerDim,
|
||||
previousValue.bytes, previousValue.offset + offset,
|
||||
currentValue.bytes, currentValue.offset + offset);
|
||||
if (cmp == 0) {
|
||||
cmp = reader.points[i - 1].doc - reader.points[i].doc;
|
||||
}
|
||||
assertTrue(cmp <= 0);
|
||||
}
|
||||
}
|
||||
|
||||
public void testPartition() {
|
||||
for (int iter = 0; iter < 5; ++iter) {
|
||||
doTestPartition();
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestPartition() {
|
||||
final int numDims = TestUtil.nextInt(random(), 1, 8);
|
||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
|
||||
int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
|
||||
final int splitDim = random().nextInt(numDims);
|
||||
BytesRef firstValue = points[0].packedValue;
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
int offset = splitDim * bytesPerDim;
|
||||
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
|
||||
}
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
|
||||
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
|
||||
new BytesRef(), new BytesRef());
|
||||
BytesRef pivotValue = reader.points[pivot].packedValue;
|
||||
int offset = splitDim * bytesPerDim;
|
||||
for (int i = 0; i < points.length; ++i) {
|
||||
BytesRef value = reader.points[i].packedValue;
|
||||
int cmp = StringHelper.compare(bytesPerDim,
|
||||
value.bytes, value.offset + offset,
|
||||
pivotValue.bytes, pivotValue.offset + offset);
|
||||
if (cmp == 0) {
|
||||
cmp = reader.points[i].doc - reader.points[pivot].doc;
|
||||
}
|
||||
if (i < pivot) {
|
||||
assertTrue(cmp <= 0);
|
||||
} else if (i > pivot) {
|
||||
assertTrue(cmp >= 0);
|
||||
} else {
|
||||
assertEquals(0, cmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
|
||||
final int packedBytesLength = numDims * bytesPerDim;
|
||||
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
|
||||
Point[] points = new Point[numPoints];
|
||||
for (int i = 0; i < numPoints; ++i) {
|
||||
byte[] value = new byte[packedBytesLength];
|
||||
random().nextBytes(value);
|
||||
points[i] = new Point(value, random().nextInt(maxDoc));
|
||||
}
|
||||
return points;
|
||||
}
|
||||
|
||||
private static class Point {
|
||||
final BytesRef packedValue;
|
||||
final int doc;
|
||||
|
||||
Point(byte[] packedValue, int doc) {
|
||||
// use a non-null offset to make sure MutablePointsReaderUtils does not ignore it
|
||||
this.packedValue = new BytesRef(packedValue.length + 1);
|
||||
this.packedValue.bytes[0] = (byte) random().nextInt(256);
|
||||
this.packedValue.offset = 1;
|
||||
this.packedValue.length = packedValue.length;
|
||||
this.doc = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj == null || obj instanceof Point == false) {
|
||||
return false;
|
||||
}
|
||||
Point that = (Point) obj;
|
||||
return packedValue.equals(that.packedValue) && doc == that.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 31 * packedValue.hashCode() + doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "value=" + packedValue + " doc=" + doc;
|
||||
}
|
||||
}
|
||||
|
||||
private static class DummyPointsReader extends MutablePointsReader {
|
||||
|
||||
private final Point[] points;
|
||||
|
||||
DummyPointsReader(Point[] points) {
|
||||
this.points = points.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getValue(int i, BytesRef packedValue) {
|
||||
packedValue.bytes = points[i].packedValue.bytes;
|
||||
packedValue.offset = points[i].packedValue.offset;
|
||||
packedValue.length = points[i].packedValue.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getByteAt(int i, int k) {
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
return packedValue.bytes[packedValue.offset + k];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocID(int i) {
|
||||
return points[i].doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void swap(int i, int j) {
|
||||
ArrayUtil.swap(points, i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -259,13 +259,11 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(new PerFieldSimilarityWrapper() {
final Similarity sim = new ClassicSimilarity();

iwc.setSimilarity(new PerFieldSimilarityWrapper(new ClassicSimilarity()) {
@Override
public Similarity get(String name) {
assertEquals("field", name);
return sim;
return defaultSim;
}
});
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

@ -106,6 +106,7 @@ io.netty.netty-all.version = 4.0.36.Final
org.apache.curator.version = 2.8.0
/org.apache.curator/curator-client = ${org.apache.curator.version}
/org.apache.curator/curator-framework = ${org.apache.curator.version}
/org.apache.curator/curator-recipes = ${org.apache.curator.version}

/org.apache.derby/derby = 10.9.1.0

@ -91,7 +91,7 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
final SweetSpotSimilarity ssB = new SweetSpotSimilarity();
ssB.setLengthNormFactors(5,8,0.1f, false);

Similarity sp = new PerFieldSimilarityWrapper() {
Similarity sp = new PerFieldSimilarityWrapper(ss) {
@Override
public Similarity get(String field) {
if (field.equals("bar"))

@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.queries.function.valuesource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
||||
/**
|
||||
* Base class for comparison operators useful within an "if"/conditional.
|
||||
*/
|
||||
public abstract class ComparisonBoolFunction extends BoolFunction {
|
||||
|
||||
private final ValueSource lhs;
|
||||
private final ValueSource rhs;
|
||||
private final String name;
|
||||
|
||||
public ComparisonBoolFunction(ValueSource lhs, ValueSource rhs, String name) {
|
||||
this.lhs = lhs;
|
||||
this.rhs = rhs;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/** Perform the comparison, returning true or false */
|
||||
public abstract boolean compare(int doc, FunctionValues lhs, FunctionValues rhs);
|
||||
|
||||
/** Uniquely identify the operation (ie "gt", "lt" "gte", etc) */
|
||||
public String name() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
final FunctionValues lhsVal = this.lhs.getValues(context, readerContext);
|
||||
final FunctionValues rhsVal = this.rhs.getValues(context, readerContext);
|
||||
final String compLabel = this.name();
|
||||
|
||||
return new BoolDocValues(this) {
|
||||
@Override
|
||||
public boolean boolVal(int doc) {
|
||||
return compare(doc, lhsVal, rhsVal);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return compLabel + "(" + lhsVal.toString(doc) + "," + rhsVal.toString(doc) + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean exists(int doc) {
|
||||
return lhsVal.exists(doc) && rhsVal.exists(doc);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this.getClass() != o.getClass()) return false;
|
||||
ComparisonBoolFunction other = (ComparisonBoolFunction)o;
|
||||
return name().equals(other.name())
|
||||
&& lhs.equals(other.lhs)
|
||||
&& rhs.equals(other.rhs); }
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = this.getClass().hashCode();
|
||||
h = h * 31 + this.name().hashCode();
|
||||
h = h * 31 + lhs.hashCode();
|
||||
h = h * 31 + rhs.hashCode();
|
||||
return h;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return name() + "(" + lhs.description() + "," + rhs.description() + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
|
||||
lhs.createWeight(context, searcher);
|
||||
rhs.createWeight(context, searcher);
|
||||
}
|
||||
|
||||
}
|
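For illustration, a minimal subclass of the new base class (not part of this patch); the actual gt/gte/lt/lte/eq functions added by SOLR-9279 are built along these lines, but this exact class is only a sketch.

package org.apache.lucene.queries.function.valuesource;

import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;

public class GreaterThanBoolFunctionExample extends ComparisonBoolFunction {

  public GreaterThanBoolFunctionExample(ValueSource lhs, ValueSource rhs) {
    super(lhs, rhs, "gt");   // "gt" becomes the label used in toString() and description()
  }

  @Override
  public boolean compare(int doc, FunctionValues lhs, FunctionValues rhs) {
    // compare both sides as doubles for this document
    return lhs.doubleVal(doc) > rhs.doubleVal(doc);
  }
}
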
@ -38,5 +38,10 @@ public class FileMetaData {
this.length = length;
this.checksum = checksum;
}

@Override
public String toString() {
return "FileMetaData(length=" + length + ")";
}
}

@ -118,6 +118,8 @@ class SimpleCopyJob extends CopyJob {
return highPriority ? -1 : 1;
} else if (ord < other.ord) {
return -1;
} else if (ord > other.ord) {
return 1;
} else {
return 0;
}

@ -87,8 +87,9 @@ public class TestGeo3DPoint extends LuceneTestCase {
private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}

return new FilterCodec("Lucene62", Codec.getDefault()) {
@ -97,7 +98,7 @@ public class TestGeo3DPoint extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}

@Override

@ -126,6 +126,7 @@ public final class AssertingPointsFormat extends PointsFormat {
assert false: "point values are out of order";
}
System.arraycopy(packedValue, 0, lastDocValue, 0, bytesPerDim);
lastDocID = docID;
}
in.visit(docID, packedValue);
}
@ -254,11 +255,11 @@ public final class AssertingPointsFormat extends PointsFormat {
}

@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (fieldInfo.getPointDimensionCount() == 0) {
throw new IllegalArgumentException("writing field=\"" + fieldInfo.name + "\" but pointDimensionalCount is 0");
}
in.writeField(fieldInfo, values, maxMBSortInHeap);
in.writeField(fieldInfo, values);
}

@Override

@ -56,11 +56,11 @@ class CrankyPointsFormat extends PointsFormat {
}

@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (random.nextInt(100) == 0) {
throw new IOException("Fake IOException");
}
delegate.writeField(fieldInfo, values, maxMBSortInHeap);
delegate.writeField(fieldInfo, values);
}

@Override

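The hunks above all follow the same API move: the heap budget for sorting points is no longer passed to PointsWriter.writeField(...) but to the Lucene60PointsWriter constructor. A minimal sketch (not part of this patch) of a custom codec built against the new constructor; the class name and the concrete numbers are invented for the example.

import java.io.IOException;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;

public class SmallHeapPointsCodecExample extends FilterCodec {

  public SmallHeapPointsCodecExample() {
    super("Lucene62", Codec.getDefault());   // delegate everything else to the default codec
  }

  @Override
  public PointsFormat pointsFormat() {
    return new PointsFormat() {
      @Override
      public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
        // 512 points per leaf and a 16 MB sort heap, chosen arbitrarily for the example
        return new Lucene60PointsWriter(state, 512, 16.0);
      }

      @Override
      public PointsReader fieldsReader(SegmentReadState state) throws IOException {
        return new Lucene60PointsReader(state);
      }
    };
  }
}
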
@ -67,6 +67,7 @@ import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.SloppyMath;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.bkd.BKDWriter;
|
||||
|
||||
/**
|
||||
* Abstract class to do basic tests for a geospatial impl (high level
|
||||
@ -1247,7 +1248,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
||||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||
return new Lucene60PointsWriter(writeState, pointsInLeaf);
|
||||
return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -92,6 +92,7 @@ public class RandomCodec extends AssertingCodec {
|
||||
// which is less effective for testing.
|
||||
// TODO: improve how we randomize this...
|
||||
private final int maxPointsInLeafNode;
|
||||
private final double maxMBSortInHeap;
|
||||
private final int bkdSplitRandomSeed;
|
||||
|
||||
@Override
|
||||
@ -102,9 +103,9 @@ public class RandomCodec extends AssertingCodec {
|
||||
|
||||
// Randomize how BKDWriter chooses its splis:
|
||||
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode) {
|
||||
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
|
||||
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
|
||||
|
||||
@ -184,6 +185,7 @@ public class RandomCodec extends AssertingCodec {
|
||||
int lowFreqCutoff = TestUtil.nextInt(random, 2, 100);
|
||||
|
||||
maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
|
||||
maxMBSortInHeap = 5.0 + (3*random.nextDouble());
|
||||
bkdSplitRandomSeed = random.nextInt();
|
||||
|
||||
add(avoidCodecs,
|
||||
@ -251,7 +253,8 @@ public class RandomCodec extends AssertingCodec {
|
||||
public String toString() {
|
||||
return super.toString() + ": " + previousMappings.toString() +
|
||||
", docValues:" + previousDVMappings.toString() +
|
||||
", maxPointsInLeafNode=" + maxPointsInLeafNode;
|
||||
", maxPointsInLeafNode=" + maxPointsInLeafNode +
|
||||
", maxMBSortInHeap=" + maxMBSortInHeap;
|
||||
}
|
||||
|
||||
/** Just like {@link BKDWriter} except it evilly picks random ways to split cells on
|
||||
|
@ -31,41 +31,54 @@ import java.util.Random;
|
||||
* for the same field.
|
||||
*/
|
||||
public class RandomSimilarity extends PerFieldSimilarityWrapper {
|
||||
final ClassicSimilarity defaultSim = new ClassicSimilarity();
|
||||
final List<Similarity> knownSims;
|
||||
Map<String,Similarity> previousMappings = new HashMap<>();
|
||||
final Map<String,Similarity> previousMappings = new HashMap<>();
|
||||
final int perFieldSeed;
|
||||
final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord
|
||||
final boolean shouldQueryNorm;
|
||||
|
||||
public RandomSimilarity(Random random) {
|
||||
super(new ClassicSimilarity() {
|
||||
final int coordType = random.nextInt(3); // 0 = no coord, 1 = coord, 2 = crazy coord
|
||||
final boolean shouldQueryNorm = random.nextBoolean();
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
if (coordType == 0) {
|
||||
return 1.0f;
|
||||
} else if (coordType == 1) {
|
||||
return super.coord(overlap, maxOverlap);
|
||||
} else {
|
||||
return overlap / ((float)maxOverlap + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
if (shouldQueryNorm) {
|
||||
return super.queryNorm(sumOfSquaredWeights);
|
||||
} else {
|
||||
return 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String toString() {
|
||||
final String coordMethod;
|
||||
if (coordType == 0) {
|
||||
coordMethod = "no";
|
||||
} else if (coordType == 1) {
|
||||
coordMethod = "yes";
|
||||
} else {
|
||||
coordMethod = "crazy";
|
||||
}
|
||||
return "queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod;
|
||||
}
|
||||
|
||||
});
|
||||
perFieldSeed = random.nextInt();
|
||||
coordType = random.nextInt(3);
|
||||
shouldQueryNorm = random.nextBoolean();
|
||||
knownSims = new ArrayList<>(allSims);
|
||||
Collections.shuffle(knownSims, random);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
if (coordType == 0) {
|
||||
return 1.0f;
|
||||
} else if (coordType == 1) {
|
||||
return defaultSim.coord(overlap, maxOverlap);
|
||||
} else {
|
||||
return overlap / ((float)maxOverlap + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
if (shouldQueryNorm) {
|
||||
return defaultSim.queryNorm(sumOfSquaredWeights);
|
||||
} else {
|
||||
return 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Similarity get(String field) {
|
||||
assert field != null;
|
||||
@ -138,14 +151,6 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
|
||||
|
||||
@Override
|
||||
public synchronized String toString() {
|
||||
final String coordMethod;
|
||||
if (coordType == 0) {
|
||||
coordMethod = "no";
|
||||
} else if (coordType == 1) {
|
||||
coordMethod = "yes";
|
||||
} else {
|
||||
coordMethod = "crazy";
|
||||
}
|
||||
return "RandomSimilarity(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString();
|
||||
return "RandomSimilarity(" + defaultSim + "): " + previousMappings.toString();
|
||||
}
|
||||
}
|
||||
|
@ -66,6 +66,19 @@ New Features
* SOLR-9275: XML QueryParser support (defType=xmlparser) now extensible via configuration.
  (Christine Poerschke)

* SOLR-9038: Solr core snapshots: The current commit can be snapshotted which retains the commit and associates it with
  a name. The core admin API can create snapshots, list them, and delete them. Snapshot names can be referenced in
  doing a core backup, and in replication. Snapshot metadata is stored in a new snapshot_metadata/ dir.
  (Hrishikesh Gadre via David Smiley)

* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
  (Doug Turnbull, David Smiley)

* SOLR-9200: Add Delegation Token Support to Solr.
  (Gregory Chanan)

* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)

Bug Fixes
----------------------

@ -132,6 +145,15 @@ Bug Fixes

* SOLR-9334: CloudSolrClient.collectionStateCache is unbounded (noble)

* SOLR-9339: NPE in CloudSolrClient when the response is null (noble)

* SOLR-8596: Web UI doesn't correctly generate queries which include local parameters (Alexandre Rafalovitch, janhoy)

* SOLR-8645: managed-schema is now syntax highlighted in cloud->Tree view (Alexandre Rafalovitch via janhoy)

* SOLR-8379: UI Cloud->Tree view now shows .txt files correctly (Alexandre Rafalovitch via janhoy)

* SOLR-9308: Fix distributed RTG to forward request params, fixes fq and non-default fl params (hossman)

* SOLR-9179: NPE in IndexSchema using IBM JDK (noble, Colvin Cowie)

@ -143,6 +165,9 @@ Optimizations
* SOLR-9264: Optimize ZkController.publishAndWaitForDownStates to not read all collection states and
  watch relevant collections instead. (Hrishikesh Gadre, shalin)

* SOLR-9335: Solr cache/search/update stats counters now use LongAdder which are supposed to have higher throughput
  under high contention. (Varun Thacker)

Other Changes
----------------------

@ -166,6 +191,15 @@ Other Changes
* SOLR-9163: Sync up basic_configs and data_driven_schema_configs, removing almost all differences
  except what is required for schemaless. (yonik)

* SOLR-9340: Change ZooKeeper disconnect and session expiry related logging from INFO to WARN to
  make debugging easier (Varun Thacker)

* SOLR-9358: [AngularUI] In Cloud->Tree file view area, collapse metadata by default (janhoy)

* SOLR-9256: asserting hasNext() contract in JdbcDataSource in DataImportHandler (Kristine Jetzke via Mikhai Khludnev)

* SOLR-9209: extracting JdbcDataSource.createResultSetIterator() for extension (Kristine Jetzke via Mikhai Khludnev)

================== 6.1.0 ==================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

@ -280,10 +280,14 @@ public class JdbcDataSource extends
resultSetIterator.close();
resultSetIterator = null;
}
resultSetIterator = new ResultSetIterator(query);
resultSetIterator = createResultSetIterator(query);
return resultSetIterator.getIterator();
}

protected ResultSetIterator createResultSetIterator(String query) {
return new ResultSetIterator(query);
}

private void logError(String msg, Exception e) {
LOG.warn(msg, e);
}

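A minimal sketch (not part of this patch) of what the new protected hook from SOLR-9209 enables: a subclass can intercept or replace the iterator creation. The subclass name and the timing output are invented for the example; a real extension would typically return a customized ResultSetIterator instead.

package org.apache.solr.handler.dataimport;

public class TimingJdbcDataSource extends JdbcDataSource {

  @Override
  protected ResultSetIterator createResultSetIterator(String query) {
    long start = System.nanoTime();
    ResultSetIterator it = super.createResultSetIterator(query);
    long elapsedMs = (System.nanoTime() - start) / 1_000_000;
    // hypothetical instrumentation; a real subclass might use SLF4J here
    System.out.println("Opened result set for [" + query + "] in " + elapsedMs + " ms");
    return it;
  }
}
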
@ -510,6 +510,45 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
|
||||
DriverManager.deregisterDriver(driver);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmptyResultSet() throws Exception {
|
||||
MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource);
|
||||
|
||||
props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB");
|
||||
EasyMock.expect(dataSource.getConnection()).andReturn(connection);
|
||||
|
||||
jdbcDataSource.init(context, props);
|
||||
|
||||
connection.setAutoCommit(false);
|
||||
|
||||
Statement statement = mockControl.createMock(Statement.class);
|
||||
EasyMock.expect(connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY))
|
||||
.andReturn(statement);
|
||||
statement.setFetchSize(500);
|
||||
statement.setMaxRows(0);
|
||||
EasyMock.expect(statement.execute("query")).andReturn(true);
|
||||
ResultSet resultSet = mockControl.createMock(ResultSet.class);
|
||||
EasyMock.expect(statement.getResultSet()).andReturn(resultSet);
|
||||
ResultSetMetaData metaData = mockControl.createMock(ResultSetMetaData.class);
|
||||
EasyMock.expect(resultSet.getMetaData()).andReturn(metaData);
|
||||
EasyMock.expect(metaData.getColumnCount()).andReturn(0);
|
||||
EasyMock.expect(resultSet.next()).andReturn(false);
|
||||
resultSet.close();
|
||||
EasyMock.expect(statement.getMoreResults()).andReturn(false);
|
||||
EasyMock.expect(statement.getUpdateCount()).andReturn(-1);
|
||||
statement.close();
|
||||
|
||||
mockControl.replay();
|
||||
|
||||
Iterator<Map<String,Object>> resultSetIterator = jdbcDataSource.getData("query");
|
||||
resultSetIterator.hasNext();
|
||||
resultSetIterator.hasNext();
|
||||
|
||||
mockControl.verify();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("Needs a Mock database server to work")
|
||||
public void testBasic() throws Exception {
|
||||
|
@ -16,6 +16,15 @@
|
||||
*/
|
||||
package org.apache.solr.hadoop;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.common.io.Files;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
@ -35,15 +44,6 @@ import org.apache.zookeeper.KeeperException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Extracts SolrCloud information from ZooKeeper.
|
||||
*/
|
||||
@ -78,8 +78,7 @@ final class ZooKeeperInspector {
|
||||
}
|
||||
SolrZkClient zkClient = getZkClient(zkHost);
|
||||
|
||||
try {
|
||||
ZkStateReader zkStateReader = new ZkStateReader(zkClient);
|
||||
try (ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
|
||||
try {
|
||||
// first check for alias
|
||||
collection = checkForAlias(zkClient, collection);
|
||||
|
@ -134,6 +134,10 @@
|
||||
<dependency org="antlr" name="antlr" rev="${/antlr/antlr}" conf="test.MiniKdc"/>
|
||||
<dependency org="net.sf.ehcache" name="ehcache-core" rev="${/net.sf.ehcache/ehcache-core}" conf="test.MiniKdc"/>
|
||||
|
||||
<dependency org="org.apache.curator" name="curator-framework" rev="${/org.apache.curator/curator-framework}" conf="compile"/>
|
||||
<dependency org="org.apache.curator" name="curator-client" rev="${/org.apache.curator/curator-client}" conf="compile"/>
|
||||
<dependency org="org.apache.curator" name="curator-recipes" rev="${/org.apache.curator/curator-recipes}" conf="compile"/>
|
||||
|
||||
<!-- StatsComponents percentiles Dependencies-->
|
||||
<dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/>
|
||||
<!-- SQL Parser -->
|
||||
|
@ -15,21 +15,26 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.update.SolrIndexWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* A wrapper for an IndexDeletionPolicy instance.
|
||||
* <p>
|
||||
@ -52,9 +57,11 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
|
||||
private final Map<Long, Long> reserves = new ConcurrentHashMap<>();
|
||||
private volatile IndexCommit latestCommit;
|
||||
private final ConcurrentHashMap<Long, AtomicInteger> savedCommits = new ConcurrentHashMap<>();
|
||||
private final SolrSnapshotMetaDataManager snapshotMgr;
|
||||
|
||||
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy) {
|
||||
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy, SolrSnapshotMetaDataManager snapshotMgr) {
|
||||
this.deletionPolicy = deletionPolicy;
|
||||
this.snapshotMgr = snapshotMgr;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -134,7 +141,6 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Internal use for Lucene... do not explicitly call.
|
||||
*/
|
||||
@ -185,7 +191,8 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
|
||||
Long gen = delegate.getGeneration();
|
||||
Long reserve = reserves.get(gen);
|
||||
if (reserve != null && System.nanoTime() < reserve) return;
|
||||
if(savedCommits.containsKey(gen)) return;
|
||||
if (savedCommits.containsKey(gen)) return;
|
||||
if (snapshotMgr.isSnapshotted(gen)) return;
|
||||
delegate.delete();
|
||||
}
|
||||
|
||||
|
@ -81,6 +81,7 @@ import org.apache.solr.common.util.ObjectReleaseTracker;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.handler.IndexFetcher;
|
||||
import org.apache.solr.handler.ReplicationHandler;
|
||||
import org.apache.solr.handler.RequestHandlerBase;
|
||||
@ -184,6 +185,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
||||
private final Map<String,UpdateRequestProcessorChain> updateProcessorChains;
|
||||
private final Map<String, SolrInfoMBean> infoRegistry;
|
||||
private final IndexDeletionPolicyWrapper solrDelPolicy;
|
||||
private final SolrSnapshotMetaDataManager snapshotMgr;
|
||||
private final DirectoryFactory directoryFactory;
|
||||
private IndexReaderFactory indexReaderFactory;
|
||||
private final Codec codec;
|
||||
@ -414,7 +416,19 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
||||
} else {
|
||||
delPolicy = new SolrDeletionPolicy();
|
||||
}
|
||||
return new IndexDeletionPolicyWrapper(delPolicy);
|
||||
|
||||
return new IndexDeletionPolicyWrapper(delPolicy, snapshotMgr);
|
||||
}
|
||||
|
||||
private SolrSnapshotMetaDataManager initSnapshotMetaDataManager() {
|
||||
try {
|
||||
String dirName = getDataDir() + SolrSnapshotMetaDataManager.SNAPSHOT_METADATA_DIR + "/";
|
||||
Directory snapshotDir = directoryFactory.get(dirName, DirContext.DEFAULT,
|
||||
getSolrConfig().indexConfig.lockType);
|
||||
return new SolrSnapshotMetaDataManager(this, snapshotDir);
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void initListeners() {
|
||||
@ -739,6 +753,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
||||
|
||||
initListeners();
|
||||
|
||||
this.snapshotMgr = initSnapshotMetaDataManager();
|
||||
this.solrDelPolicy = initDeletionPolicy(delPolicy);
|
||||
|
||||
this.codec = initCodec(solrConfig, this.schema);
|
||||
@ -1242,6 +1257,17 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
// Close the snapshots meta-data directory.
|
||||
Directory snapshotsDir = snapshotMgr.getSnapshotsDir();
|
||||
try {
|
||||
this.directoryFactory.release(snapshotsDir);
|
||||
} catch (Throwable e) {
|
||||
SolrException.log(log,e);
|
||||
if (e instanceof Error) {
|
||||
throw (Error) e;
|
||||
}
|
||||
}
|
||||
|
||||
if (coreStateClosed) {
|
||||
|
||||
try {
|
||||
@ -2343,6 +2369,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
||||
return solrDelPolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A reference of {@linkplain SolrSnapshotMetaDataManager}
|
||||
* managing the persistent snapshots for this Solr core.
|
||||
*/
|
||||
public SolrSnapshotMetaDataManager getSnapshotMetaDataManager() {
|
||||
return snapshotMgr;
|
||||
}
|
||||
|
||||
public ReentrantLock getRuleExpiryLock() {
|
||||
return ruleExpiryLock;
|
||||
}
|
||||
|
@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.core.snapshots;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This class provides functionality required to handle the data files corresponding to Solr snapshots.
|
||||
*/
|
||||
public class SolrSnapshotManager {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
/**
|
||||
* This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
|
||||
*
|
||||
* @param dir The index directory storing the snapshot.
|
||||
* @param gen The generation number for the {@linkplain IndexCommit}
|
||||
* @throws IOException in case of I/O errors.
|
||||
*/
|
||||
public static void deleteIndexFiles ( Directory dir, Collection<SnapshotMetaData> snapshots, long gen ) throws IOException {
|
||||
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
|
||||
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
|
||||
for (IndexCommit ic : commits) {
|
||||
if (ic.getGeneration() == gen) {
|
||||
deleteIndexFiles(dir,refCounts, ic);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method deletes all files not corresponding to a configured snapshot in the specified index directory.
|
||||
*
|
||||
* @param dir The index directory to search for.
|
||||
* @throws IOException in case of I/O errors.
|
||||
*/
|
||||
public static void deleteNonSnapshotIndexFiles (Directory dir, Collection<SnapshotMetaData> snapshots) throws IOException {
|
||||
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
|
||||
Map<String, Integer> refCounts = buildRefCounts(snapshots, commits);
|
||||
Set<Long> snapshotGenNumbers = snapshots.stream()
|
||||
.map(SnapshotMetaData::getGenerationNumber)
|
||||
.collect(Collectors.toSet());
|
||||
for (IndexCommit ic : commits) {
|
||||
if (!snapshotGenNumbers.contains(ic.getGeneration())) {
|
||||
deleteIndexFiles(dir,refCounts, ic);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method computes reference count for the index files by taking into consideration
|
||||
* (a) configured snapshots and (b) files sharing between two or more {@linkplain IndexCommit} instances.
|
||||
*
|
||||
* @param snapshots A collection of user configured snapshots
|
||||
* @param commits A list of {@linkplain IndexCommit} instances
|
||||
* @return A map containing reference count for each index file referred in one of the {@linkplain IndexCommit} instances.
|
||||
* @throws IOException in case of I/O error.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static Map<String, Integer> buildRefCounts (Collection<SnapshotMetaData> snapshots, List<IndexCommit> commits) throws IOException {
|
||||
Map<String, Integer> result = new HashMap<>();
|
||||
Map<Long, IndexCommit> commitsByGen = commits.stream().collect(
|
||||
Collectors.toMap(IndexCommit::getGeneration, Function.identity()));
|
||||
|
||||
for(SnapshotMetaData md : snapshots) {
|
||||
IndexCommit ic = commitsByGen.get(md.getGenerationNumber());
|
||||
if (ic != null) {
|
||||
Collection<String> fileNames = ic.getFileNames();
|
||||
for(String fileName : fileNames) {
|
||||
int refCount = result.getOrDefault(fileName, 0);
|
||||
result.put(fileName, refCount+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method deletes the index files associated with specified <code>indexCommit</code> provided they
|
||||
* are not referred by some other {@linkplain IndexCommit}.
|
||||
*
|
||||
* @param dir The index directory containing the {@linkplain IndexCommit} to be deleted.
|
||||
* @param refCounts A map containing reference counts for each file associated with every {@linkplain IndexCommit}
|
||||
* in the specified directory.
|
||||
* @param indexCommit The {@linkplain IndexCommit} whose files need to be deleted.
|
||||
* @throws IOException in case of I/O errors.
|
||||
*/
|
||||
private static void deleteIndexFiles ( Directory dir, Map<String, Integer> refCounts, IndexCommit indexCommit ) throws IOException {
|
||||
log.info("Deleting index files for index commit with generation {} in directory {}", indexCommit.getGeneration(), dir);
|
||||
for (String fileName : indexCommit.getFileNames()) {
|
||||
try {
|
||||
// Ensure that a file being deleted is not referred by some other commit.
|
||||
int ref = refCounts.getOrDefault(fileName, 0);
|
||||
log.debug("Reference count for file {} is {}", fileName, ref);
|
||||
if (ref == 0) {
|
||||
dir.deleteFile(fileName);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.warn("Unable to delete file {} in directory {} due to exception {}", fileName, dir, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,416 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.core.snapshots;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This class is responsible to manage the persistent snapshots meta-data for the Solr indexes. The
|
||||
* persistent snapshots are implemented by relying on Lucene {@linkplain IndexDeletionPolicy}
|
||||
* abstraction to configure a specific {@linkplain IndexCommit} to be retained. The
|
||||
* {@linkplain IndexDeletionPolicyWrapper} in Solr uses this class to create/delete the Solr index
|
||||
* snapshots.
|
||||
*/
|
||||
public class SolrSnapshotMetaDataManager {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
public static final String SNAPSHOT_METADATA_DIR = "snapshot_metadata";
|
||||
|
||||
/**
|
||||
* A class defining the meta-data for a specific snapshot.
|
||||
*/
|
||||
public static class SnapshotMetaData {
|
||||
private String name;
|
||||
private String indexDirPath;
|
||||
private long generationNumber;
|
||||
|
||||
public SnapshotMetaData(String name, String indexDirPath, long generationNumber) {
|
||||
super();
|
||||
this.name = name;
|
||||
this.indexDirPath = indexDirPath;
|
||||
this.generationNumber = generationNumber;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getIndexDirPath() {
|
||||
return indexDirPath;
|
||||
}
|
||||
|
||||
public long getGenerationNumber() {
|
||||
return generationNumber;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("SnapshotMetaData[name=");
|
||||
builder.append(name);
|
||||
builder.append(", indexDirPath=");
|
||||
builder.append(indexDirPath);
|
||||
builder.append(", generation=");
|
||||
builder.append(generationNumber);
|
||||
builder.append("]");
|
||||
return builder.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/** Prefix used for the save file. */
|
||||
public static final String SNAPSHOTS_PREFIX = "snapshots_";
|
||||
private static final int VERSION_START = 0;
|
||||
private static final int VERSION_CURRENT = VERSION_START;
|
||||
private static final String CODEC_NAME = "solr-snapshots";
|
||||
|
||||
// The index writer which maintains the snapshots metadata
|
||||
private long nextWriteGen;
|
||||
|
||||
private final Directory dir;
|
||||
|
||||
/** Used to map snapshot name to snapshot meta-data. */
|
||||
protected final Map<String,SnapshotMetaData> nameToDetailsMapping = new LinkedHashMap<>();
|
||||
/** Used to figure out the *current* index data directory path */
|
||||
private final SolrCore solrCore;
|
||||
|
||||
/**
|
||||
* A constructor.
|
||||
*
|
||||
* @param dir The directory where the snapshot meta-data should be stored. Enables updating
|
||||
* the existing meta-data.
|
||||
* @throws IOException in case of errors.
|
||||
*/
|
||||
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir) throws IOException {
|
||||
this(solrCore, dir, OpenMode.CREATE_OR_APPEND);
|
||||
}
|
||||
|
||||
/**
|
||||
* A constructor.
|
||||
*
|
||||
* @param dir The directory where the snapshot meta-data is stored.
|
||||
* @param mode CREATE If previous meta-data should be erased.
|
||||
* APPEND If previous meta-data should be read and updated.
|
||||
* CREATE_OR_APPEND Creates a new meta-data structure if one does not exist
|
||||
* Updates the existing structure if one exists.
|
||||
* @throws IOException in case of errors.
|
||||
*/
|
||||
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir, OpenMode mode) throws IOException {
|
||||
this.solrCore = solrCore;
|
||||
this.dir = dir;
|
||||
|
||||
if (mode == OpenMode.CREATE) {
|
||||
deleteSnapshotMetadataFiles();
|
||||
}
|
||||
|
||||
loadFromSnapshotMetadataFile();
|
||||
|
||||
if (mode == OpenMode.APPEND && nextWriteGen == 0) {
|
||||
throw new IllegalStateException("no snapshots stored in this directory");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The snapshot meta-data directory
|
||||
*/
|
||||
public Directory getSnapshotsDir() {
|
||||
return dir;
|
||||
}
|
||||
|
||||
  /**
   * This method creates a new snapshot meta-data entry.
   *
   * @param name The name of the snapshot.
   * @param indexDirPath The directory path where the index files are stored.
   * @param gen The generation number for the {@linkplain IndexCommit} being snapshotted.
   * @throws IOException in case of I/O errors.
   */
  public synchronized void snapshot(String name, String indexDirPath, long gen) throws IOException {
    Preconditions.checkNotNull(name);

    log.info("Creating the snapshot named {} for core {} associated with index commit with generation {} in directory {}"
        , name, solrCore.getName(), gen, indexDirPath);

    if (nameToDetailsMapping.containsKey(name)) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "A snapshot with name " + name + " already exists");
    }

    SnapshotMetaData d = new SnapshotMetaData(name, indexDirPath, gen);
    nameToDetailsMapping.put(name, d);

    boolean success = false;
    try {
      persist();
      success = true;
    } finally {
      if (!success) {
        try {
          release(name);
        } catch (Exception e) {
          // Suppress so we keep throwing original exception
        }
      }
    }
  }
|
||||
|
||||
  /**
   * This method deletes a previously created snapshot (if any).
   *
   * @param name The name of the snapshot to be deleted.
   * @return The snapshot meta-data, if a snapshot with the given name exists.
   * @throws IOException in case of I/O error
   */
  public synchronized Optional<SnapshotMetaData> release(String name) throws IOException {
    log.info("Deleting the snapshot named {} for core {}", name, solrCore.getName());
    SnapshotMetaData result = nameToDetailsMapping.remove(Preconditions.checkNotNull(name));
    if (result != null) {
      boolean success = false;
      try {
        persist();
        success = true;
      } finally {
        if (!success) {
          nameToDetailsMapping.put(name, result);
        }
      }
    }
    return Optional.ofNullable(result);
  }
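  // Both snapshot() and release() mutate the in-memory map first and then call persist(); if
  // persisting fails, the in-memory change is rolled back (snapshot() calls release(name),
  // release() re-inserts the removed entry) so the map and the on-disk metadata stay consistent.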
|
||||
|
||||
/**
|
||||
* This method reports whether a snapshot has been created for the specified generation number in
* the *current* index directory.
|
||||
*
|
||||
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
|
||||
* @return true if the snapshot is created.
|
||||
* false otherwise.
|
||||
*/
|
||||
public synchronized boolean isSnapshotted(long genNumber) {
|
||||
return !nameToDetailsMapping.isEmpty() && isSnapshotted(solrCore.getIndexDir(), genNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method reports whether a snapshot has been created for the specified generation number in
* the specified index directory.
|
||||
*
|
||||
* @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
|
||||
* @return true if the snapshot is created.
|
||||
* false otherwise.
|
||||
*/
|
||||
public synchronized boolean isSnapshotted(String indexDirPath, long genNumber) {
|
||||
return !nameToDetailsMapping.isEmpty()
|
||||
&& nameToDetailsMapping.values().stream()
|
||||
.anyMatch(entry -> entry.getIndexDirPath().equals(indexDirPath) && entry.getGenerationNumber() == genNumber);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the snapshot meta-data for the specified name (if it exists).
|
||||
*
|
||||
* @param name The name of the snapshot
|
||||
* @return The snapshot meta-data if exists.
|
||||
*/
|
||||
public synchronized Optional<SnapshotMetaData> getSnapshotMetaData(String name) {
|
||||
return Optional.ofNullable(nameToDetailsMapping.get(name));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A list of snapshots created so far.
|
||||
*/
|
||||
public synchronized List<String> listSnapshots() {
|
||||
// We create a copy for thread safety.
|
||||
return new ArrayList<>(nameToDetailsMapping.keySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns a list of snapshots created in a specified index directory.
|
||||
*
|
||||
* @param indexDirPath The index directory path.
|
||||
* @return a list snapshots stored in the specified directory.
|
||||
*/
|
||||
public synchronized Collection<SnapshotMetaData> listSnapshotsInIndexDir(String indexDirPath) {
|
||||
return nameToDetailsMapping.values().stream()
|
||||
.filter(entry -> indexDirPath.equals(entry.getIndexDirPath()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the {@linkplain IndexCommit} associated with the specified
|
||||
* <code>commitName</code>. A snapshot with specified <code>commitName</code> must
|
||||
* be created before invoking this method.
|
||||
*
|
||||
* @param commitName The name of persisted commit
|
||||
* @return the {@linkplain IndexCommit}
|
||||
* @throws IOException in case of I/O error.
|
||||
*/
|
||||
public Optional<IndexCommit> getIndexCommitByName(String commitName) throws IOException {
|
||||
Optional<IndexCommit> result = Optional.empty();
|
||||
Optional<SnapshotMetaData> metaData = getSnapshotMetaData(commitName);
|
||||
if (metaData.isPresent()) {
|
||||
String indexDirPath = metaData.get().getIndexDirPath();
|
||||
long gen = metaData.get().getGenerationNumber();
|
||||
|
||||
Directory d = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
|
||||
try {
|
||||
result = DirectoryReader.listCommits(d)
|
||||
.stream()
|
||||
.filter(ic -> ic.getGeneration() == gen)
|
||||
.findAny();
|
||||
|
||||
if (!result.isPresent()) {
|
||||
log.warn("Unable to find commit with generation {} in the directory {}", gen, indexDirPath);
|
||||
}
|
||||
|
||||
} finally {
|
||||
solrCore.getDirectoryFactory().release(d);
|
||||
}
|
||||
} else {
|
||||
log.warn("Commit with name {} is not persisted for core {}", commitName, solrCore.getName());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
  private synchronized void persist() throws IOException {
    String fileName = SNAPSHOTS_PREFIX + nextWriteGen;
    IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT);
    boolean success = false;
    try {
      CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
      out.writeVInt(nameToDetailsMapping.size());
      for (Entry<String,SnapshotMetaData> ent : nameToDetailsMapping.entrySet()) {
        out.writeString(ent.getKey());
        out.writeString(ent.getValue().getIndexDirPath());
        out.writeVLong(ent.getValue().getGenerationNumber());
      }
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(out);
        IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
      } else {
        IOUtils.close(out);
      }
    }

    dir.sync(Collections.singletonList(fileName));

    if (nextWriteGen > 0) {
      String lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen-1);
      // exception OK: likely it didn't exist
      IOUtils.deleteFilesIgnoringExceptions(dir, lastSaveFile);
    }

    nextWriteGen++;
  }
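  // persist() uses a write-then-swap scheme: the complete snapshot map is written to a new
  // "snapshots_<generation>" file, that file is fsync'ed via Directory#sync, and only then is
  // the previous generation file removed. A crash at any point therefore leaves at least one
  // complete, readable metadata file behind, which loadFromSnapshotMetadataFile() recovers by
  // loading the highest generation it finds.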
|
||||
|
||||
private synchronized void deleteSnapshotMetadataFiles() throws IOException {
|
||||
for(String file : dir.listAll()) {
|
||||
if (file.startsWith(SNAPSHOTS_PREFIX)) {
|
||||
dir.deleteFile(file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the snapshot meta-data information from the given {@link Directory}.
|
||||
*/
|
||||
private synchronized void loadFromSnapshotMetadataFile() throws IOException {
|
||||
log.info("Loading from snapshot metadata file...");
|
||||
long genLoaded = -1;
|
||||
IOException ioe = null;
|
||||
List<String> snapshotFiles = new ArrayList<>();
|
||||
for(String file : dir.listAll()) {
|
||||
if (file.startsWith(SNAPSHOTS_PREFIX)) {
|
||||
long gen = Long.parseLong(file.substring(SNAPSHOTS_PREFIX.length()));
|
||||
if (genLoaded == -1 || gen > genLoaded) {
|
||||
snapshotFiles.add(file);
|
||||
Map<String, SnapshotMetaData> snapshotMetaDataMapping = new HashMap<>();
|
||||
IndexInput in = dir.openInput(file, IOContext.DEFAULT);
|
||||
try {
|
||||
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
int count = in.readVInt();
|
||||
for(int i=0;i<count;i++) {
|
||||
String name = in.readString();
|
||||
String indexDirPath = in.readString();
|
||||
long commitGen = in.readVLong();
|
||||
snapshotMetaDataMapping.put(name, new SnapshotMetaData(name, indexDirPath, commitGen));
|
||||
}
|
||||
} catch (IOException ioe2) {
|
||||
// Save first exception & throw in the end
|
||||
if (ioe == null) {
|
||||
ioe = ioe2;
|
||||
}
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
|
||||
genLoaded = gen;
|
||||
nameToDetailsMapping.clear();
|
||||
nameToDetailsMapping.putAll(snapshotMetaDataMapping);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (genLoaded == -1) {
|
||||
// Nothing was loaded...
|
||||
if (ioe != null) {
|
||||
// ... not for lack of trying:
|
||||
throw ioe;
|
||||
}
|
||||
} else {
|
||||
if (snapshotFiles.size() > 1) {
|
||||
// Remove any broken / old snapshot files:
|
||||
String curFileName = SNAPSHOTS_PREFIX + genLoaded;
|
||||
for(String file : snapshotFiles) {
|
||||
if (!curFileName.equals(file)) {
|
||||
IOUtils.deleteFilesIgnoringExceptions(dir, file);
|
||||
}
|
||||
}
|
||||
}
|
||||
nextWriteGen = 1+genLoaded;
|
||||
}
|
||||
}
|
||||
}
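// A minimal usage sketch of the manager above (illustrative only; the method names are the ones
// defined in this class, while the snapshot name and the surrounding setup are assumed):
//
//   SolrSnapshotMetaDataManager mgr = solrCore.getSnapshotMetaDataManager();
//   mgr.snapshot("nightly", solrCore.getIndexDir(), commit.getGeneration()); // retain this commit
//   for (String name : mgr.listSnapshots()) {
//     log.info("{} -> {}", name, mgr.getSnapshotMetaData(name).orElse(null));
//   }
//   mgr.release("nightly"); // drop the metadata entry again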
|
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Core classes for Solr's persistent snapshots functionality
|
||||
*/
|
||||
package org.apache.solr.core.snapshots;
|
@ -81,6 +81,9 @@ import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.apache.solr.handler.ReplicationHandler.*;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
@ -454,9 +457,18 @@ public class IndexFetcher {
|
||||
// let the system know we are changing dir's and the old one
// may be closed
if (indexDir != null) {
  LOG.info("removing old index directory " + indexDir);
  solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
  solrCore.getDirectoryFactory().remove(indexDir);

  SolrSnapshotMetaDataManager snapshotsMgr = solrCore.getSnapshotMetaDataManager();
  Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);

  // Delete the old index directory only if no snapshot exists in that directory.
  if (snapshots.isEmpty()) {
    LOG.info("removing old index directory " + indexDir);
    solrCore.getDirectoryFactory().remove(indexDir);
  } else {
    SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
  }
}
}
|
||||
|
||||
|
@ -87,6 +87,7 @@ import org.apache.solr.core.SolrDeletionPolicy;
|
||||
import org.apache.solr.core.SolrEventListener;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
@ -505,11 +506,24 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||
numberToKeep = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
IndexCommit indexCommit = delPolicy.getLatestCommit();
IndexCommit indexCommit = null;
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
if (commitName != null) {
  SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
  Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
  if (commit.isPresent()) {
    indexCommit = commit.get();
  } else {
    throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to find an index commit with name " + commitName +
        " for core " + core.getName());
  }
} else {
  IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
  indexCommit = delPolicy.getLatestCommit();

  if (indexCommit == null) {
    indexCommit = req.getSearcher().getIndexReader().getIndexCommit();
  }
}
|
||||
|
||||
String location = params.get(CoreAdminParams.BACKUP_LOCATION);
|
||||
@ -532,7 +546,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
||||
}
|
||||
|
||||
// small race here before the commit point is saved
|
||||
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME));
|
||||
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME), commitName);
|
||||
snapShooter.validateCreateSnapshot();
|
||||
snapShooter.createSnapAsync(indexCommit, numberToKeep, (nl) -> snapShootDetails = nl);
|
||||
|
||||
|
@ -16,13 +16,17 @@
|
||||
*/
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.PluginBag;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
@ -35,10 +39,6 @@ import org.apache.solr.util.stats.TimerContext;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static org.apache.solr.core.RequestParams.USEPARAM;
|
||||
|
||||
/**
|
||||
@ -53,10 +53,10 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
||||
protected boolean httpCaching = true;
|
||||
|
||||
// Statistics
|
||||
private final AtomicLong numRequests = new AtomicLong();
|
||||
private final AtomicLong numServerErrors = new AtomicLong();
|
||||
private final AtomicLong numClientErrors = new AtomicLong();
|
||||
private final AtomicLong numTimeouts = new AtomicLong();
|
||||
private final LongAdder numRequests = new LongAdder();
|
||||
private final LongAdder numServerErrors = new LongAdder();
|
||||
private final LongAdder numClientErrors = new LongAdder();
|
||||
private final LongAdder numTimeouts = new LongAdder();
|
||||
private final Timer requestTimes = new Timer();
|
||||
|
||||
private final long handlerStart;
|
||||
@ -144,7 +144,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
||||
|
||||
@Override
|
||||
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
|
||||
numRequests.incrementAndGet();
|
||||
numRequests.increment();
|
||||
TimerContext timer = requestTimes.time();
|
||||
try {
|
||||
if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM));
|
||||
@ -158,7 +158,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
||||
Object partialResults = header.get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY);
|
||||
boolean timedOut = partialResults == null ? false : (Boolean)partialResults;
|
||||
if( timedOut ) {
|
||||
numTimeouts.incrementAndGet();
|
||||
numTimeouts.increment();
|
||||
rsp.setHttpCaching(false);
|
||||
}
|
||||
}
|
||||
@ -185,9 +185,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
||||
SolrException.log(log, e);
|
||||
|
||||
if (isServerError) {
|
||||
numServerErrors.incrementAndGet();
|
||||
numServerErrors.increment();
|
||||
} else {
|
||||
numClientErrors.incrementAndGet();
|
||||
numClientErrors.increment();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ package org.apache.solr.handler;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URI;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
import java.util.concurrent.Callable;
|
||||
@ -32,6 +33,9 @@ import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -63,6 +67,7 @@ public class RestoreCore implements Callable<Boolean> {
|
||||
String restoreIndexName = "restore." + dateFormat.format(new Date());
|
||||
String restoreIndexPath = core.getDataDir() + restoreIndexName;
|
||||
|
||||
String indexDirPath = core.getIndexDir();
|
||||
Directory restoreIndexDir = null;
|
||||
Directory indexDir = null;
|
||||
try {
|
||||
@ -71,7 +76,7 @@ public class RestoreCore implements Callable<Boolean> {
|
||||
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
|
||||
//Prefer local copy.
|
||||
indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
|
||||
indexDir = core.getDirectoryFactory().get(indexDirPath,
|
||||
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
|
||||
//Move all files from backupDir to restoreIndexDir
|
||||
@ -130,7 +135,16 @@ public class RestoreCore implements Callable<Boolean> {
|
||||
}
if (success) {
  core.getDirectoryFactory().doneWithDirectory(indexDir);
  core.getDirectoryFactory().remove(indexDir);

  SolrSnapshotMetaDataManager snapshotsMgr = core.getSnapshotMetaDataManager();
  Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);

  // Delete the old index directory only if no snapshot exists in that directory.
  if (snapshots.isEmpty()) {
    core.getDirectoryFactory().remove(indexDir);
  } else {
    SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
  }
}

return true;
|
||||
|
@ -26,12 +26,14 @@ import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
@ -39,6 +41,7 @@ import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository.PathType;
|
||||
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.slf4j.Logger;
|
||||
@ -59,6 +62,7 @@ public class SnapShooter {
|
||||
private URI baseSnapDirPath = null;
|
||||
private URI snapshotDirPath = null;
|
||||
private BackupRepository backupRepo = null;
|
||||
private String commitName; // can be null
|
||||
|
||||
@Deprecated
|
||||
public SnapShooter(SolrCore core, String location, String snapshotName) {
|
||||
@ -71,14 +75,14 @@ public class SnapShooter {
|
||||
} else {
|
||||
snapDirStr = core.getCoreDescriptor().getInstanceDir().resolve(location).normalize().toString();
|
||||
}
|
||||
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName);
|
||||
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName, null);
|
||||
}
|
||||
|
||||
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
|
||||
initialize(backupRepo, core, location, snapshotName);
|
||||
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
|
||||
initialize(backupRepo, core, location, snapshotName, commitName);
|
||||
}
|
||||
|
||||
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
|
||||
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
|
||||
this.solrCore = Preconditions.checkNotNull(core);
|
||||
this.backupRepo = Preconditions.checkNotNull(backupRepo);
|
||||
this.baseSnapDirPath = backupRepo.createURI(Preconditions.checkNotNull(location)).normalize();
|
||||
@ -90,6 +94,7 @@ public class SnapShooter {
|
||||
directoryName = "snapshot." + fmt.format(new Date());
|
||||
}
|
||||
this.snapshotDirPath = backupRepo.createURI(location, directoryName);
|
||||
this.commitName = commitName;
|
||||
}
|
||||
|
||||
public BackupRepository getBackupRepository() {
|
||||
@ -145,16 +150,26 @@ public class SnapShooter {
|
||||
}
|
||||
|
||||
public NamedList createSnapshot() throws Exception {
|
||||
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
|
||||
RefCounted<SolrIndexSearcher> searcher = solrCore.getSearcher();
|
||||
try {
|
||||
//TODO should we try solrCore.getDeletionPolicy().getLatestCommit() first?
|
||||
IndexCommit indexCommit = searcher.get().getIndexReader().getIndexCommit();
|
||||
deletionPolicy.saveCommitPoint(indexCommit.getGeneration());
|
||||
try {
|
||||
return createSnapshot(indexCommit);
|
||||
} finally {
|
||||
deletionPolicy.releaseCommitPoint(indexCommit.getGeneration());
|
||||
if (commitName != null) {
|
||||
SolrSnapshotMetaDataManager snapshotMgr = solrCore.getSnapshotMetaDataManager();
|
||||
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
|
||||
if(commit.isPresent()) {
|
||||
return createSnapshot(commit.get());
|
||||
}
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to find an index commit with name " + commitName +
|
||||
" for core " + solrCore.getName());
|
||||
} else {
|
||||
//TODO should we try solrCore.getDeletionPolicy().getLatestCommit() first?
|
||||
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
|
||||
IndexCommit indexCommit = searcher.get().getIndexReader().getIndexCommit();
|
||||
deletionPolicy.saveCommitPoint(indexCommit.getGeneration());
|
||||
try {
|
||||
return createSnapshot(indexCommit);
|
||||
} finally {
|
||||
deletionPolicy.releaseCommitPoint(indexCommit.getGeneration());
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
searcher.decref();
|
||||
|
@ -121,6 +121,9 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
|
||||
.withFunctionName("outerHashJoin", OuterHashJoinStream.class)
|
||||
.withFunctionName("intersect", IntersectStream.class)
|
||||
.withFunctionName("complement", ComplementStream.class)
|
||||
.withFunctionName("sort", SortStream.class)
|
||||
.withFunctionName("train", TextLogitStream.class)
|
||||
.withFunctionName("features", FeaturesSelectionStream.class)
|
||||
.withFunctionName("daemon", DaemonStream.class)
|
||||
.withFunctionName("sort", SortStream.class)
|
||||
.withFunctionName("select", SelectStream.class)
|
||||
|
@ -34,6 +34,7 @@ import java.util.concurrent.Future;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
@ -59,9 +60,13 @@ import org.apache.solr.core.CachingDirectoryFactory;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.CoreDescriptor;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.core.backup.repository.BackupRepository;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
|
||||
import org.apache.solr.handler.RestoreCore;
|
||||
import org.apache.solr.handler.SnapShooter;
|
||||
import org.apache.solr.handler.admin.CoreAdminHandler.CoreAdminOp;
|
||||
@ -794,22 +799,26 @@ enum CoreAdminOperation implements CoreAdminOp {
|
||||
+ " parameter or as a default repository property");
|
||||
}
|
||||
|
||||
try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
|
||||
SnapShooter snapShooter = new SnapShooter(repository, core, location, name);
|
||||
// validateCreateSnapshot will create parent dirs instead of throw; that choice is dubious.
|
||||
// But we want to throw. One reason is that
|
||||
// this dir really should, in fact must, already exist here if triggered via a collection backup on a shared
|
||||
// file system. Otherwise, perhaps the FS location isn't shared -- we want an error.
|
||||
if (!snapShooter.getBackupRepository().exists(snapShooter.getLocation())) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST,
|
||||
"Directory to contain snapshots doesn't exist: " + snapShooter.getLocation());
|
||||
// An optional parameter to describe the snapshot to be backed-up. If this
|
||||
// parameter is not supplied, the latest index commit is backed-up.
|
||||
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
|
||||
|
||||
try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
|
||||
SnapShooter snapShooter = new SnapShooter(repository, core, location, name, commitName);
|
||||
// validateCreateSnapshot will create parent dirs instead of throw; that choice is dubious.
|
||||
// But we want to throw. One reason is that
|
||||
// this dir really should, in fact must, already exist here if triggered via a collection backup on a shared
|
||||
// file system. Otherwise, perhaps the FS location isn't shared -- we want an error.
|
||||
if (!snapShooter.getBackupRepository().exists(snapShooter.getLocation())) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Directory to contain snapshots doesn't exist: " + snapShooter.getLocation());
|
||||
}
|
||||
snapShooter.validateCreateSnapshot();
|
||||
snapShooter.createSnapshot();
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Failed to backup core=" + cname + " because " + e, e);
|
||||
}
|
||||
snapShooter.validateCreateSnapshot();
|
||||
snapShooter.createSnapshot();
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Failed to backup core=" + cname + " because " + e, e);
|
||||
}
|
||||
}),
|
||||
|
||||
RESTORECORE_OP(RESTORECORE, it -> {
|
||||
@ -845,6 +854,92 @@ enum CoreAdminOperation implements CoreAdminOp {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to restore core=" + core.getName());
|
||||
}
|
||||
}
|
||||
}),
|
||||
CREATESNAPSHOT_OP(CREATESNAPSHOT, it -> {
  CoreContainer cc = it.handler.getCoreContainer();
  final SolrParams params = it.req.getParams();

  String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
  String cname = params.required().get(CoreAdminParams.CORE);
  try (SolrCore core = cc.getCore(cname)) {
    if (core == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
    }

    String indexDirPath = core.getIndexDir();
    IndexCommit ic = core.getDeletionPolicy().getLatestCommit();
    if (ic == null) {
      RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
      try {
        ic = searcher.get().getIndexReader().getIndexCommit();
      } finally {
        searcher.decref();
      }
    }
    SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
    mgr.snapshot(commitName, indexDirPath, ic.getGeneration());

    it.rsp.add("core", core.getName());
    it.rsp.add("commitName", commitName);
    it.rsp.add("indexDirPath", indexDirPath);
    it.rsp.add("generation", ic.getGeneration());
  }
}),
|
||||
DELETESNAPSHOT_OP(DELETESNAPSHOT, it -> {
|
||||
CoreContainer cc = it.handler.getCoreContainer();
|
||||
final SolrParams params = it.req.getParams();
|
||||
|
||||
String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
|
||||
String cname = params.required().get(CoreAdminParams.CORE);
|
||||
try (SolrCore core = cc.getCore(cname)) {
|
||||
if (core == null) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
|
||||
}
|
||||
|
||||
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
|
||||
Optional<SnapshotMetaData> metadata = mgr.release(commitName);
|
||||
if (metadata.isPresent()) {
|
||||
long gen = metadata.get().getGenerationNumber();
|
||||
String indexDirPath = metadata.get().getIndexDirPath();
|
||||
|
||||
// If the directory storing the snapshot is not the same as the *current* core
|
||||
// index directory, then delete the files corresponding to this snapshot.
|
||||
// Otherwise we leave the index files related to snapshot as is (assuming the
|
||||
// underlying Solr IndexDeletionPolicy will clean them up appropriately).
|
||||
if (!indexDirPath.equals(core.getIndexDir())) {
|
||||
Directory d = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
|
||||
try {
|
||||
SolrSnapshotManager.deleteIndexFiles(d, mgr.listSnapshotsInIndexDir(indexDirPath), gen);
|
||||
} finally {
|
||||
core.getDirectoryFactory().release(d);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}),
|
||||
LISTSNAPSHOTS_OP(LISTSNAPSHOTS, it -> {
|
||||
CoreContainer cc = it.handler.getCoreContainer();
|
||||
final SolrParams params = it.req.getParams();
|
||||
|
||||
String cname = params.required().get(CoreAdminParams.CORE);
|
||||
try ( SolrCore core = cc.getCore(cname) ) {
|
||||
if (core == null) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
|
||||
}
|
||||
|
||||
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
|
||||
NamedList result = new NamedList();
|
||||
for (String name : mgr.listSnapshots()) {
|
||||
Optional<SnapshotMetaData> metadata = mgr.getSnapshotMetaData(name);
|
||||
if ( metadata.isPresent() ) {
|
||||
NamedList<String> props = new NamedList<>();
|
||||
props.add("generation", String.valueOf(metadata.get().getGenerationNumber()));
|
||||
props.add("indexDirPath", metadata.get().getIndexDirPath());
|
||||
result.add(name, props);
|
||||
}
|
||||
}
|
||||
it.rsp.add("snapshots", result);
|
||||
}
|
||||
});
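// Illustrative sketch: assuming the standard /admin/cores endpoint and the usual parameter names
// behind CoreAdminParams ("core", "commitName"), the snapshot operations above would be invoked
// with requests along these lines (core and snapshot names are made up):
//
//   /admin/cores?action=CREATESNAPSHOT&core=techproducts&commitName=commit1
//   /admin/cores?action=LISTSNAPSHOTS&core=techproducts
//   /admin/cores?action=DELETESNAPSHOT&core=techproducts&commitName=commit1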
|
||||
|
||||
final CoreAdminParams.CoreAdminAction action;
|
||||
|
@ -249,7 +249,8 @@ public class RealTimeGetComponent extends SearchComponent
|
||||
docid = segid + ctx.docBase;
|
||||
|
||||
if (rb.getFilters() != null) {
|
||||
for (Query q : rb.getFilters()) {
|
||||
for (Query raw : rb.getFilters()) {
|
||||
Query q = raw.rewrite(searcher.getIndexReader());
|
||||
Scorer scorer = searcher.createWeight(q, false).scorer(ctx);
|
||||
if (scorer == null || segid != scorer.iterator().advance(segid)) {
|
||||
// filter doesn't match.
|
||||
@ -448,7 +449,7 @@ public class RealTimeGetComponent extends SearchComponent
|
||||
ZkController zkController = rb.req.getCore().getCoreDescriptor().getCoreContainer().getZkController();
|
||||
|
||||
// if shards=... then use that
|
||||
if (zkController != null && params.get("shards") == null) {
|
||||
if (zkController != null && params.get(ShardParams.SHARDS) == null) {
|
||||
CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
|
||||
|
||||
String collection = cloudDescriptor.getCollectionName();
|
||||
@ -470,38 +471,46 @@ public class RealTimeGetComponent extends SearchComponent
|
||||
|
||||
for (Map.Entry<String,List<String>> entry : sliceToId.entrySet()) {
|
||||
String shard = entry.getKey();
|
||||
String shardIdList = StrUtils.join(entry.getValue(), ',');
|
||||
|
||||
ShardRequest sreq = new ShardRequest();
|
||||
|
||||
sreq.purpose = 1;
|
||||
ShardRequest sreq = createShardRequest(rb, entry.getValue());
|
||||
// sreq.shards = new String[]{shard}; // TODO: would be nice if this would work...
|
||||
sreq.shards = sliceToShards(rb, collection, shard);
|
||||
sreq.actualShards = sreq.shards;
|
||||
sreq.params = new ModifiableSolrParams();
|
||||
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
|
||||
sreq.params.set("distrib",false);
|
||||
sreq.params.set("ids", shardIdList);
|
||||
|
||||
|
||||
rb.addRequest(this, sreq);
|
||||
}
|
||||
} else {
|
||||
String shardIdList = StrUtils.join(reqIds.allIds, ',');
|
||||
ShardRequest sreq = new ShardRequest();
|
||||
|
||||
sreq.purpose = 1;
|
||||
ShardRequest sreq = createShardRequest(rb, reqIds.allIds);
|
||||
sreq.shards = null; // ALL
|
||||
sreq.actualShards = sreq.shards;
|
||||
sreq.params = new ModifiableSolrParams();
|
||||
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
|
||||
sreq.params.set("distrib",false);
|
||||
sreq.params.set("ids", shardIdList);
|
||||
|
||||
rb.addRequest(this, sreq);
|
||||
}
|
||||
|
||||
return ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for creating a new ShardRequest for the specified ids, based on the params
|
||||
* specified for the current request. The new ShardRequest does not yet know anything about
|
||||
* which shard/slice it will be sent to.
|
||||
*/
|
||||
private ShardRequest createShardRequest(final ResponseBuilder rb, final List<String> ids) {
|
||||
final ShardRequest sreq = new ShardRequest();
|
||||
sreq.purpose = 1;
|
||||
sreq.params = new ModifiableSolrParams(rb.req.getParams());
|
||||
|
||||
// TODO: how to avoid hardcoding this and hit the same handler?
|
||||
sreq.params.set(ShardParams.SHARDS_QT,"/get");
|
||||
sreq.params.set("distrib",false);
|
||||
|
||||
sreq.params.remove(ShardParams.SHARDS);
|
||||
sreq.params.remove("id");
|
||||
sreq.params.remove("ids");
|
||||
sreq.params.set("ids", StrUtils.join(ids, ','));
|
||||
|
||||
return sreq;
|
||||
}
|
||||
|
||||
private String[] sliceToShards(ResponseBuilder rb, String collection, String slice) {
|
||||
String lookup = collection + '_' + slice; // seems either form may be filled in rb.slices?
|
||||
|
@ -0,0 +1,240 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SparseFixedBitSet;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
||||
public class IGainTermsQParserPlugin extends QParserPlugin {

  public static final String NAME = "igain";

  @Override
  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new IGainTermsQParser(qstr, localParams, params, req);
  }

  private static class IGainTermsQParser extends QParser {

    public IGainTermsQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
      super(qstr, localParams, params, req);
    }

    @Override
    public Query parse() throws SyntaxError {

      String field = getParam("field");
      String outcome = getParam("outcome");
      int numTerms = Integer.parseInt(getParam("numTerms"));
      int positiveLabel = Integer.parseInt(getParam("positiveLabel"));

      return new IGainTermsQuery(field, outcome, positiveLabel, numTerms);
    }
  }
|
||||
|
||||
private static class IGainTermsQuery extends AnalyticsQuery {
|
||||
|
||||
private String field;
|
||||
private String outcome;
|
||||
private int numTerms;
|
||||
private int positiveLabel;
|
||||
|
||||
public IGainTermsQuery(String field, String outcome, int positiveLabel, int numTerms) {
|
||||
this.field = field;
|
||||
this.outcome = outcome;
|
||||
this.numTerms = numTerms;
|
||||
this.positiveLabel = positiveLabel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DelegatingCollector getAnalyticsCollector(ResponseBuilder rb, IndexSearcher searcher) {
|
||||
return new IGainTermsCollector(rb, searcher, field, outcome, positiveLabel, numTerms);
|
||||
}
|
||||
}
|
||||
|
||||
private static class IGainTermsCollector extends DelegatingCollector {
|
||||
|
||||
private String field;
|
||||
private String outcome;
|
||||
private IndexSearcher searcher;
|
||||
private ResponseBuilder rb;
|
||||
private int positiveLabel;
|
||||
private int numTerms;
|
||||
private int count;
|
||||
|
||||
private NumericDocValues leafOutcomeValue;
|
||||
private SparseFixedBitSet positiveSet;
|
||||
private SparseFixedBitSet negativeSet;
|
||||
|
||||
|
||||
private int numPositiveDocs;
|
||||
|
||||
|
||||
public IGainTermsCollector(ResponseBuilder rb, IndexSearcher searcher, String field, String outcome, int positiveLabel, int numTerms) {
|
||||
this.rb = rb;
|
||||
this.searcher = searcher;
|
||||
this.field = field;
|
||||
this.outcome = outcome;
|
||||
this.positiveSet = new SparseFixedBitSet(searcher.getIndexReader().maxDoc());
|
||||
this.negativeSet = new SparseFixedBitSet(searcher.getIndexReader().maxDoc());
|
||||
|
||||
this.numTerms = numTerms;
|
||||
this.positiveLabel = positiveLabel;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
super.doSetNextReader(context);
|
||||
LeafReader reader = context.reader();
|
||||
leafOutcomeValue = reader.getNumericDocValues(outcome);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
super.collect(doc);
|
||||
++count;
|
||||
if (leafOutcomeValue.get(doc) == positiveLabel) {
|
||||
positiveSet.set(context.docBase + doc);
|
||||
numPositiveDocs++;
|
||||
} else {
|
||||
negativeSet.set(context.docBase + doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() throws IOException {
|
||||
NamedList<Double> analytics = new NamedList<Double>();
|
||||
NamedList<Integer> topFreq = new NamedList();
|
||||
|
||||
NamedList<Integer> allFreq = new NamedList();
|
||||
|
||||
rb.rsp.add("featuredTerms", analytics);
|
||||
rb.rsp.add("docFreq", topFreq);
|
||||
rb.rsp.add("numDocs", count);
|
||||
|
||||
TreeSet<TermWithScore> topTerms = new TreeSet<>();
|
||||
|
||||
double numDocs = count;
|
||||
double pc = numPositiveDocs / numDocs;
|
||||
double entropyC = binaryEntropy(pc);
|
||||
|
||||
Terms terms = MultiFields.getFields(searcher.getIndexReader()).terms(field);
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
BytesRef term;
|
||||
PostingsEnum postingsEnum = null;
|
||||
while ((term = termsEnum.next()) != null) {
|
||||
postingsEnum = termsEnum.postings(postingsEnum);
|
||||
int xc = 0;
|
||||
int nc = 0;
|
||||
while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (positiveSet.get(postingsEnum.docID())) {
|
||||
xc++;
|
||||
} else if (negativeSet.get(postingsEnum.docID())) {
|
||||
nc++;
|
||||
}
|
||||
}
|
||||
|
||||
int docFreq = xc+nc;
|
||||
|
||||
double entropyContainsTerm = binaryEntropy( (double) xc / docFreq );
|
||||
double entropyNotContainsTerm = binaryEntropy( (double) (numPositiveDocs - xc) / (numDocs - docFreq + 1) );
|
||||
double score = entropyC - ( (docFreq / numDocs) * entropyContainsTerm + (1.0 - docFreq / numDocs) * entropyNotContainsTerm);
|
||||
|
||||
topFreq.add(term.utf8ToString(), docFreq);
|
||||
if (topTerms.size() < numTerms) {
|
||||
topTerms.add(new TermWithScore(term.utf8ToString(), score));
|
||||
} else {
|
||||
if (topTerms.first().score < score) {
|
||||
topTerms.pollFirst();
|
||||
topTerms.add(new TermWithScore(term.utf8ToString(), score));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (TermWithScore topTerm : topTerms) {
|
||||
analytics.add(topTerm.term, topTerm.score);
|
||||
topFreq.add(topTerm.term, allFreq.get(topTerm.term));
|
||||
}
|
||||
|
||||
if (this.delegate instanceof DelegatingCollector) {
|
||||
((DelegatingCollector) this.delegate).finish();
|
||||
}
|
||||
}
|
||||
|
||||
private double binaryEntropy(double prob) {
|
||||
if (prob == 0 || prob == 1) return 0;
|
||||
return (-1 * prob * Math.log(prob)) + (-1 * (1.0 - prob) * Math.log(1.0 - prob));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static class TermWithScore implements Comparable<TermWithScore>{
|
||||
public final String term;
|
||||
public final double score;
|
||||
|
||||
public TermWithScore(String term, double score) {
|
||||
this.term = term;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return term.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj == null) return false;
|
||||
if (obj.getClass() != getClass()) return false;
|
||||
TermWithScore other = (TermWithScore) obj;
|
||||
return other.term.equals(this.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(TermWithScore o) {
|
||||
int cmp = Double.compare(this.score, o.score);
|
||||
if (cmp == 0) {
|
||||
return this.term.compareTo(o.term);
|
||||
} else {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
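// Illustrative sketch: given the parameters read in parse() above, an igain request could use a
// local-params query such as the following (field, outcome and values are made up):
//
//   q={!igain field=tv_text outcome=out_i positiveLabel=1 numTerms=100}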
|
||||
|
||||
|
@ -23,7 +23,7 @@ import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
@ -61,11 +61,11 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
|
||||
* of an LRUCache at the same time. Make sure everything is thread safe.
|
||||
*/
|
||||
private static class CumulativeStats {
|
||||
AtomicLong lookups = new AtomicLong();
|
||||
AtomicLong hits = new AtomicLong();
|
||||
AtomicLong inserts = new AtomicLong();
|
||||
AtomicLong evictions = new AtomicLong();
|
||||
AtomicLong evictionsRamUsage = new AtomicLong();
|
||||
LongAdder lookups = new LongAdder();
|
||||
LongAdder hits = new LongAdder();
|
||||
LongAdder inserts = new LongAdder();
|
||||
LongAdder evictions = new LongAdder();
|
||||
LongAdder evictionsRamUsage = new LongAdder();
|
||||
}
|
||||
|
||||
private CumulativeStats stats;
|
||||
@ -124,8 +124,8 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
|
||||
iterator.remove();
|
||||
evictions++;
|
||||
evictionsRamUsage++;
|
||||
stats.evictions.incrementAndGet();
|
||||
stats.evictionsRamUsage.incrementAndGet();
|
||||
stats.evictions.increment();
|
||||
stats.evictionsRamUsage.increment();
|
||||
} while (iterator.hasNext() && ramBytesUsed > maxRamBytes);
|
||||
// must return false according to javadocs of removeEldestEntry if we're modifying
|
||||
// the map ourselves
|
||||
@ -135,7 +135,7 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
|
||||
// this doesn't need to be synchronized because it will
|
||||
// only be called in the context of a higher level synchronized block.
|
||||
evictions++;
|
||||
stats.evictions.incrementAndGet();
|
||||
stats.evictions.increment();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -180,7 +180,7 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
|
||||
public V put(K key, V value) {
|
||||
synchronized (map) {
|
||||
if (getState() == State.LIVE) {
|
||||
stats.inserts.incrementAndGet();
|
||||
stats.inserts.increment();
|
||||
}
|
||||
|
||||
// increment local inserts regardless of state???
|
||||
@ -232,10 +232,10 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
|
||||
if (getState() == State.LIVE) {
|
||||
// only increment lookups and hits if we are live.
|
||||
lookups++;
|
||||
stats.lookups.incrementAndGet();
|
||||
stats.lookups.increment();
|
||||
if (val!=null) {
|
||||
hits++;
|
||||
stats.hits.incrementAndGet();
|
||||
stats.hits.increment();
|
||||
}
|
||||
}
|
||||
return val;
|
||||
@ -341,15 +341,15 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
|
||||
}
|
||||
lst.add("warmupTime", warmupTime);
|
||||
|
||||
long clookups = stats.lookups.get();
|
||||
long chits = stats.hits.get();
|
||||
long clookups = stats.lookups.longValue();
|
||||
long chits = stats.hits.longValue();
|
||||
lst.add("cumulative_lookups", clookups);
|
||||
lst.add("cumulative_hits", chits);
|
||||
lst.add("cumulative_hitratio", calcHitRatio(clookups, chits));
|
||||
lst.add("cumulative_inserts", stats.inserts.get());
|
||||
lst.add("cumulative_evictions", stats.evictions.get());
|
||||
lst.add("cumulative_inserts", stats.inserts.longValue());
|
||||
lst.add("cumulative_evictions", stats.evictions.longValue());
|
||||
if (maxRamBytes != Long.MAX_VALUE) {
|
||||
lst.add("cumulative_evictionsRamUsage", stats.evictionsRamUsage.get());
|
||||
lst.add("cumulative_evictionsRamUsage", stats.evictionsRamUsage.longValue());
|
||||
}
|
||||
|
||||
return lst;
|
||||
|
@ -16,6 +16,11 @@
|
||||
*/
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
@ -26,11 +31,6 @@ import org.apache.solr.search.join.GraphQParserPlugin;
|
||||
import org.apache.solr.search.mlt.MLTQParserPlugin;
|
||||
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrInfoMBean {
|
||||
/** internal use - name of the default parser */
|
||||
public static final String DEFAULT_QTYPE = LuceneQParserPlugin.NAME;
|
||||
@ -77,6 +77,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
|
||||
map.put(GraphQParserPlugin.NAME, GraphQParserPlugin.class);
|
||||
map.put(XmlQParserPlugin.NAME, XmlQParserPlugin.class);
|
||||
map.put(GraphTermsQParserPlugin.NAME, GraphTermsQParserPlugin.class);
|
||||
map.put(IGainTermsQParserPlugin.NAME, IGainTermsQParserPlugin.class);
|
||||
map.put(TextLogisticRegressionQParserPlugin.NAME, TextLogisticRegressionQParserPlugin.class);
|
||||
standardPlugins = Collections.unmodifiableMap(map);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,283 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.solr.client.solrj.io.ClassificationEvaluation;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;

/**
 * Returns an AnalyticsQuery implementation that performs
 * one Gradient Descent iteration of a result set to train a
 * logistic regression model
 *
 * The TextLogitStream provides the parallel iterative framework for this class.
 **/

public class TextLogisticRegressionQParserPlugin extends QParserPlugin {
  public static final String NAME = "tlogit";

  @Override
  public void init(NamedList args) {
  }

  @Override
  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new TextLogisticRegressionQParser(qstr, localParams, params, req);
  }

  private static class TextLogisticRegressionQParser extends QParser{

    TextLogisticRegressionQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
      super(qstr, localParams, params, req);
    }

    public Query parse() {

      String fs = params.get("feature");
      String[] terms = params.get("terms").split(",");
      String ws = params.get("weights");
      String dfsStr = params.get("idfs");
      int iteration = params.getInt("iteration");
      String outcome = params.get("outcome");
      int positiveLabel = params.getInt("positiveLabel", 1);
      double threshold = params.getDouble("threshold", 0.5);
      double alpha = params.getDouble("alpha", 0.01);

      double[] idfs = new double[terms.length];
      String[] idfsArr = dfsStr.split(",");
      for (int i = 0; i < idfsArr.length; i++) {
        idfs[i] = Double.parseDouble(idfsArr[i]);
      }

      double[] weights = new double[terms.length+1];

      if(ws != null) {
        String[] wa = ws.split(",");
        for (int i = 0; i < wa.length; i++) {
          weights[i] = Double.parseDouble(wa[i]);
        }
      } else {
        for(int i=0; i<weights.length; i++) {
          weights[i]= 1.0d;
        }
      }

      TrainingParams input = new TrainingParams(fs, terms, idfs, outcome, weights, iteration, alpha, positiveLabel, threshold);

      return new TextLogisticRegressionQuery(input);
    }
  }

  private static class TextLogisticRegressionQuery extends AnalyticsQuery {
    private TrainingParams trainingParams;

    public TextLogisticRegressionQuery(TrainingParams trainingParams) {
      this.trainingParams = trainingParams;
    }

    public DelegatingCollector getAnalyticsCollector(ResponseBuilder rbsp, IndexSearcher indexSearcher) {
      return new TextLogisticRegressionCollector(rbsp, indexSearcher, trainingParams);
    }
  }

  private static class TextLogisticRegressionCollector extends DelegatingCollector {
    private TrainingParams trainingParams;
    private LeafReader leafReader;

    private double[] workingDeltas;
    private ClassificationEvaluation classificationEvaluation;
    private double[] weights;

    private ResponseBuilder rbsp;
    private NumericDocValues leafOutcomeValue;
    private double totalError;
    private SparseFixedBitSet positiveDocsSet;
    private SparseFixedBitSet docsSet;
    private IndexSearcher searcher;

    TextLogisticRegressionCollector(ResponseBuilder rbsp, IndexSearcher searcher,
                                    TrainingParams trainingParams) {
      this.trainingParams = trainingParams;
      this.workingDeltas = new double[trainingParams.weights.length];
      this.weights = Arrays.copyOf(trainingParams.weights, trainingParams.weights.length);
      this.rbsp = rbsp;
      this.classificationEvaluation = new ClassificationEvaluation();
      this.searcher = searcher;
      positiveDocsSet = new SparseFixedBitSet(searcher.getIndexReader().numDocs());
      docsSet = new SparseFixedBitSet(searcher.getIndexReader().numDocs());
    }

    public void doSetNextReader(LeafReaderContext context) throws IOException {
      super.doSetNextReader(context);
      leafReader = context.reader();
      leafOutcomeValue = leafReader.getNumericDocValues(trainingParams.outcome);
    }

    public void collect(int doc) throws IOException{

      int outcome = (int) leafOutcomeValue.get(doc);
      outcome = trainingParams.positiveLabel == outcome? 1 : 0;
      if (outcome == 1) {
        positiveDocsSet.set(context.docBase + doc);
      }
      docsSet.set(context.docBase+doc);

    }

    public void finish() throws IOException {

      Map<Integer, double[]> docVectors = new HashMap<>();
      Terms terms = MultiFields.getFields(searcher.getIndexReader()).terms(trainingParams.feature);
      TermsEnum termsEnum = terms.iterator();
      PostingsEnum postingsEnum = null;
      int termIndex = 0;
      for (String termStr : trainingParams.terms) {
        BytesRef term = new BytesRef(termStr);
        if (termsEnum.seekExact(term)) {
          postingsEnum = termsEnum.postings(postingsEnum);
          while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int docId = postingsEnum.docID();
            if (docsSet.get(docId)) {
              double[] vector = docVectors.get(docId);
              if (vector == null) {
                vector = new double[trainingParams.terms.length+1];
                vector[0] = 1.0;
                docVectors.put(docId, vector);
              }
              vector[termIndex + 1] = trainingParams.idfs[termIndex] * (1.0 + Math.log(postingsEnum.freq()));
            }
          }
        }
        termIndex++;
      }

      for (Map.Entry<Integer, double[]> entry : docVectors.entrySet()) {
        double[] vector = entry.getValue();
        int outcome = 0;
        if (positiveDocsSet.get(entry.getKey())) {
          outcome = 1;
        }
        double sig = sigmoid(sum(multiply(vector, weights)));
        double error = sig - outcome;
        double lastSig = sigmoid(sum(multiply(vector, trainingParams.weights)));
        totalError += Math.abs(lastSig - outcome);
        classificationEvaluation.count(outcome, lastSig >= trainingParams.threshold ? 1 : 0);

        workingDeltas = multiply(error * trainingParams.alpha, vector);

        for(int i = 0; i< workingDeltas.length; i++) {
          weights[i] -= workingDeltas[i];
        }
      }

      NamedList analytics = new NamedList();
      rbsp.rsp.add("logit", analytics);

      List<Double> outWeights = new ArrayList<>();
      for(Double d : weights) {
        outWeights.add(d);
      }

      analytics.add("weights", outWeights);
      analytics.add("error", totalError);
      analytics.add("evaluation", classificationEvaluation.toMap());
      analytics.add("feature", trainingParams.feature);
      analytics.add("positiveLabel", trainingParams.positiveLabel);
      if(this.delegate instanceof DelegatingCollector) {
        ((DelegatingCollector)this.delegate).finish();
      }
    }

    private double sigmoid(double in) {
      double d = 1.0 / (1+Math.exp(-in));
      return d;
    }

    private double[] multiply(double[] vals, double[] weights) {
      for(int i = 0; i < vals.length; ++i) {
        workingDeltas[i] = vals[i] * weights[i];
      }

      return workingDeltas;
    }

    private double[] multiply(double d, double[] vals) {
      for(int i = 0; i<vals.length; ++i) {
        workingDeltas[i] = vals[i] * d;
      }

      return workingDeltas;
    }

    private double sum(double[] vals) {
      double d = 0.0d;
      for(double val : vals) {
        d += val;
      }

      return d;
    }

  }

  private static class TrainingParams {
    public final String feature;
    public final String[] terms;
    public final double[] idfs;
    public final String outcome;
    public final double[] weights;
    public final int interation;
    public final int positiveLabel;
    public final double threshold;
    public final double alpha;

    public TrainingParams(String feature, String[] terms, double[] idfs, String outcome, double[] weights, int interation, double alpha, int positiveLabel, double threshold) {
      this.feature = feature;
      this.terms = terms;
      this.idfs = idfs;
      this.outcome = outcome;
      this.weights = weights;
      this.alpha = alpha;
      this.interation = interation;
      this.positiveLabel = positiveLabel;
      this.threshold = threshold;
    }
  }
}
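Editor's note: for readers unfamiliar with the update performed in finish() above, the following stand-alone sketch restates the same stochastic gradient descent step for logistic regression on a single feature vector. The class and method names are illustrative only and are not part of the patch.

// Minimal sketch of the per-document update in finish(): predict with the
// sigmoid, compute the error against the 0/1 outcome, and move the weights
// against the gradient scaled by the learning rate alpha.
public class LogisticStepSketch {
  static double sigmoid(double x) {
    return 1.0 / (1.0 + Math.exp(-x));
  }

  // vector[0] is the bias term (1.0); the rest are tf-idf style feature values.
  static void gradientStep(double[] weights, double[] vector, int outcome, double alpha) {
    double dot = 0.0;
    for (int i = 0; i < weights.length; i++) {
      dot += weights[i] * vector[i];
    }
    double error = sigmoid(dot) - outcome;       // prediction error
    for (int i = 0; i < weights.length; i++) {
      weights[i] -= alpha * error * vector[i];   // same update the collector applies
    }
  }
}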
@ -64,6 +64,7 @@ import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.function.CollapseScoreFunction;
import org.apache.solr.search.function.OrdFieldSource;
import org.apache.solr.search.function.ReverseOrdFieldSource;
import org.apache.solr.search.function.SolrComparisonBoolFunction;
import org.apache.solr.search.function.distance.GeoDistValueSourceParser;
import org.apache.solr.search.function.distance.GeohashFunction;
import org.apache.solr.search.function.distance.GeohashHaversineFunction;
@ -826,6 +827,57 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
      }
    });

    addParser("gt", new ValueSourceParser() {
      @Override
      public ValueSource parse(FunctionQParser fp) throws SyntaxError {
        ValueSource lhsValSource = fp.parseValueSource();
        ValueSource rhsValSource = fp.parseValueSource();

        return new SolrComparisonBoolFunction(lhsValSource, rhsValSource, "gt", (cmp) -> cmp > 0);
      }
    });

    addParser("lt", new ValueSourceParser() {
      @Override
      public ValueSource parse(FunctionQParser fp) throws SyntaxError {
        ValueSource lhsValSource = fp.parseValueSource();
        ValueSource rhsValSource = fp.parseValueSource();

        return new SolrComparisonBoolFunction(lhsValSource, rhsValSource, "lt", (cmp) -> cmp < 0);
      }
    });

    addParser("gte", new ValueSourceParser() {
      @Override
      public ValueSource parse(FunctionQParser fp) throws SyntaxError {
        ValueSource lhsValSource = fp.parseValueSource();
        ValueSource rhsValSource = fp.parseValueSource();

        return new SolrComparisonBoolFunction(lhsValSource, rhsValSource, "gte", (cmp) -> cmp >= 0);

      }
    });

    addParser("lte", new ValueSourceParser() {
      @Override
      public ValueSource parse(FunctionQParser fp) throws SyntaxError {
        ValueSource lhsValSource = fp.parseValueSource();
        ValueSource rhsValSource = fp.parseValueSource();

        return new SolrComparisonBoolFunction(lhsValSource, rhsValSource, "lte", (cmp) -> cmp <= 0);
      }
    });

    addParser("eq", new ValueSourceParser() {
      @Override
      public ValueSource parse(FunctionQParser fp) throws SyntaxError {
        ValueSource lhsValSource = fp.parseValueSource();
        ValueSource rhsValSource = fp.parseValueSource();

        return new SolrComparisonBoolFunction(lhsValSource, rhsValSource, "eq", (cmp) -> cmp == 0);
      }
    });

    addParser("def", new ValueSourceParser() {
      @Override
      public ValueSource parse(FunctionQParser fp) throws SyntaxError {
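Editor's note: as a usage illustration only (the field names are invented, not taken from the patch), the comparison parsers registered above can be combined with other function queries in a request once this change is applied, for example:

fl=cheaper:lt(sale_price,list_price)
fl=label:if(gte(popularity,100),'popular','niche')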
@ -0,0 +1,58 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search.function;

import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.ComparisonBoolFunction;

/**
 * Refines {@link ComparisonBoolFunction} to compare based on a 'long' or 'double' depending on whether
 * any of the FunctionValues are {@link LongDocValues}.
 */
public class SolrComparisonBoolFunction extends ComparisonBoolFunction {

  private final Compare cmp;

  public interface Compare {
    boolean compare(int integer);
  }

  public SolrComparisonBoolFunction(ValueSource lhs, ValueSource rhs, String name, Compare cmp) {
    super(lhs, rhs, name);
    this.cmp = cmp;
  }

  @Override
  public boolean compare(int doc, FunctionValues lhs, FunctionValues rhs) {
    // TODO consider a separate FunctionValues impl, one for Long, one for Double
    // performs the safest possible numeric comparison, if both lhs and rhs are Longs, then
    // we perform a Long comparison to avoid the issues with precision when casting to doubles
    boolean lhsAnInt = (lhs instanceof LongDocValues || lhs instanceof IntDocValues);
    boolean rhsAnInt = (rhs instanceof LongDocValues || rhs instanceof IntDocValues);
    if (lhsAnInt && rhsAnInt) {
      return cmp.compare(Long.compare(lhs.longVal(doc), rhs.longVal(doc)));
    } else {
      return cmp.compare(Double.compare(lhs.doubleVal(doc), rhs.doubleVal(doc)));
    }
  }

  // note: don't override equals; the "name" will be unique and is already compared
}
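Editor's note: a small self-contained example of the precision issue the comment in compare() refers to. Distinct long values above 2^53 can collapse to the same double, so a double-based comparison would wrongly report equality; this example is illustrative and not part of the patch.

public class LongVsDoubleCompare {
  public static void main(String[] args) {
    long a = (1L << 53) + 1;   // 9007199254740993, not exactly representable as a double
    long b = 1L << 53;         // 9007199254740992
    System.out.println(Long.compare(a, b));                      // 1  -> a > b, correct
    System.out.println(Double.compare((double) a, (double) b));  // 0  -> "equal" after the lossy cast
  }
}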
@ -132,16 +132,15 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
      }
    }
    assert null != defaultSim;
    final Similarity defaultSimilarity = defaultSim;
    similarity = new PerFieldSimilarityWrapper() {
    similarity = new PerFieldSimilarityWrapper(defaultSim) {
      @Override
      public Similarity get(String name) {
        FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name);
        if (fieldType == null) {
          return defaultSimilarity;
          return defaultSim;
        } else {
          Similarity similarity = fieldType.getSimilarity();
          return similarity == null ? defaultSimilarity : similarity;
          return similarity == null ? defaultSim : similarity;
        }
      }
    };

@ -17,18 +17,11 @@
package org.apache.solr.security;

import javax.servlet.FilterChain;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.Closeable;
import java.io.IOException;
import java.security.Principal;
import java.util.Map;

import org.apache.http.auth.BasicUserPrincipal;

/**
 *
 * @lucene.experimental
@ -42,32 +35,20 @@ public abstract class AuthenticationPlugin implements Closeable {
   * @param pluginConfig Config parameters, possibly from a ZK source
   */
  public abstract void init(Map<String, Object> pluginConfig);

  protected void forward(String user, ServletRequest req, ServletResponse rsp,
                         FilterChain chain) throws IOException, ServletException {
    if(user != null) {
      final Principal p = new BasicUserPrincipal(user);
      req = new HttpServletRequestWrapper((HttpServletRequest) req) {
        @Override
        public Principal getUserPrincipal() {
          return p;
        }
      };
    }
    chain.doFilter(req,rsp);
  }

  /**
   * This method must authenticate the request. Upon a successful authentication, this
   * This method attempts to authenticate the request. Upon a successful authentication, this
   * must call the next filter in the filter chain and set the user principal of the request,
   * or else, upon an error or an authentication failure, throw an exception.
   *
   *
   * @param request the http request
   * @param response the http response
   * @param filterChain the servlet filter chain
   * @return false if the request should not be processed by Solr (not continue), i.e.
   *         the response and status code have already been sent.
   * @throws Exception any exception thrown during the authentication, e.g. PrivilegedActionException
   */
  public abstract void doAuthenticate(ServletRequest request, ServletResponse response,
  public abstract boolean doAuthenticate(ServletRequest request, ServletResponse response,
      FilterChain filterChain) throws Exception;


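Editor's note: a minimal sketch of what the new boolean contract looks like for an implementer, assuming only the abstract methods shown above. The plugin class, the "X-Demo-User" header, and the servlet imports it relies on are invented for illustration and are not part of the patch.

// Hypothetical plugin: trusts a demo header. Returns true when the filter chain
// was invoked (request continues), false when a response was already sent here.
public class HeaderTrustAuthPlugin extends AuthenticationPlugin {
  @Override
  public void init(Map<String, Object> pluginConfig) {}

  @Override
  public boolean doAuthenticate(ServletRequest request, ServletResponse response,
                                FilterChain filterChain) throws Exception {
    HttpServletRequest req = (HttpServletRequest) request;
    String user = req.getHeader("X-Demo-User");            // invented header, illustration only
    if (user == null) {
      ((HttpServletResponse) response).sendError(401, "credentials missing");
      return false;                                        // response sent, stop processing
    }
    final Principal principal = new BasicUserPrincipal(user);
    filterChain.doFilter(new HttpServletRequestWrapper(req) {
      @Override
      public Principal getUserPrincipal() {
        return principal;                                  // expose the authenticated user
      }
    }, response);
    return true;                                           // chain was invoked, request continues
  }

  @Override
  public void close() {}
}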
@ -99,7 +99,7 @@ public class BasicAuthPlugin extends AuthenticationPlugin implements ConfigEdita
  }

  @Override
  public void doAuthenticate(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws Exception {
  public boolean doAuthenticate(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws Exception {

    HttpServletRequest request = (HttpServletRequest) servletRequest;
    HttpServletResponse response = (HttpServletResponse) servletResponse;
@ -127,6 +127,7 @@ public class BasicAuthPlugin extends AuthenticationPlugin implements ConfigEdita
          }
        };
        filterChain.doFilter(wrapper, response);
        return true;
      }

    } else {
@ -143,8 +144,10 @@ public class BasicAuthPlugin extends AuthenticationPlugin implements ConfigEdita
      } else {
        request.setAttribute(AuthenticationPlugin.class.getName(), zkAuthentication.getPromptHeaders());
        filterChain.doFilter(request, response);
        return true;
      }
    }
    return false;
  }

  @Override
@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.security;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import javax.servlet.FilterChain;
|
||||
import javax.servlet.FilterConfig;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.ServletRequest;
|
||||
import javax.servlet.ServletResponse;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletRequestWrapper;
|
||||
|
||||
import org.apache.curator.RetryPolicy;
|
||||
import org.apache.curator.framework.AuthInfo;
|
||||
import org.apache.curator.framework.CuratorFramework;
|
||||
import org.apache.curator.framework.CuratorFrameworkFactory;
|
||||
import org.apache.curator.framework.api.ACLProvider;
|
||||
import org.apache.curator.retry.ExponentialBackoffRetry;
|
||||
|
||||
import org.apache.hadoop.security.authentication.server.AuthenticationHandler;
|
||||
import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticationFilter;
|
||||
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkACLProvider;
|
||||
import org.apache.solr.common.cloud.ZkCredentialsProvider;
|
||||
import org.apache.zookeeper.data.ACL;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class DelegationTokenKerberosFilter extends DelegationTokenAuthenticationFilter {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private CuratorFramework curatorFramework;
|
||||
|
||||
@Override
|
||||
public void init(FilterConfig conf) throws ServletException {
|
||||
if (conf != null && "zookeeper".equals(conf.getInitParameter("signer.secret.provider"))) {
|
||||
SolrZkClient zkClient =
|
||||
(SolrZkClient)conf.getServletContext().getAttribute(KerberosPlugin.DELEGATION_TOKEN_ZK_CLIENT);
|
||||
conf.getServletContext().setAttribute("signer.secret.provider.zookeeper.curator.client",
|
||||
getCuratorClient(zkClient));
|
||||
}
|
||||
super.init(conf);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doFilter(ServletRequest request, ServletResponse response,
|
||||
FilterChain filterChain) throws IOException, ServletException {
|
||||
// HttpClient 4.4.x throws NPE if query string is null and parsed through URLEncodedUtils.
|
||||
// See HTTPCLIENT-1746 and HADOOP-12767
|
||||
HttpServletRequest httpRequest = (HttpServletRequest)request;
|
||||
String queryString = httpRequest.getQueryString();
|
||||
final String nonNullQueryString = queryString == null ? "" : queryString;
|
||||
HttpServletRequest requestNonNullQueryString = new HttpServletRequestWrapper(httpRequest){
|
||||
@Override
|
||||
public String getQueryString() {
|
||||
return nonNullQueryString;
|
||||
}
|
||||
};
|
||||
super.doFilter(requestNonNullQueryString, response, filterChain);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
super.destroy();
|
||||
if (curatorFramework != null) curatorFramework.close();
|
||||
curatorFramework = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void initializeAuthHandler(String authHandlerClassName,
|
||||
FilterConfig filterConfig) throws ServletException {
|
||||
// set the internal authentication handler in order to record whether the request should continue
|
||||
super.initializeAuthHandler(authHandlerClassName, filterConfig);
|
||||
AuthenticationHandler authHandler = getAuthenticationHandler();
|
||||
super.initializeAuthHandler(KerberosPlugin.RequestContinuesRecorderAuthenticationHandler.class.getName(), filterConfig);
|
||||
KerberosPlugin.RequestContinuesRecorderAuthenticationHandler newAuthHandler =
|
||||
(KerberosPlugin.RequestContinuesRecorderAuthenticationHandler)getAuthenticationHandler();
|
||||
newAuthHandler.setAuthHandler(authHandler);
|
||||
}
|
||||
|
||||
protected CuratorFramework getCuratorClient(SolrZkClient zkClient) {
|
||||
// should we try to build a RetryPolicy off of the ZkController?
|
||||
RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3);
|
||||
if (zkClient == null) {
|
||||
throw new IllegalArgumentException("zkClient required");
|
||||
}
|
||||
String zkHost = zkClient.getZkServerAddress();
|
||||
String zkChroot = zkHost.substring(zkHost.indexOf("/"));
|
||||
zkChroot = zkChroot.startsWith("/") ? zkChroot.substring(1) : zkChroot;
|
||||
String zkNamespace = zkChroot + SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH;
|
||||
String zkConnectionString = zkHost.substring(0, zkHost.indexOf("/"));
|
||||
SolrZkToCuratorCredentialsACLs curatorToSolrZk = new SolrZkToCuratorCredentialsACLs(zkClient);
|
||||
final int connectionTimeoutMs = 30000; // this value is currently hard coded, see SOLR-7561.
|
||||
|
||||
curatorFramework = CuratorFrameworkFactory.builder()
|
||||
.namespace(zkNamespace)
|
||||
.connectString(zkConnectionString)
|
||||
.retryPolicy(retryPolicy)
|
||||
.aclProvider(curatorToSolrZk.getACLProvider())
|
||||
.authorization(curatorToSolrZk.getAuthInfos())
|
||||
.sessionTimeoutMs(zkClient.getZkClientTimeout())
|
||||
.connectionTimeoutMs(connectionTimeoutMs)
|
||||
.build();
|
||||
curatorFramework.start();
|
||||
return curatorFramework;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Solr Zk Credentials/ACLs to Curator versions
|
||||
*/
|
||||
protected static class SolrZkToCuratorCredentialsACLs {
|
||||
private final ACLProvider aclProvider;
|
||||
private final List<AuthInfo> authInfos;
|
||||
|
||||
public SolrZkToCuratorCredentialsACLs(SolrZkClient zkClient) {
|
||||
this.aclProvider = createACLProvider(zkClient);
|
||||
this.authInfos = createAuthInfo(zkClient);
|
||||
}
|
||||
|
||||
public ACLProvider getACLProvider() { return aclProvider; }
|
||||
public List<AuthInfo> getAuthInfos() { return authInfos; }
|
||||
|
||||
private ACLProvider createACLProvider(SolrZkClient zkClient) {
|
||||
final ZkACLProvider zkACLProvider = zkClient.getZkACLProvider();
|
||||
return new ACLProvider() {
|
||||
@Override
|
||||
public List<ACL> getDefaultAcl() {
|
||||
return zkACLProvider.getACLsToAdd(null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ACL> getAclForPath(String path) {
|
||||
List<ACL> acls = zkACLProvider.getACLsToAdd(path);
|
||||
return acls;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private List<AuthInfo> createAuthInfo(SolrZkClient zkClient) {
|
||||
List<AuthInfo> ret = new LinkedList<AuthInfo>();
|
||||
|
||||
// In theory the credentials to add could change here if zookeeper hasn't been initialized
|
||||
ZkCredentialsProvider credentialsProvider =
|
||||
zkClient.getZkClientConnectionStrategy().getZkCredentialsToAddAutomatically();
|
||||
for (ZkCredentialsProvider.ZkCredentials zkCredentials : credentialsProvider.getCredentials()) {
|
||||
ret.add(new AuthInfo(zkCredentials.getScheme(), zkCredentials.getAuth()));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
@ -26,6 +26,7 @@ import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.security.authentication.server.AuthenticationFilter;
import org.apache.hadoop.security.authentication.server.AuthenticationHandler;

public class KerberosFilter extends AuthenticationFilter {

@ -34,6 +35,19 @@ public class KerberosFilter extends AuthenticationFilter {
    super.init(conf);
  }

  @Override
  protected void initializeAuthHandler(String authHandlerClassName,
      FilterConfig filterConfig) throws ServletException {
    // set the internal authentication handler in order to record whether the request should continue
    super.initializeAuthHandler(authHandlerClassName, filterConfig);
    AuthenticationHandler authHandler = getAuthenticationHandler();
    super.initializeAuthHandler(
        KerberosPlugin.RequestContinuesRecorderAuthenticationHandler.class.getName(), filterConfig);
    KerberosPlugin.RequestContinuesRecorderAuthenticationHandler newAuthHandler =
        (KerberosPlugin.RequestContinuesRecorderAuthenticationHandler)getAuthenticationHandler();
    newAuthHandler.setAuthHandler(authHandler);
  }

  @Override
  protected void doFilter(FilterChain filterChain, HttpServletRequest request,
      HttpServletResponse response) throws IOException, ServletException {
@ -16,14 +16,18 @@
|
||||
*/
|
||||
package org.apache.solr.security;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.Enumeration;
|
||||
import java.util.EventListener;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.servlet.Filter;
|
||||
@ -41,12 +45,22 @@ import javax.servlet.SessionCookieConfig;
|
||||
import javax.servlet.SessionTrackingMode;
|
||||
import javax.servlet.FilterRegistration.Dynamic;
|
||||
import javax.servlet.descriptor.JspConfigDescriptor;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import javax.servlet.http.HttpServletResponseWrapper;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.commons.collections.iterators.IteratorEnumeration;
|
||||
import org.apache.hadoop.security.authentication.client.AuthenticationException;
|
||||
import org.apache.hadoop.security.authentication.server.AuthenticationHandler;
|
||||
import org.apache.hadoop.security.authentication.server.AuthenticationToken;
|
||||
import org.apache.solr.client.solrj.impl.HttpClientConfigurer;
|
||||
import org.apache.solr.client.solrj.impl.Krb5HttpClientConfigurer;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -55,7 +69,7 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
HttpClientConfigurer kerberosConfigurer = new Krb5HttpClientConfigurer();
|
||||
Filter kerberosFilter = new KerberosFilter();
|
||||
Filter kerberosFilter;
|
||||
|
||||
public static final String NAME_RULES_PARAM = "solr.kerberos.name.rules";
|
||||
public static final String COOKIE_DOMAIN_PARAM = "solr.kerberos.cookie.domain";
|
||||
@ -64,6 +78,26 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
public static final String KEYTAB_PARAM = "solr.kerberos.keytab";
|
||||
public static final String TOKEN_VALID_PARAM = "solr.kerberos.token.valid";
|
||||
public static final String COOKIE_PORT_AWARE_PARAM = "solr.kerberos.cookie.portaware";
|
||||
public static final String DELEGATION_TOKEN_ENABLED = "solr.kerberos.delegation.token.enabled";
|
||||
public static final String DELEGATION_TOKEN_KIND = "solr.kerberos.delegation.token.kind";
|
||||
public static final String DELEGATION_TOKEN_VALIDITY = "solr.kerberos.delegation.token.validity";
|
||||
public static final String DELEGATION_TOKEN_SECRET_PROVIDER = "solr.kerberos.delegation.token.signer.secret.provider";
|
||||
public static final String DELEGATION_TOKEN_SECRET_PROVIDER_ZK_PATH =
|
||||
"solr.kerberos.delegation.token.signer.secret.provider.zookeper.path";
|
||||
public static final String DELEGATION_TOKEN_SECRET_MANAGER_ZNODE_WORKING_PATH =
|
||||
"solr.kerberos.delegation.token.secret.manager.znode.working.path";
|
||||
public static final String DELEGATION_TOKEN_TYPE_DEFAULT = "solr-dt";
|
||||
|
||||
// filled in by Plugin/Filter
|
||||
static final String REQUEST_CONTINUES_ATTR =
|
||||
"org.apache.solr.security.kerberosplugin.requestcontinues";
|
||||
static final String DELEGATION_TOKEN_ZK_CLIENT =
|
||||
"solr.kerberos.delegation.token.zk.client";
|
||||
|
||||
// allows test to specify an alternate auth handler
|
||||
@VisibleForTesting
|
||||
public static final String AUTH_HANDLER_PARAM = "solr.kerberos.auth.handler";
|
||||
|
||||
private final CoreContainer coreContainer;
|
||||
|
||||
public KerberosPlugin(CoreContainer coreContainer) {
|
||||
@ -74,12 +108,48 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
public void init(Map<String, Object> pluginConfig) {
|
||||
try {
|
||||
Map<String, String> params = new HashMap();
|
||||
params.put("type", "kerberos");
|
||||
putParam(params, "type", AUTH_HANDLER_PARAM, "kerberos");
|
||||
putParam(params, "kerberos.name.rules", NAME_RULES_PARAM, "DEFAULT");
|
||||
putParam(params, "token.valid", TOKEN_VALID_PARAM, "30");
|
||||
putParam(params, "cookie.path", COOKIE_PATH_PARAM, "/");
|
||||
putParam(params, "kerberos.principal", PRINCIPAL_PARAM, null);
|
||||
putParam(params, "kerberos.keytab", KEYTAB_PARAM, null);
|
||||
if ("kerberos".equals(params.get("type"))) {
|
||||
putParam(params, "kerberos.principal", PRINCIPAL_PARAM, null);
|
||||
putParam(params, "kerberos.keytab", KEYTAB_PARAM, null);
|
||||
} else {
|
||||
// allow tests which specify AUTH_HANDLER_PARAM to avoid specifying kerberos principal/keytab
|
||||
putParamOptional(params, "kerberos.principal", PRINCIPAL_PARAM);
|
||||
putParamOptional(params, "kerberos.keytab", KEYTAB_PARAM);
|
||||
}
|
||||
|
||||
String delegationTokenStr = System.getProperty(DELEGATION_TOKEN_ENABLED, null);
|
||||
boolean delegationTokenEnabled =
|
||||
(delegationTokenStr == null) ? false : Boolean.parseBoolean(delegationTokenStr);
|
||||
ZkController controller = coreContainer.getZkController();
|
||||
|
||||
if (delegationTokenEnabled) {
|
||||
putParam(params, "delegation-token.token-kind", DELEGATION_TOKEN_KIND, DELEGATION_TOKEN_TYPE_DEFAULT);
|
||||
if (coreContainer.isZooKeeperAware()) {
|
||||
putParam(params, "signer.secret.provider", DELEGATION_TOKEN_SECRET_PROVIDER, "zookeeper");
|
||||
if ("zookeeper".equals(params.get("signer.secret.provider"))) {
|
||||
String zkHost = controller.getZkServerAddress();
|
||||
putParam(params, "token.validity", DELEGATION_TOKEN_VALIDITY, "36000");
|
||||
params.put("zk-dt-secret-manager.enable", "true");
|
||||
// Note - Curator complains if the znodeWorkingPath starts with /
|
||||
String chrootPath = zkHost.substring(zkHost.indexOf("/"));
|
||||
String relativePath = chrootPath.startsWith("/") ? chrootPath.substring(1) : chrootPath;
|
||||
putParam(params, "zk-dt-secret-manager.znodeWorkingPath",
|
||||
DELEGATION_TOKEN_SECRET_MANAGER_ZNODE_WORKING_PATH,
|
||||
relativePath + SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH + "/zkdtsm");
|
||||
putParam(params, "signer.secret.provider.zookeeper.path",
|
||||
DELEGATION_TOKEN_SECRET_PROVIDER_ZK_PATH, "/token");
|
||||
// need to ensure krb5 is setup properly before running curator;
|
||||
// the coreContainer should take care of this by calling configure on the
|
||||
// kerberosConfigurer.
|
||||
}
|
||||
} else {
|
||||
log.info("CoreContainer is not ZooKeeperAware, not setting ZK-related delegation token properties");
|
||||
}
|
||||
}
|
||||
|
||||
// Special handling for the "cookie.domain" based on whether port should be
|
||||
// appended to the domain. Useful for situations where multiple solr nodes are
|
||||
@ -94,16 +164,27 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
if (host==null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required parameter '"+COOKIE_DOMAIN_PARAM+"'.");
|
||||
}
|
||||
int port = coreContainer.getZkController().getHostPort();
|
||||
int port = controller.getHostPort();
|
||||
params.put("cookie.domain", host + ":" + port);
|
||||
}
|
||||
|
||||
|
||||
final ServletContext servletContext = new AttributeOnlyServletContext();
|
||||
if (delegationTokenEnabled) {
|
||||
kerberosFilter = new DelegationTokenKerberosFilter();
|
||||
// pass an attribute-enabled context in order to pass the zkClient
|
||||
// and because the filter may pass a curator instance.
|
||||
if (controller != null) {
|
||||
servletContext.setAttribute(DELEGATION_TOKEN_ZK_CLIENT, controller.getZkClient());
|
||||
}
|
||||
} else {
|
||||
kerberosFilter = new KerberosFilter();
|
||||
}
|
||||
log.info("Params: "+params);
|
||||
|
||||
FilterConfig conf = new FilterConfig() {
|
||||
@Override
|
||||
public ServletContext getServletContext() {
|
||||
return noContext;
|
||||
return servletContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -136,11 +217,43 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
params.put(internalParamName, value);
|
||||
}
|
||||
|
||||
private void putParamOptional(Map<String, String> params, String internalParamName, String externalParamName) {
|
||||
String value = System.getProperty(externalParamName);
|
||||
if (value!=null) {
|
||||
params.put(internalParamName, value);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doAuthenticate(ServletRequest req, ServletResponse rsp,
|
||||
public boolean doAuthenticate(ServletRequest req, ServletResponse rsp,
|
||||
FilterChain chain) throws Exception {
|
||||
log.debug("Request to authenticate using kerberos: "+req);
|
||||
kerberosFilter.doFilter(req, rsp, chain);
|
||||
|
||||
final HttpServletResponse frsp = (HttpServletResponse)rsp;
|
||||
|
||||
// kerberosFilter may close the stream and write to closed streams,
|
||||
// see HADOOP-13346. To work around, pass a PrintWriter that ignores
|
||||
// closes
|
||||
HttpServletResponse rspCloseShield = new HttpServletResponseWrapper(frsp) {
|
||||
@SuppressForbidden(reason = "Hadoop DelegationTokenAuthenticationFilter uses response writer, this" +
|
||||
"is providing a CloseShield on top of that")
|
||||
@Override
|
||||
public PrintWriter getWriter() throws IOException {
|
||||
final PrintWriter pw = new PrintWriterWrapper(frsp.getWriter()) {
|
||||
@Override
|
||||
public void close() {};
|
||||
};
|
||||
return pw;
|
||||
}
|
||||
};
|
||||
kerberosFilter.doFilter(req, rspCloseShield, chain);
|
||||
String requestContinuesAttr = (String)req.getAttribute(REQUEST_CONTINUES_ATTR);
|
||||
if (requestContinuesAttr == null) {
|
||||
log.warn("Could not find " + REQUEST_CONTINUES_ATTR);
|
||||
return false;
|
||||
} else {
|
||||
return Boolean.parseBoolean(requestContinuesAttr);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -152,8 +265,9 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
kerberosFilter.destroy();
|
||||
}
|
||||
|
||||
protected static ServletContext noContext = new ServletContext() {
|
||||
|
||||
protected static class AttributeOnlyServletContext implements ServletContext {
|
||||
private Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
@Override
|
||||
public void setSessionTrackingModes(Set<SessionTrackingMode> sessionTrackingModes) {}
|
||||
|
||||
@ -161,12 +275,16 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
public boolean setInitParameter(String name, String value) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void setAttribute(String name, Object object) {}
|
||||
|
||||
public void setAttribute(String name, Object object) {
|
||||
attributes.put(name, object);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeAttribute(String name) {}
|
||||
public void removeAttribute(String name) {
|
||||
attributes.remove(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void log(String message, Throwable throwable) {}
|
||||
@ -326,15 +444,15 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
public ClassLoader getClassLoader() {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Enumeration<String> getAttributeNames() {
|
||||
return null;
|
||||
return Collections.enumeration(attributes.keySet());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Object getAttribute(String name) {
|
||||
return null;
|
||||
return attributes.get(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -394,4 +512,44 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* {@link AuthenticationHandler} that delegates to another {@link AuthenticationHandler}
|
||||
* and records the response of managementOperation (which indicates whether the request
|
||||
* should continue or not).
|
||||
*/
|
||||
public static class RequestContinuesRecorderAuthenticationHandler implements AuthenticationHandler {
|
||||
private AuthenticationHandler authHandler;
|
||||
|
||||
public void setAuthHandler(AuthenticationHandler authHandler) {
|
||||
this.authHandler = authHandler;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return authHandler.getType();
|
||||
}
|
||||
|
||||
public void init(Properties config) throws ServletException {
|
||||
// authHandler has already been init'ed, nothing to do here
|
||||
}
|
||||
|
||||
public void destroy() {
|
||||
authHandler.destroy();
|
||||
}
|
||||
|
||||
public boolean managementOperation(AuthenticationToken token,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response)
|
||||
throws IOException, AuthenticationException {
|
||||
boolean result = authHandler.managementOperation(token, request, response);
|
||||
request.setAttribute(KerberosPlugin.REQUEST_CONTINUES_ATTR, new Boolean(result).toString());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public AuthenticationToken authenticate(HttpServletRequest request, HttpServletResponse response)
|
||||
throws IOException, AuthenticationException {
|
||||
return authHandler.authenticate(request, response);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -89,12 +89,12 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt

  @SuppressForbidden(reason = "Needs currentTimeMillis to compare against time in header")
  @Override
  public void doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain) throws Exception {
  public boolean doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain) throws Exception {

    String requestURI = ((HttpServletRequest) request).getRequestURI();
    if (requestURI.endsWith(PATH)) {
      filterChain.doFilter(request, response);
      return;
      return true;
    }
    long receivedTime = System.currentTimeMillis();
    String header = ((HttpServletRequest) request).getHeader(HEADER);
@ -102,14 +102,14 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
      //this must not happen
      log.error("No SolrAuth header present");
      filterChain.doFilter(request, response);
      return;
      return true;
    }

    List<String> authInfo = StrUtils.splitWS(header, false);
    if (authInfo.size() < 2) {
      log.error("Invalid SolrAuth Header {}", header);
      filterChain.doFilter(request, response);
      return;
      return true;
    }

    String nodeName = authInfo.get(0);
@ -119,12 +119,12 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
    if (decipher == null) {
      log.error("Could not decipher a header {} . No principal set", header);
      filterChain.doFilter(request, response);
      return;
      return true;
    }
    if ((receivedTime - decipher.timestamp) > MAX_VALIDITY) {
      log.error("Invalid key request timestamp: {} , received timestamp: {} , TTL: {}", decipher.timestamp, receivedTime, MAX_VALIDITY);
      filterChain.doFilter(request, response);
      return;
      return true;
    }

    final Principal principal = "$".equals(decipher.userName) ?
@ -132,6 +132,7 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
        new BasicUserPrincipal(decipher.userName);

    filterChain.doFilter(getWrapper((HttpServletRequest) request, principal), response);
    return true;
  }

  private static HttpServletRequestWrapper getWrapper(final HttpServletRequest request, final Principal principal) {
@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.security;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.commons.lang.NotImplementedException;
|
||||
|
||||
/**
|
||||
* Wrapper for PrintWriter that delegates to constructor arg
|
||||
*/
|
||||
public class PrintWriterWrapper extends PrintWriter {
|
||||
private PrintWriter printWriter;
|
||||
|
||||
public PrintWriterWrapper(PrintWriter printWriter) {
|
||||
super(new StringWriter());
|
||||
this.printWriter = printWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrintWriter append(char c) {
|
||||
return printWriter.append(c);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrintWriter append(CharSequence csq) {
|
||||
return printWriter.append(csq);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrintWriter append(CharSequence csq, int start, int end) {
|
||||
return printWriter.append(csq, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean checkError() {
|
||||
return printWriter.checkError();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void clearError() {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
printWriter.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flush() {
|
||||
printWriter.flush();
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrintWriter format(Locale l, String format, Object... args) {
|
||||
return printWriter.format(l, format, args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrintWriter format(String format, Object... args) {
|
||||
throw new NotImplementedException("Forbidden API");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(boolean b) {
|
||||
printWriter.print(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(char c) {
|
||||
printWriter.print(c);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(char[] s) {
|
||||
printWriter.print(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(double d) {
|
||||
printWriter.print(d);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(float f) {
|
||||
printWriter.print(f);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(int i) {
|
||||
printWriter.print(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(long l) {
|
||||
printWriter.print(l);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(Object obj) {
|
||||
printWriter.print(obj);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void print(String s) {
|
||||
printWriter.print(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrintWriter printf(Locale l, String format, Object... args) {
|
||||
return printWriter.printf(l, format, args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrintWriter printf(String format, Object... args) {
|
||||
throw new NotImplementedException("Forbidden API");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println() {
|
||||
printWriter.println();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(boolean x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(char x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(char[] x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(double x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(float x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(int x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(long x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(Object x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void println(String x) {
|
||||
printWriter.println(x);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setError() {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(char[] buf) {
|
||||
printWriter.write(buf);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(char[] buf, int off, int len) {
|
||||
printWriter.write(buf, off, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(int c) {
|
||||
printWriter.write(c);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(String s) {
|
||||
printWriter.write(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(String s, int off, int len) {
|
||||
printWriter.write(s, off, len);
|
||||
}
|
||||
}
|
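Editor's note: the PrintWriterWrapper above is used by KerberosPlugin.doAuthenticate to build a close shield around the response writer so that Hadoop's filter cannot close it prematurely (see the HADOOP-13346 comment earlier in this patch). A minimal sketch of that pattern follows; the helper class and method names are invented for illustration.

import java.io.IOException;
import java.io.PrintWriter;

import javax.servlet.http.HttpServletResponse;

public class CloseShieldSketch {
  // Returns a writer that delegates to the response writer but ignores close(),
  // mirroring how the plugin wraps the writer it hands to the Hadoop filter.
  static PrintWriter closeShield(HttpServletResponse response) throws IOException {
    final PrintWriter real = response.getWriter();
    return new PrintWriterWrapper(real) {
      @Override
      public void close() {
        // intentionally a no-op; the servlet container closes the real writer later
      }
    };
  }
}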
@ -296,6 +296,7 @@ public class SolrDispatchFilter extends BaseSolrFilter {
  }

  private boolean authenticateRequest(ServletRequest request, ServletResponse response, final AtomicReference<ServletRequest> wrappedRequest) throws IOException {
    boolean requestContinues = false;
    final AtomicBoolean isAuthenticated = new AtomicBoolean(false);
    AuthenticationPlugin authenticationPlugin = cores.getAuthenticationPlugin();
    if (authenticationPlugin == null) {
@ -308,7 +309,7 @@ public class SolrDispatchFilter extends BaseSolrFilter {
      try {
        log.debug("Request to authenticate: {}, domain: {}, port: {}", request, request.getLocalName(), request.getLocalPort());
        // upon successful authentication, this should call the chain's next filter.
        authenticationPlugin.doAuthenticate(request, response, new FilterChain() {
        requestContinues = authenticationPlugin.doAuthenticate(request, response, new FilterChain() {
          public void doFilter(ServletRequest req, ServletResponse rsp) throws IOException, ServletException {
            isAuthenticated.set(true);
            wrappedRequest.set(req);
@ -319,8 +320,13 @@ public class SolrDispatchFilter extends BaseSolrFilter {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Error during request authentication, ", e);
      }
    }
    // failed authentication?
    if (!isAuthenticated.get()) {
    // requestContinues is an optional short circuit, thus we still need to check isAuthenticated.
    // This is because the AuthenticationPlugin doesn't always have enough information to determine if
    // it should short circuit, e.g. the Kerberos Authentication Filter will send an error and not
    // call later filters in chain, but doesn't throw an exception. We could force each Plugin
    // to implement isAuthenticated to simplify the check here, but that just moves the complexity to
    // multiple code paths.
    if (!requestContinues || !isAuthenticated.get()) {
      response.flushBuffer();
      return false;
    }
@ -26,7 +26,7 @@ import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.CodecReader;
|
||||
@ -76,20 +76,20 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
protected final SolrCoreState solrCoreState;
|
||||
|
||||
// stats
|
||||
AtomicLong addCommands = new AtomicLong();
|
||||
AtomicLong addCommandsCumulative = new AtomicLong();
|
||||
AtomicLong deleteByIdCommands= new AtomicLong();
|
||||
AtomicLong deleteByIdCommandsCumulative= new AtomicLong();
|
||||
AtomicLong deleteByQueryCommands= new AtomicLong();
|
||||
AtomicLong deleteByQueryCommandsCumulative= new AtomicLong();
|
||||
AtomicLong expungeDeleteCommands = new AtomicLong();
|
||||
AtomicLong mergeIndexesCommands = new AtomicLong();
|
||||
AtomicLong commitCommands= new AtomicLong();
|
||||
AtomicLong optimizeCommands= new AtomicLong();
|
||||
AtomicLong rollbackCommands= new AtomicLong();
|
||||
AtomicLong numDocsPending= new AtomicLong();
|
||||
AtomicLong numErrors = new AtomicLong();
|
||||
AtomicLong numErrorsCumulative = new AtomicLong();
|
||||
LongAdder addCommands = new LongAdder();
|
||||
LongAdder addCommandsCumulative = new LongAdder();
|
||||
LongAdder deleteByIdCommands= new LongAdder();
|
||||
LongAdder deleteByIdCommandsCumulative= new LongAdder();
|
||||
LongAdder deleteByQueryCommands= new LongAdder();
|
||||
LongAdder deleteByQueryCommandsCumulative= new LongAdder();
|
||||
LongAdder expungeDeleteCommands = new LongAdder();
|
||||
LongAdder mergeIndexesCommands = new LongAdder();
|
||||
LongAdder commitCommands= new LongAdder();
|
||||
LongAdder optimizeCommands= new LongAdder();
|
||||
LongAdder rollbackCommands= new LongAdder();
|
||||
LongAdder numDocsPending= new LongAdder();
|
||||
LongAdder numErrors = new LongAdder();
|
||||
LongAdder numErrorsCumulative = new LongAdder();
|
||||
|
||||
// tracks when auto-commit should occur
|
||||
protected final CommitTracker commitTracker;
|
||||
@ -158,7 +158,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
}
|
||||
|
||||
protected void rollbackWriter() throws IOException {
|
||||
numDocsPending.set(0);
|
||||
numDocsPending.reset();
|
||||
solrCoreState.rollbackIndexWriter(core);
|
||||
|
||||
}
|
||||
@ -192,8 +192,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
private int addDoc0(AddUpdateCommand cmd) throws IOException {
|
||||
int rc = -1;
|
||||
|
||||
addCommands.incrementAndGet();
|
||||
addCommandsCumulative.incrementAndGet();
|
||||
addCommands.increment();
|
||||
addCommandsCumulative.increment();
|
||||
|
||||
// if there is no ID field, don't overwrite
|
||||
if (idField == null) {
|
||||
@ -230,10 +230,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
rc = 1;
|
||||
} finally {
|
||||
if (rc != 1) {
|
||||
numErrors.incrementAndGet();
|
||||
numErrorsCumulative.incrementAndGet();
|
||||
numErrors.increment();
|
||||
numErrorsCumulative.increment();
|
||||
} else {
|
||||
numDocsPending.incrementAndGet();
|
||||
numDocsPending.increment();
|
||||
}
|
||||
}
|
||||
|
||||
@ -368,8 +368,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
// we don't return the number of docs deleted because it's not always possible to quickly know that info.
|
||||
@Override
|
||||
public void delete(DeleteUpdateCommand cmd) throws IOException {
|
||||
deleteByIdCommands.incrementAndGet();
|
||||
deleteByIdCommandsCumulative.incrementAndGet();
|
||||
deleteByIdCommands.increment();
|
||||
deleteByIdCommandsCumulative.increment();
|
||||
|
||||
Term deleteTerm = new Term(idField.getName(), cmd.getIndexedId());
|
||||
// SolrCore.verbose("deleteDocuments",deleteTerm,writer);
|
||||
@ -426,8 +426,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
// we don't return the number of docs deleted because it's not always possible to quickly know that info.
|
||||
@Override
|
||||
public void deleteByQuery(DeleteUpdateCommand cmd) throws IOException {
|
||||
deleteByQueryCommands.incrementAndGet();
|
||||
deleteByQueryCommandsCumulative.incrementAndGet();
|
||||
deleteByQueryCommands.increment();
|
||||
deleteByQueryCommandsCumulative.increment();
|
||||
boolean madeIt=false;
|
||||
try {
|
||||
Query q = getQuery(cmd);
|
||||
@ -473,8 +473,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
|
||||
} finally {
|
||||
if (!madeIt) {
|
||||
numErrors.incrementAndGet();
|
||||
numErrorsCumulative.incrementAndGet();
|
||||
numErrors.increment();
|
||||
numErrorsCumulative.increment();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -482,7 +482,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
|
||||
@Override
|
||||
public int mergeIndexes(MergeIndexesCommand cmd) throws IOException {
|
||||
mergeIndexesCommands.incrementAndGet();
|
||||
mergeIndexesCommands.increment();
|
||||
int rc;
|
||||
|
||||
log.info("start " + cmd);
|
||||
@ -545,7 +545,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
error=false;
|
||||
}
|
||||
finally {
|
||||
if (error) numErrors.incrementAndGet();
|
||||
if (error) numErrors.increment();
|
||||
}
|
||||
}
|
||||
|
||||
@ -557,10 +557,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
}
|
||||
|
||||
if (cmd.optimize) {
|
||||
optimizeCommands.incrementAndGet();
|
||||
optimizeCommands.increment();
|
||||
} else {
|
||||
commitCommands.incrementAndGet();
|
||||
if (cmd.expungeDeletes) expungeDeleteCommands.incrementAndGet();
|
||||
commitCommands.increment();
|
||||
if (cmd.expungeDeletes) expungeDeleteCommands.increment();
|
||||
}
|
||||
|
||||
Future[] waitSearcher = null;
|
||||
@ -622,7 +622,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
}
|
||||
|
||||
// SolrCore.verbose("writer.commit() end");
|
||||
numDocsPending.set(0);
|
||||
numDocsPending.reset();
|
||||
callPostCommitCallbacks();
|
||||
}
|
||||
} finally {
|
||||
@ -676,10 +676,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
solrCoreState.getCommitLock().unlock();
|
||||
}
|
||||
|
||||
addCommands.set(0);
|
||||
deleteByIdCommands.set(0);
|
||||
deleteByQueryCommands.set(0);
|
||||
if (error) numErrors.incrementAndGet();
|
||||
addCommands.reset();
|
||||
deleteByIdCommands.reset();
|
||||
deleteByQueryCommands.reset();
|
||||
if (error) numErrors.increment();
|
||||
}
|
||||
|
||||
// if we are supposed to wait for the searcher to be registered, then we should do it
|
||||
@ -707,7 +707,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
throw new UnsupportedOperationException("Rollback is currently not supported in SolrCloud mode. (SOLR-4895)");
|
||||
}
|
||||
|
||||
rollbackCommands.incrementAndGet();
|
||||
rollbackCommands.increment();
|
||||
|
||||
boolean error=true;
|
||||
|
||||
@ -727,13 +727,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
error=false;
|
||||
}
|
||||
finally {
|
||||
addCommandsCumulative.set(
|
||||
addCommandsCumulative.get() - addCommands.getAndSet( 0 ) );
|
||||
deleteByIdCommandsCumulative.set(
|
||||
deleteByIdCommandsCumulative.get() - deleteByIdCommands.getAndSet( 0 ) );
|
||||
deleteByQueryCommandsCumulative.set(
|
||||
deleteByQueryCommandsCumulative.get() - deleteByQueryCommands.getAndSet( 0 ) );
|
||||
if (error) numErrors.incrementAndGet();
|
||||
addCommandsCumulative.add(-addCommands.sumThenReset());
|
||||
deleteByIdCommandsCumulative.add(-deleteByIdCommands.sumThenReset());
|
||||
deleteByQueryCommandsCumulative.add(-deleteByQueryCommands.sumThenReset());
|
||||
if (error) numErrors.increment();
|
||||
}
|
||||
}
|
||||
|
||||
@ -749,7 +746,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
commitTracker.close();
|
||||
softCommitTracker.close();
|
||||
|
||||
numDocsPending.set(0);
|
||||
numDocsPending.reset();
|
||||
}
|
||||
|
||||
|
||||
@ -882,7 +879,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
@Override
|
||||
public NamedList getStatistics() {
|
||||
NamedList lst = new SimpleOrderedMap();
|
||||
lst.add("commits", commitCommands.get());
|
||||
lst.add("commits", commitCommands.longValue());
|
||||
if (commitTracker.getDocsUpperBound() > 0) {
|
||||
lst.add("autocommit maxDocs", commitTracker.getDocsUpperBound());
|
||||
}
|
||||
@ -897,20 +894,20 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
||||
lst.add("soft autocommit maxTime", "" + softCommitTracker.getTimeUpperBound() + "ms");
|
||||
}
|
||||
lst.add("soft autocommits", softCommitTracker.getCommitCount());
|
||||
lst.add("optimizes", optimizeCommands.get());
|
||||
lst.add("rollbacks", rollbackCommands.get());
|
||||
lst.add("expungeDeletes", expungeDeleteCommands.get());
|
||||
lst.add("docsPending", numDocsPending.get());
|
||||
lst.add("optimizes", optimizeCommands.longValue());
|
||||
lst.add("rollbacks", rollbackCommands.longValue());
|
||||
lst.add("expungeDeletes", expungeDeleteCommands.longValue());
|
||||
lst.add("docsPending", numDocsPending.longValue());
|
||||
// pset.size() not synchronized, but it should be fine to access.
|
||||
// lst.add("deletesPending", pset.size());
|
||||
lst.add("adds", addCommands.get());
|
||||
lst.add("deletesById", deleteByIdCommands.get());
|
||||
lst.add("deletesByQuery", deleteByQueryCommands.get());
|
||||
lst.add("errors", numErrors.get());
|
||||
lst.add("cumulative_adds", addCommandsCumulative.get());
|
||||
lst.add("cumulative_deletesById", deleteByIdCommandsCumulative.get());
|
||||
lst.add("cumulative_deletesByQuery", deleteByQueryCommandsCumulative.get());
|
||||
lst.add("cumulative_errors", numErrorsCumulative.get());
|
||||
lst.add("adds", addCommands.longValue());
|
||||
lst.add("deletesById", deleteByIdCommands.longValue());
|
||||
lst.add("deletesByQuery", deleteByQueryCommands.longValue());
|
||||
lst.add("errors", numErrors.longValue());
|
||||
lst.add("cumulative_adds", addCommandsCumulative.longValue());
|
||||
lst.add("cumulative_deletesById", deleteByIdCommandsCumulative.longValue());
|
||||
lst.add("cumulative_deletesByQuery", deleteByQueryCommandsCumulative.longValue());
|
||||
lst.add("cumulative_errors", numErrorsCumulative.longValue());
|
||||
if (this.ulog != null) {
|
||||
lst.add("transaction_logs_total_size", ulog.getTotalLogsSize());
|
||||
lst.add("transaction_logs_total_number", ulog.getTotalLogsNumber());
|
||||
|
@ -28,6 +28,7 @@ import java.util.TreeSet;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.ref.WeakReference;
|
||||
@ -90,7 +91,7 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
|
||||
public V get(K key) {
|
||||
CacheEntry<K,V> e = map.get(key);
|
||||
if (e == null) {
|
||||
if (islive) stats.missCounter.incrementAndGet();
|
||||
if (islive) stats.missCounter.increment();
|
||||
return null;
|
||||
}
|
||||
if (islive) e.lastAccessed = stats.accessCounter.incrementAndGet();
|
||||
@ -119,9 +120,9 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
|
||||
currentSize = stats.size.get();
|
||||
}
|
||||
if (islive) {
|
||||
stats.putCounter.incrementAndGet();
|
||||
stats.putCounter.increment();
|
||||
} else {
|
||||
stats.nonLivePutCounter.incrementAndGet();
|
||||
stats.nonLivePutCounter.increment();
|
||||
}
|
||||
|
||||
// Check if we need to clear out old entries from the cache.
|
||||
@ -172,7 +173,7 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
|
||||
isCleaning = true;
|
||||
this.oldestEntry = oldestEntry; // volatile write to make isCleaning visible
|
||||
|
||||
long timeCurrent = stats.accessCounter.get();
|
||||
long timeCurrent = stats.accessCounter.longValue();
|
||||
int sz = stats.size.get();
|
||||
|
||||
int numRemoved = 0;
|
||||
@ -532,23 +533,23 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
|
||||
|
||||
|
||||
public static class Stats {
|
||||
private final AtomicLong accessCounter = new AtomicLong(0),
|
||||
putCounter = new AtomicLong(0),
|
||||
nonLivePutCounter = new AtomicLong(0),
|
||||
missCounter = new AtomicLong();
|
||||
private final AtomicLong accessCounter = new AtomicLong(0);
|
||||
private final LongAdder putCounter = new LongAdder();
|
||||
private final LongAdder nonLivePutCounter = new LongAdder();
|
||||
private final LongAdder missCounter = new LongAdder();
|
||||
private final AtomicInteger size = new AtomicInteger();
|
||||
private AtomicLong evictionCounter = new AtomicLong();
|
||||
|
||||
public long getCumulativeLookups() {
|
||||
return (accessCounter.get() - putCounter.get() - nonLivePutCounter.get()) + missCounter.get();
|
||||
return (accessCounter.longValue() - putCounter.longValue() - nonLivePutCounter.longValue()) + missCounter.longValue();
|
||||
}
|
||||
|
||||
public long getCumulativeHits() {
|
||||
return accessCounter.get() - putCounter.get() - nonLivePutCounter.get();
|
||||
return accessCounter.longValue() - putCounter.longValue() - nonLivePutCounter.longValue();
|
||||
}
|
||||
|
||||
public long getCumulativePuts() {
|
||||
return putCounter.get();
|
||||
return putCounter.longValue();
|
||||
}
|
||||
|
||||
public long getCumulativeEvictions() {
|
||||
@ -560,18 +561,18 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
|
||||
}
|
||||
|
||||
public long getCumulativeNonLivePuts() {
|
||||
return nonLivePutCounter.get();
|
||||
return nonLivePutCounter.longValue();
|
||||
}
|
||||
|
||||
public long getCumulativeMisses() {
|
||||
return missCounter.get();
|
||||
return missCounter.longValue();
|
||||
}
|
||||
|
||||
public void add(Stats other) {
|
||||
accessCounter.addAndGet(other.accessCounter.get());
|
||||
putCounter.addAndGet(other.putCounter.get());
|
||||
nonLivePutCounter.addAndGet(other.nonLivePutCounter.get());
|
||||
missCounter.addAndGet(other.missCounter.get());
|
||||
putCounter.add(other.putCounter.longValue());
|
||||
nonLivePutCounter.add(other.nonLivePutCounter.longValue());
|
||||
missCounter.add(other.missCounter.longValue());
|
||||
evictionCounter.addAndGet(other.evictionCounter.get());
|
||||
size.set(Math.max(size.get(), other.size.get()));
|
||||
}
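The hunks above (and the matching ones in DirectUpdateHandler2 earlier in this diff) replace write-heavy AtomicLong counters with java.util.concurrent.atomic.LongAdder, which keeps per-thread cells and so contends less under concurrent updates; reads move from get() to longValue(), and set(0)/getAndSet(0) become reset()/sumThenReset(). The trade-off is that the sum is only a best-effort snapshot, which is acceptable for statistics. A minimal sketch of the idiom, using a hypothetical counter class that is not part of this commit:

    import java.util.concurrent.atomic.LongAdder;

    // Hypothetical counter pair illustrating the AtomicLong -> LongAdder idiom above.
    class CommandCounters {
      private final LongAdder adds = new LongAdder();            // per-window counter
      private final LongAdder addsCumulative = new LongAdder();  // lifetime counter

      void onAdd() {
        adds.increment();            // replaces incrementAndGet(); no return value needed
        addsCumulative.increment();
      }

      long pending() {
        return adds.longValue();     // replaces get(); a snapshot, not an atomic read
      }

      void onRollback() {
        // replaces: addsCumulative.set(addsCumulative.get() - adds.getAndSet(0))
        addsCumulative.add(-adds.sumThenReset());
      }
    }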
|
||||
|
@ -16,6 +16,12 @@
|
||||
*/
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
@ -36,12 +42,6 @@ import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* Test split phase that occurs when a Collection API split call is made.
|
||||
*/
|
||||
@ -254,6 +254,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
|
||||
address.replaceAll("/", "_"));
|
||||
overseerElector.setup(ec);
|
||||
overseerElector.joinElection(ec, false);
|
||||
reader.close();
|
||||
return zkClient;
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,8 @@
|
||||
*/
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import javax.security.auth.login.AppConfigurationEntry;
|
||||
import javax.security.auth.login.Configuration;
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
@ -24,18 +26,57 @@ import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.security.auth.login.AppConfigurationEntry;
|
||||
import javax.security.auth.login.Configuration;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.hadoop.minikdc.MiniKdc;
|
||||
import org.apache.solr.client.solrj.impl.Krb5HttpClientConfigurer;
|
||||
|
||||
public class KerberosTestUtil {
|
||||
public class KerberosTestServices {
|
||||
|
||||
private MiniKdc kdc;
|
||||
private JaasConfiguration jaasConfiguration;
|
||||
private Configuration savedConfig;
|
||||
private Locale savedLocale;
|
||||
|
||||
private KerberosTestServices(MiniKdc kdc,
|
||||
JaasConfiguration jaasConfiguration,
|
||||
Configuration savedConfig,
|
||||
Locale savedLocale) {
|
||||
this.kdc = kdc;
|
||||
this.jaasConfiguration = jaasConfiguration;
|
||||
this.savedConfig = savedConfig;
|
||||
this.savedLocale = savedLocale;
|
||||
}
|
||||
|
||||
public MiniKdc getKdc() {
|
||||
return kdc;
|
||||
}
|
||||
|
||||
public void start() throws Exception {
|
||||
if (brokenLanguagesWithMiniKdc.contains(Locale.getDefault().getLanguage())) {
|
||||
Locale.setDefault(Locale.US);
|
||||
}
|
||||
|
||||
if (kdc != null) kdc.start();
|
||||
Configuration.setConfiguration(jaasConfiguration);
|
||||
Krb5HttpClientConfigurer.regenerateJaasConfiguration();
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
if (kdc != null) kdc.stop();
|
||||
Configuration.setConfiguration(savedConfig);
|
||||
Krb5HttpClientConfigurer.regenerateJaasConfiguration();
|
||||
Locale.setDefault(savedLocale);
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a MiniKdc that can be used for creating kerberos principals
|
||||
* and keytabs. Caller is responsible for starting/stopping the kdc.
|
||||
*/
|
||||
public static MiniKdc getKdc(File workDir) throws Exception {
|
||||
private static MiniKdc getKdc(File workDir) throws Exception {
|
||||
Properties conf = MiniKdc.createConf();
|
||||
return new MiniKdc(conf, workDir);
|
||||
}
|
||||
@ -44,7 +85,7 @@ public class KerberosTestUtil {
|
||||
* Programmatic version of a jaas.conf file suitable for connecting
|
||||
* to a SASL-configured zookeeper.
|
||||
*/
|
||||
public static class JaasConfiguration extends Configuration {
|
||||
private static class JaasConfiguration extends Configuration {
|
||||
|
||||
private static AppConfigurationEntry[] clientEntry;
|
||||
private static AppConfigurationEntry[] serverEntry;
|
||||
@ -60,7 +101,7 @@ public class KerberosTestUtil {
|
||||
* @param serverKeytab The location of the keytab with the serverPrincipal
|
||||
*/
|
||||
public JaasConfiguration(String clientPrincipal, File clientKeytab,
|
||||
String serverPrincipal, File serverKeytab) {
|
||||
String serverPrincipal, File serverKeytab) {
|
||||
Map<String, String> clientOptions = new HashMap();
|
||||
clientOptions.put("principal", clientPrincipal);
|
||||
clientOptions.put("keyTab", clientKeytab.getAbsolutePath());
|
||||
@ -73,9 +114,9 @@ public class KerberosTestUtil {
|
||||
clientOptions.put("debug", "true");
|
||||
}
|
||||
clientEntry = new AppConfigurationEntry[]{
|
||||
new AppConfigurationEntry(getKrb5LoginModuleName(),
|
||||
AppConfigurationEntry.LoginModuleControlFlag.REQUIRED,
|
||||
clientOptions)};
|
||||
new AppConfigurationEntry(getKrb5LoginModuleName(),
|
||||
AppConfigurationEntry.LoginModuleControlFlag.REQUIRED,
|
||||
clientOptions)};
|
||||
if(serverPrincipal!=null && serverKeytab!=null) {
|
||||
Map<String, String> serverOptions = new HashMap(clientOptions);
|
||||
serverOptions.put("principal", serverPrincipal);
|
||||
@ -88,9 +129,9 @@ public class KerberosTestUtil {
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an entry to the jaas configuration with the passed in principal and keytab,
|
||||
* Add an entry to the jaas configuration with the passed in principal and keytab,
|
||||
* along with the app name.
|
||||
*
|
||||
*
|
||||
* @param principal The principal
|
||||
* @param keytab The keytab containing credentials for the principal
|
||||
* @param appName The app name of the configuration
|
||||
@ -127,21 +168,62 @@ public class KerberosTestUtil {
|
||||
*/
|
||||
private final static List<String> brokenLanguagesWithMiniKdc =
|
||||
Arrays.asList(
|
||||
new Locale("th").getLanguage(),
|
||||
new Locale("ja").getLanguage(),
|
||||
new Locale("th").getLanguage(),
|
||||
new Locale("ja").getLanguage(),
|
||||
new Locale("hi").getLanguage()
|
||||
);
|
||||
/**
|
||||
*returns the currently set locale, and overrides it with {@link Locale#US} if it's
|
||||
* currently something MiniKdc can not handle
|
||||
*
|
||||
* @see Locale#setDefault
|
||||
*/
|
||||
public static final Locale overrideLocaleIfNotSpportedByMiniKdc() {
|
||||
Locale old = Locale.getDefault();
|
||||
if (brokenLanguagesWithMiniKdc.contains(Locale.getDefault().getLanguage())) {
|
||||
Locale.setDefault(Locale.US);
|
||||
);
|
||||
|
||||
public static class Builder {
|
||||
private File kdcWorkDir;
|
||||
private String clientPrincipal;
|
||||
private File clientKeytab;
|
||||
private String serverPrincipal;
|
||||
private File serverKeytab;
|
||||
private String appName;
|
||||
private Locale savedLocale;
|
||||
|
||||
public Builder() {
|
||||
savedLocale = Locale.getDefault();
|
||||
}
|
||||
|
||||
public Builder withKdc(File kdcWorkDir) {
|
||||
this.kdcWorkDir = kdcWorkDir;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withJaasConfiguration(String clientPrincipal, File clientKeytab,
|
||||
String serverPrincipal, File serverKeytab) {
|
||||
Preconditions.checkNotNull(clientPrincipal);
|
||||
Preconditions.checkNotNull(clientKeytab);
|
||||
this.clientPrincipal = clientPrincipal;
|
||||
this.clientKeytab = clientKeytab;
|
||||
this.serverPrincipal = serverPrincipal;
|
||||
this.serverKeytab = serverKeytab;
|
||||
this.appName = null;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withJaasConfiguration(String principal, File keytab, String appName) {
|
||||
Preconditions.checkNotNull(principal);
|
||||
Preconditions.checkNotNull(keytab);
|
||||
this.clientPrincipal = principal;
|
||||
this.clientKeytab = keytab;
|
||||
this.serverPrincipal = null;
|
||||
this.serverKeytab = null;
|
||||
this.appName = appName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public KerberosTestServices build() throws Exception {
|
||||
final MiniKdc kdc = kdcWorkDir != null ? getKdc(kdcWorkDir) : null;
|
||||
final Configuration oldConfig = clientPrincipal != null ? Configuration.getConfiguration() : null;
|
||||
JaasConfiguration jaasConfiguration = null;
|
||||
if (clientPrincipal != null) {
|
||||
jaasConfiguration = (appName == null) ?
|
||||
new JaasConfiguration(clientPrincipal, clientKeytab, serverPrincipal, serverKeytab) :
|
||||
new JaasConfiguration(clientPrincipal, clientKeytab, appName);
|
||||
}
|
||||
return new KerberosTestServices(kdc, jaasConfiguration, oldConfig, savedLocale);
|
||||
}
|
||||
return old;
|
||||
}
|
||||
}
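For context, a minimal usage sketch of the builder-based KerberosTestServices helper introduced above, mirroring the SaslZkTestServer changes later in this diff. The directories and principals below are hypothetical and this snippet is not part of the commit:

    // Hypothetical test setup using the KerberosTestServices builder shown above.
    File kdcWorkDir = new File("/tmp/minikdc-work");    // assumed scratch directory
    File keytabFile = new File(kdcWorkDir, "keytabs");  // assumed keytab location

    KerberosTestServices kerberos = KerberosTestServices.builder()
        .withKdc(kdcWorkDir)                            // omit to skip starting a MiniKdc
        .withJaasConfiguration("solr", keytabFile, "zookeeper/127.0.0.1", keytabFile)
        .build();
    kerberos.start();                                   // starts the KDC and installs the JAAS config
    kerberos.getKdc().createPrincipal(keytabFile, "solr", "zookeeper/127.0.0.1");
    try {
      // ... run SASL/ZooKeeper assertions here ...
    } finally {
      kerberos.stop();                                  // restores the saved JAAS config and locale
    }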
|
@ -118,6 +118,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
|
||||
if (!zkClient.isClosed()) {
|
||||
zkClient.close();
|
||||
}
|
||||
zkStateReader.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.ZooDefs;
|
||||
@ -77,6 +78,7 @@ public class OutOfBoxZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
zkClient.makePath("/protectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.create("/unprotectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.makePath("/unprotectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.create(SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH, "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.close();
|
||||
|
||||
log.info("####SETUP_END " + getTestName());
|
||||
@ -93,7 +95,9 @@ public class OutOfBoxZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
public void testOutOfBoxSolrZkClient() throws Exception {
|
||||
SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
true, true, true, true, true,
|
||||
true, true, true, true, true);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -110,6 +114,7 @@ public class OutOfBoxZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
assertTrue(verifiedList.contains("/solr/unprotectedMakePathNode"));
|
||||
assertTrue(verifiedList.contains("/solr/protectedMakePathNode"));
|
||||
assertTrue(verifiedList.contains("/solr/protectedCreateNode"));
|
||||
assertTrue(verifiedList.contains("/solr" + SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH));
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
|
@ -18,18 +18,15 @@ package org.apache.solr.cloud;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.StringUtils;
|
||||
import org.apache.solr.common.cloud.DefaultZkACLProvider;
|
||||
import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
|
||||
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.VMParamsAllAndReadonlyDigestZkACLProvider;
|
||||
import org.apache.solr.common.cloud.VMParamsSingleSetCredentialsDigestZkCredentialsProvider;
|
||||
import org.apache.solr.common.cloud.ZkACLProvider;
|
||||
import org.apache.solr.common.cloud.ZkCredentialsProvider;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.ZooDefs;
|
||||
import org.apache.zookeeper.data.ACL;
|
||||
import org.apache.zookeeper.data.Id;
|
||||
import org.apache.zookeeper.server.auth.DigestAuthenticationProvider;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
@ -40,7 +37,6 @@ import java.io.File;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.charset.Charset;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
@ -88,6 +84,7 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
"readonlyACLUsername", "readonlyACLPassword").getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
zkClient.create("/protectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.makePath("/protectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.create(SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH, "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.close();
|
||||
|
||||
zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders(null, null,
|
||||
@ -114,7 +111,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders(null, null,
|
||||
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
false, false, false, false, false,
|
||||
false, false, false, false, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -125,7 +124,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders("connectAndAllACLUsername", "connectAndAllACLPasswordWrong",
|
||||
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
false, false, false, false, false,
|
||||
false, false, false, false, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -136,7 +137,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders("connectAndAllACLUsername", "connectAndAllACLPassword",
|
||||
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
true, true, true, true, true,
|
||||
true, true, true, true, true);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -147,7 +150,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders("readonlyACLUsername", "readonlyACLPassword",
|
||||
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, false, false, false);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
true, true, false, false, false,
|
||||
false, false, false, false, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -159,7 +164,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
|
||||
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
false, false, false, false, false,
|
||||
false, false, false, false, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -171,7 +178,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
|
||||
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
false, false, false, false, false,
|
||||
false, false, false, false, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -183,7 +192,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
|
||||
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
true, true, true, true, true,
|
||||
true, true, true, true, true);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -195,7 +206,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
|
||||
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, false, false, false);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
true, true, false, false, false,
|
||||
false, false, false, false, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -240,28 +253,18 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
|
||||
|
||||
@Override
|
||||
public ZkACLProvider createZkACLProvider() {
|
||||
return new DefaultZkACLProvider() {
|
||||
return new VMParamsAllAndReadonlyDigestZkACLProvider() {
|
||||
@Override
|
||||
protected List<ACL> createGlobalACLsToAdd() {
|
||||
try {
|
||||
List<ACL> result = new ArrayList<ACL>();
|
||||
|
||||
if (!StringUtils.isEmpty(digestUsername) && !StringUtils.isEmpty(digestPassword)) {
|
||||
result.add(new ACL(ZooDefs.Perms.ALL, new Id("digest", DigestAuthenticationProvider.generateDigest(digestUsername + ":" + digestPassword))));
|
||||
}
|
||||
|
||||
if (!StringUtils.isEmpty(digestReadonlyUsername) && !StringUtils.isEmpty(digestReadonlyPassword)) {
|
||||
result.add(new ACL(ZooDefs.Perms.READ, new Id("digest", DigestAuthenticationProvider.generateDigest(digestReadonlyUsername + ":" + digestReadonlyPassword))));
|
||||
}
|
||||
|
||||
if (result.isEmpty()) {
|
||||
result = ZooDefs.Ids.OPEN_ACL_UNSAFE;
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
protected List<ACL> createNonSecurityACLsToAdd() {
|
||||
return createACLsToAdd(true, digestUsername, digestPassword, digestReadonlyUsername, digestReadonlyPassword);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Set of ACLs to return security-related znodes
|
||||
*/
|
||||
@Override
|
||||
protected List<ACL> createSecurityACLsToAdd() {
|
||||
return createACLsToAdd(false, digestUsername, digestPassword, digestReadonlyUsername, digestReadonlyPassword);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -130,6 +130,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
||||
}
|
||||
}
|
||||
deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
|
||||
zkStateReader.close();
|
||||
zkClient.close();
|
||||
}
|
||||
|
||||
|
@ -20,15 +20,12 @@ import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Locale;
|
||||
|
||||
import javax.security.auth.login.Configuration;
|
||||
|
||||
import org.apache.hadoop.minikdc.MiniKdc;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.cloud.DefaultZkACLProvider;
|
||||
import org.apache.solr.common.cloud.SaslZkACLProvider;
|
||||
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkACLProvider;
|
||||
import org.apache.solr.util.BadZookeeperThreadsFilter;
|
||||
@ -50,8 +47,6 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
|
||||
private static final Charset DATA_ENCODING = Charset.forName("UTF-8");
|
||||
|
||||
protected Locale savedLocale = null;
|
||||
|
||||
protected ZkTestServer zkServer;
|
||||
|
||||
@BeforeClass
|
||||
@ -71,7 +66,6 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
savedLocale = KerberosTestUtil.overrideLocaleIfNotSpportedByMiniKdc();
|
||||
log.info("####SETUP_START " + getTestName());
|
||||
createTempDir();
|
||||
|
||||
@ -99,6 +93,7 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
try {
|
||||
zkClient.create("/protectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.makePath("/protectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
zkClient.create(SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH, "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -115,7 +110,6 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
zkServer.shutdown();
|
||||
Locale.setDefault(savedLocale);
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@ -124,7 +118,9 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
// Test with Sasl enabled
|
||||
SolrZkClient zkClient = new SolrZkClientWithACLs(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
true, true, true, true, true,
|
||||
true, true, true, true, true);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
}
|
||||
@ -134,7 +130,9 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
System.setProperty("zookeeper.sasl.client", "false");
|
||||
zkClient = new SolrZkClientNoACLs(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
|
||||
try {
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, false, false, false);
|
||||
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
|
||||
true, true, false, false, false,
|
||||
false, false, false, false, false);
|
||||
} finally {
|
||||
zkClient.close();
|
||||
System.clearProperty("zookeeper.sasl.client");
|
||||
@ -176,8 +174,7 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
*/
|
||||
public static class SaslZkTestServer extends ZkTestServer {
|
||||
private String kdcDir;
|
||||
private MiniKdc kdc;
|
||||
private Configuration conf;
|
||||
private KerberosTestServices kerberosTestServices;
|
||||
|
||||
public SaslZkTestServer(String zkDir, String kdcDir) {
|
||||
super(zkDir);
|
||||
@ -187,13 +184,11 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
public SaslZkTestServer(String zkDir, int port, String kdcDir) {
|
||||
super(zkDir, port);
|
||||
this.kdcDir = kdcDir;
|
||||
conf = Configuration.getConfiguration();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() throws InterruptedException {
|
||||
try {
|
||||
kdc = KerberosTestUtil.getKdc(new File(kdcDir));
|
||||
// Don't require that credentials match the entire principal string, e.g.
|
||||
// can match "solr" rather than "solr/host@DOMAIN"
|
||||
System.setProperty("zookeeper.kerberos.removeRealmFromPrincipal", "true");
|
||||
@ -202,12 +197,13 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
String zkClientPrincipal = "solr";
|
||||
String zkServerPrincipal = "zookeeper/127.0.0.1";
|
||||
|
||||
kdc.start();
|
||||
// Create ZK client and server principals and load them into the Configuration
|
||||
kdc.createPrincipal(keytabFile, zkClientPrincipal, zkServerPrincipal);
|
||||
KerberosTestUtil.JaasConfiguration jaas = new KerberosTestUtil.JaasConfiguration(
|
||||
zkClientPrincipal, keytabFile, zkServerPrincipal, keytabFile);
|
||||
Configuration.setConfiguration(jaas);
|
||||
kerberosTestServices = KerberosTestServices.builder()
|
||||
.withKdc(new File(kdcDir))
|
||||
.withJaasConfiguration(zkClientPrincipal, keytabFile, zkServerPrincipal, keytabFile)
|
||||
.build();
|
||||
kerberosTestServices.start();
|
||||
|
||||
kerberosTestServices.getKdc().createPrincipal(keytabFile, zkClientPrincipal, zkServerPrincipal);
|
||||
} catch (Exception ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
@ -220,8 +216,7 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
|
||||
System.clearProperty("zookeeper.authProvider.1");
|
||||
System.clearProperty("zookeeper.kerberos.removeRealmFromPrincipal");
|
||||
System.clearProperty("zookeeper.kerberos.removeHostFromPrincipal");
|
||||
Configuration.setConfiguration(conf);
|
||||
kdc.stop();
|
||||
kerberosTestServices.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -236,21 +236,23 @@ public class TestAuthenticationFramework extends LuceneTestCase {
|
||||
public void init(Map<String,Object> pluginConfig) {}
|
||||
|
||||
@Override
|
||||
public void doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain)
|
||||
public boolean doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain)
|
||||
throws Exception {
|
||||
if (expectedUsername == null) {
|
||||
filterChain.doFilter(request, response);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
HttpServletRequest httpRequest = (HttpServletRequest)request;
|
||||
String username = httpRequest.getHeader("username");
|
||||
String password = httpRequest.getHeader("password");
|
||||
|
||||
log.info("Username: "+username+", password: "+password);
|
||||
if(MockAuthenticationPlugin.expectedUsername.equals(username) && MockAuthenticationPlugin.expectedPassword.equals(password))
|
||||
if(MockAuthenticationPlugin.expectedUsername.equals(username) && MockAuthenticationPlugin.expectedPassword.equals(password)) {
|
||||
filterChain.doFilter(request, response);
|
||||
else {
|
||||
return true;
|
||||
} else {
|
||||
((HttpServletResponse)response).sendError(401, "Unauthorized request");
|
||||
return false;
|
||||
}
|
||||
}
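The hunk above changes doAuthenticate from void to boolean: an implementation now reports whether the request was forwarded down the filter chain (true) or rejected with an error response (false). A stripped-down sketch of a plugin honouring that contract; the credential check is hypothetical and this is not code from the commit:

    // Hypothetical authentication plugin following the boolean contract above.
    @Override
    public boolean doAuthenticate(ServletRequest request, ServletResponse response,
                                  FilterChain filterChain) throws Exception {
      String username = ((HttpServletRequest) request).getHeader("username");
      if ("expected-user".equals(username)) {           // assumed credential check
        filterChain.doFilter(request, response);
        return true;                                    // request continued down the chain
      }
      ((HttpServletResponse) response).sendError(401, "Unauthorized request");
      return false;                                     // request handled (rejected) here
    }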
|
||||
|
||||
|
@ -51,6 +51,7 @@ import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
@ -104,11 +105,17 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "46", "val_i", "3", "ssto", "X", "subject", "ggg")).getStatus());
|
||||
assertEquals(0, CLOUD_CLIENT.commit().getStatus());;
|
||||
|
||||
// uncommitted doc in transaction log
|
||||
}
|
||||
|
||||
@Before
|
||||
private void addUncommittedDoc99() throws Exception {
|
||||
// uncommitted doc in transaction log at start of every test
|
||||
// Even if an RTG causes ulog to re-open realtime searcher, next test method
|
||||
// will get another copy of doc 99 in the ulog
|
||||
assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "99", "val_i", "1", "ssto", "X",
|
||||
"subject", "uncommitted")).getStatus());
|
||||
}
|
||||
|
||||
|
||||
@AfterClass
|
||||
private static void afterClass() throws Exception {
|
||||
CLOUD_CLIENT.close(); CLOUD_CLIENT = null;
|
||||
@ -170,13 +177,12 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
assertEquals(""+doc, 10L, doc.getFieldValue("val2_ss"));
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
|
||||
public void testMultiValuedRTG() throws Exception {
|
||||
SolrDocument doc = null;
|
||||
|
||||
// check same results as testMultiValued via RTG (committed doc)
|
||||
doc = getRandClient(random()).getById("42", params("fl","val_ss:val_i, val2_ss:10, subject"));
|
||||
assertEquals(""+doc, 2, doc.size());
|
||||
assertEquals(""+doc, 3, doc.size());
|
||||
assertEquals(""+doc, 1, doc.getFieldValue("val_ss"));
|
||||
assertEquals(""+doc, 10L, doc.getFieldValue("val2_ss"));
|
||||
assertEquals(""+doc, "aaa", doc.getFieldValue("subject"));
|
||||
@ -218,6 +224,21 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testFilterAndOneRealFieldRTG() throws Exception {
|
||||
SolrParams params = params("fl","id,val_i",
|
||||
"fq","{!field f='subject' v=$my_var}",
|
||||
"my_var","uncommitted");
|
||||
SolrDocumentList docs = getRandClient(random()).getById(Arrays.asList("42","99"), params);
|
||||
final String msg = params + " => " + docs;
|
||||
assertEquals(msg, 1, docs.size());
|
||||
assertEquals(msg, 1, docs.getNumFound());
|
||||
|
||||
SolrDocument doc = docs.get(0);
|
||||
assertEquals(msg, 2, doc.size());
|
||||
assertEquals(msg, "99", doc.getFieldValue("id"));
|
||||
assertEquals(msg, 1, doc.getFieldValue("val_i"));
|
||||
}
|
||||
|
||||
public void testScoreAndAllRealFields() throws Exception {
|
||||
for (String fl : TestPseudoReturnFields.SCORE_AND_REAL_FIELDS) {
|
||||
@ -304,7 +325,6 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
|
||||
public void testFunctionsRTG() throws Exception {
|
||||
// if we use RTG (committed or otherwise) functions should behave the same
|
||||
for (String id : Arrays.asList("42","99")) {
|
||||
@ -334,7 +354,6 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
|
||||
public void testFunctionsAndExplicitRTG() throws Exception {
|
||||
// shouldn't matter if we use RTG (committed or otherwise)
|
||||
for (String id : Arrays.asList("42","99")) {
|
||||
@ -382,7 +401,6 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
|
||||
public void testFunctionsAndScoreRTG() throws Exception {
|
||||
|
||||
// if we use RTG (committed or otherwise) score should be ignored
|
||||
@ -578,40 +596,35 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9289")
|
||||
public void testDocIdAugmenterRTG() throws Exception {
|
||||
// NOTE: once this test is fixed to pass, testAugmentersRTG should also be updated to test [docid]
|
||||
|
||||
// TODO: in single node, [docid] is silently ignored for uncommited docs (see SOLR-9288) ...
|
||||
// here we see even more confusing: [docid] is silently ignored for both committed & uncommited docs
|
||||
|
||||
// behavior shouldn't matter if we are committed or uncommitted
|
||||
// for an uncommitted doc, we should get -1
|
||||
for (String id : Arrays.asList("42","99")) {
|
||||
SolrDocument doc = getRandClient(random()).getById(id, params("fl","[docid]"));
|
||||
String msg = id + ": fl=[docid] => " + doc;
|
||||
assertEquals(msg, 1, doc.size());
|
||||
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
|
||||
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
|
||||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
|
||||
public void testAugmentersRTG() throws Exception {
|
||||
// behavior shouldn't matter if we are committed or uncommitted
|
||||
for (String id : Arrays.asList("42","99")) {
|
||||
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
|
||||
for (SolrParams p : Arrays.asList(params("fl","[shard],[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","[shard]","fl","[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
|
||||
for (SolrParams p : Arrays.asList
|
||||
(params("fl","[docid],[shard],[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","[docid],[shard]","fl","[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","[docid]","fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
|
||||
|
||||
SolrDocument doc = getRandClient(random()).getById(id, p);
|
||||
String msg = id + ": " + p + " => " + doc;
|
||||
|
||||
assertEquals(msg, 2, doc.size());
|
||||
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
|
||||
assertEquals(msg, 3, doc.size());
|
||||
assertTrue(msg, doc.getFieldValue("[shard]") instanceof String);
|
||||
// RTG: [explain] should be ignored
|
||||
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
|
||||
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
|
||||
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
|
||||
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -635,23 +648,22 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
|
||||
public void testAugmentersAndExplicitRTG() throws Exception {
|
||||
// behavior shouldn't matter if we are committed or uncommitted
|
||||
for (String id : Arrays.asList("42","99")) {
|
||||
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
|
||||
for (SolrParams p : Arrays.asList(params("fl","id,[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","id","fl","[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","id","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
|
||||
for (SolrParams p : Arrays.asList(params("fl","id,[docid],[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","id,[docid]","fl","[explain],x_alias:[value v=10 t=int]"),
|
||||
params("fl","id","fl","[docid]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
|
||||
SolrDocument doc = getRandClient(random()).getById(id, p);
|
||||
String msg = id + ": " + p + " => " + doc;
|
||||
|
||||
assertEquals(msg, 2, doc.size());
|
||||
assertEquals(msg, 3, doc.size());
|
||||
assertTrue(msg, doc.getFieldValue("id") instanceof String);
|
||||
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
|
||||
// RTG: [explain] should be missing (ignored)
|
||||
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
|
||||
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
|
||||
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
|
||||
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -688,32 +700,29 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
|
||||
public void testAugmentersAndScoreRTG() throws Exception {
|
||||
// if we use RTG (committed or otherwise) score should be ignored
|
||||
for (String id : Arrays.asList("42","99")) {
|
||||
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
|
||||
SolrDocument doc = getRandClient(random()).getById(id, params("fl","x_alias:[value v=10 t=int],score"));
|
||||
String msg = id + " => " + doc;
|
||||
|
||||
assertEquals(msg, 1, doc.size());
|
||||
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
|
||||
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
|
||||
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
|
||||
|
||||
for (SolrParams p : Arrays.asList(params("fl","x_alias:[value v=10 t=int],[explain],score"),
|
||||
params("fl","x_alias:[value v=10 t=int],[explain]","fl","score"),
|
||||
params("fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) {
|
||||
for (SolrParams p : Arrays.asList(params("fl","d_alias:[docid],x_alias:[value v=10 t=int],[explain],score"),
|
||||
params("fl","d_alias:[docid],x_alias:[value v=10 t=int],[explain]","fl","score"),
|
||||
params("fl","d_alias:[docid]","fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) {
|
||||
|
||||
doc = getRandClient(random()).getById(id, p);
|
||||
msg = id + ": " + p + " => " + doc;
|
||||
|
||||
assertEquals(msg, 1, doc.size());
|
||||
assertTrue(msg, doc.getFieldValue("id") instanceof String);
|
||||
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
|
||||
assertEquals(msg, 2, doc.size());
|
||||
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
|
||||
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
|
||||
// RTG: [explain] and score should be missing (ignored)
|
||||
assertTrue(msg, doc.getFieldValue("d_alias") instanceof Integer);
|
||||
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("d_alias")).intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -758,8 +767,7 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
|
||||
// NOTE: 'ssto' is the missing one
|
||||
final List<String> fl = Arrays.asList
|
||||
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
|
||||
("id","[explain]","score","val_*","subj*");
|
||||
("id","[docid]","[explain]","score","val_*","subj*");
|
||||
|
||||
final int iters = atLeast(random, 10);
|
||||
for (int i = 0; i< iters; i++) {
|
||||
@ -778,12 +786,13 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
|
||||
SolrDocument doc = getRandClient(random()).getById(id, params);
|
||||
String msg = id + ": " + params + " => " + doc;
|
||||
|
||||
assertEquals(msg, 3, doc.size());
|
||||
assertEquals(msg, 4, doc.size());
|
||||
assertTrue(msg, doc.getFieldValue("id") instanceof String);
|
||||
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
|
||||
assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer);
|
||||
assertEquals(msg, 1, doc.getFieldValue("val_i"));
|
||||
assertTrue(msg, doc.getFieldValue("subject") instanceof String);
|
||||
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
|
||||
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
|
||||
// RTG: [explain] and score should be missing (ignored)
|
||||
}
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
*/
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
@ -27,6 +28,7 @@ public class TestLocalFSCloudBackupRestore extends AbstractCloudBackupRestoreTes
|
||||
|
||||
@BeforeClass
|
||||
public static void setupClass() throws Exception {
|
||||
assumeFalse("Backup/Restore is currently buggy on Windows. Tracking the fix on SOLR-9242", Constants.WINDOWS);
|
||||
configureCluster(NUM_SHARDS)// nodes
|
||||
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
|
||||
.configure();
|
||||
|
Some files were not shown because too many files have changed in this diff.