Merge remote-tracking branch 'origin/branch_6x' into branch_6x

This commit is contained in:
Noble Paul 2016-08-04 12:23:24 +05:30
commit 74653e9dbf
152 changed files with 8899 additions and 1171 deletions

View File

@ -40,6 +40,13 @@ New Features
Polygon instances from a standard GeoJSON string (Robert Muir, Mike
McCandless)
* LUCENE-7395: PerFieldSimilarityWrapper requires a default similarity
for calculating query norm and coordination factor in Lucene 6.x.
Lucene 7 will no longer have those factors. (Uwe Schindler, Sascha Markus)
* SOLR-9279: Queries module: new ComparisonBoolFunction base class
(Doug Turnbull via David Smiley)
Bug Fixes
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
@ -57,6 +64,10 @@ Bug Fixes
* LUCENE-7391: Fix performance regression in MemoryIndex's fields() introduced
in Lucene 6. (Steve Mason via David Smiley)
* LUCENE-7395, SOLR-9315: Fix PerFieldSimilarityWrapper to also delegate query
norm and coordination factor using a default similarity added as ctor param.
(Uwe Schindler, Sascha Markus)
Improvements
* LUCENE-7323: Compound file writing now verifies the incoming
@ -110,10 +121,9 @@ Improvements
* LUCENE-7385: Improve/fix assert messages in SpanScorer. (David Smiley)
* LUCENE-7390: Improve performance of indexing points by allowing the
codec to use transient heap in proportion to IndexWriter's RAM
buffer, instead of a fixed 16.0 MB. A custom codec can still
override the buffer size itself. (Mike McCandless)
* LUCENE-7393: Add ICUTokenizer option to parse Myanmar text as syllables instead of words,
because the ICU word-breaking algorithm has some issues. This allows for the previous
tokenization used before Lucene 5. (AM, Robert Muir)
Optimizations
@ -129,6 +139,9 @@ Optimizations
* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
(Adrien Grand)
* LUCENE-7396, LUCENE-7399: Faster flush of points.
(Adrien Grand, Mike McCandless)
Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien

View File

@ -402,6 +402,7 @@ public class MinHashFilter extends TokenFilter {
}
/** Returns the MurmurHash3_x64_128 hash, placing the result in "out". */
@SuppressWarnings("fallthrough") // the huge switch is designed to use fall through into cases!
static void murmurhash3_x64_128(byte[] key, int offset, int len, int seed, LongPair out) {
// The original algorithm does have a 32 bit unsigned seed.
// We have to mask to match the behavior of the unsigned types and prevent sign extension.

View File

@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Parses Myanmar text, with syllable as token.
#
# Base letter of a syllable: any Myanmar-script character whose general category is Other_Letter.
$Cons = [[:Other_Letter:]&[:Myanmar:]];
# U+1039, used below to attach a following consonant to the current syllable.
$Virama = [\u1039];
# U+103A, used below to mark a consonant that closes the current syllable.
$Asat = [\u103A];
# Characters with Line_Break=Word_Joiner; syllables on either side of one stay in the same token.
$WordJoin = [:Line_Break=Word_Joiner:];
#
# default numerical definitions
#
$Extend = [\p{Word_Break = Extend}];
$Format = [\p{Word_Break = Format}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
# The *Ex variants below absorb any run of trailing Extend/Format characters.
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
$MidNumEx = $MidNum ($Extend | $Format)*;
$NumericEx = $Numeric ($Extend | $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
# A base letter plus its trailing Extend/Format characters.
$ConsEx = $Cons ($Extend | $Format)*;
# A base letter carrying asat, optionally followed by a virama-joined letter, plus trailing Extend/Format.
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
# One syllable: a leading letter, an optional virama-joined letter, then any number of asat-marked finals.
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
# Syllables connected by word joiners are emitted as a single token.
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;
!!forward;
# Myanmar syllable tokens are reported with rule status 200.
$MyanmarJoinedSyllableEx {200};
# default numeric rules
# Numeric tokens are reported with rule status 100.
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};

View File

@ -63,9 +63,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
// the same as ROOT, except no dictionary segmentation for cjk
private static final BreakIterator defaultBreakIterator =
readBreakIterator("Default.brk");
private static final BreakIterator myanmarSyllableIterator =
readBreakIterator("MyanmarSyllable.brk");
// TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
private final boolean cjkAsWords;
private final boolean myanmarAsWords;
/**
* Creates a new config. This object is lightweight, but the first
@ -74,9 +77,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
* otherwise text will be segmented according to UAX#29 defaults.
* If this is true, all Han+Hiragana+Katakana words will be tagged as
* IDEOGRAPHIC.
* @param myanmarAsWords true if Myanmar text should undergo dictionary-based segmentation,
* otherwise it will be tokenized as syllables.
*/
public DefaultICUTokenizerConfig(boolean cjkAsWords) {
public DefaultICUTokenizerConfig(boolean cjkAsWords, boolean myanmarAsWords) {
this.cjkAsWords = cjkAsWords;
this.myanmarAsWords = myanmarAsWords;
}
@Override
@ -88,6 +94,12 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
public BreakIterator getBreakIterator(int script) {
  // Pick the shared prototype for this script, then hand out a private copy of it:
  //  - Japanese always uses the CJK iterator;
  //  - Myanmar uses the syllable iterator unless myanmarAsWords is set;
  //  - everything else (including Myanmar-as-words) uses the default iterator.
  final BreakIterator prototype;
  if (script == UScript.JAPANESE) {
    prototype = cjkBreakIterator;
  } else if (script == UScript.MYANMAR && !myanmarAsWords) {
    prototype = myanmarSyllableIterator;
  } else {
    prototype = defaultBreakIterator;
  }
  // The prototypes are static and shared, so callers always receive a clone.
  return (BreakIterator) prototype.clone();
}

View File

@ -68,7 +68,7 @@ public final class ICUTokenizer extends Tokenizer {
* @see DefaultICUTokenizerConfig
*/
public ICUTokenizer() {
this(new DefaultICUTokenizerConfig(true));
this(new DefaultICUTokenizerConfig(true, true));
}
/**

View File

@ -79,6 +79,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
private final Map<Integer,String> tailored;
private ICUTokenizerConfig config;
private final boolean cjkAsWords;
private final boolean myanmarAsWords;
/** Creates a new ICUTokenizerFactory */
public ICUTokenizerFactory(Map<String,String> args) {
@ -95,6 +96,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
}
}
cjkAsWords = getBoolean(args, "cjkAsWords", true);
myanmarAsWords = getBoolean(args, "myanmarAsWords", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -104,7 +106,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
public void inform(ResourceLoader loader) throws IOException {
assert tailored != null : "init must be called first!";
if (tailored.isEmpty()) {
config = new DefaultICUTokenizerConfig(cjkAsWords);
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
} else {
final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
@ -112,7 +114,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
String resourcePath = entry.getValue();
breakers[code] = parseRules(resourcePath, loader);
}
config = new DefaultICUTokenizerConfig(cjkAsWords) {
config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {
@Override
public BreakIterator getBreakIterator(int script) {

View File

@ -42,7 +42,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
tokenizer.setReader(new StringReader(input));
assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
@ -53,7 +53,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
sb.append('a');
}
String input = sb.toString();
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
tokenizer.setReader(new StringReader(input));
char token[] = new char[4096];
Arrays.fill(token, 'a');
@ -75,7 +75,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
TokenFilter filter = new ICUNormalizer2Filter(tokenizer);
return new TokenStreamComponents(tokenizer, filter);
}

View File

@ -34,7 +34,7 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true)));
return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true, true)));
}
};
}

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.icu.segmentation;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
/** Test tokenizing Myanmar text into syllables */
public class TestMyanmarSyllable extends BaseTokenStreamTestCase {
Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, false));
return new TokenStreamComponents(tokenizer);
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
/** as opposed to dictionary break of သက်ဝင်|လှုပ်ရှား|စေ|ပြီး */
public void testBasics() throws Exception {
assertAnalyzesTo(a, "သက်ဝင်လှုပ်ရှားစေပြီး", new String[] { "သက်", "ဝင်", "လှုပ်", "ရှား", "စေ", "ပြီး" });
}
// simple tests from "A Rule-based Syllable Segmentation of Myanmar Text"
// * http://www.aclweb.org/anthology/I08-3010
// (see also the presentation: http://gii2.nagaokaut.ac.jp/gii/media/share/20080901-ZMM%20Presentation.pdf)
// The words are fake, we just test the categories.
// note that currently our algorithm is not sophisticated enough to handle some of the special cases!
/** consonant */
public void testC() throws Exception {
assertAnalyzesTo(a, "ကက", new String[] { "က", "က" });
}
/** consonant + sign */
public void testCF() throws Exception {
assertAnalyzesTo(a, "ကံကံ", new String[] { "ကံ", "ကံ" });
}
/** consonant + consonant + asat */
public void testCCA() throws Exception {
assertAnalyzesTo(a, "ကင်ကင်", new String[] { "ကင်", "ကင်" });
}
/** consonant + consonant + asat + sign */
public void testCCAF() throws Exception {
assertAnalyzesTo(a, "ကင်းကင်း", new String[] { "ကင်း", "ကင်း" });
}
/** consonant + vowel */
public void testCV() throws Exception {
assertAnalyzesTo(a, "ကာကာ", new String[] { "ကာ", "ကာ" });
}
/** consonant + vowel + sign */
public void testCVF() throws Exception {
assertAnalyzesTo(a, "ကားကား", new String[] { "ကား", "ကား" });
}
/** consonant + vowel + vowel + asat */
public void testCVVA() throws Exception {
assertAnalyzesTo(a, "ကော်ကော်", new String[] { "ကော်", "ကော်" });
}
/** consonant + vowel + vowel + consonant + asat */
public void testCVVCA() throws Exception {
assertAnalyzesTo(a, "ကောင်ကောင်", new String[] { "ကောင်", "ကောင်" });
}
/** consonant + vowel + vowel + consonant + asat + sign */
public void testCVVCAF() throws Exception {
assertAnalyzesTo(a, "ကောင်းကောင်း", new String[] { "ကောင်း", "ကောင်း" });
}
/** consonant + medial */
public void testCM() throws Exception {
assertAnalyzesTo(a, "ကျကျ", new String[] { "ကျ", "ကျ" });
}
/** consonant + medial + sign */
public void testCMF() throws Exception {
assertAnalyzesTo(a, "ကျံကျံ", new String[] { "ကျံ", "ကျံ" });
}
/** consonant + medial + consonant + asat */
public void testCMCA() throws Exception {
assertAnalyzesTo(a, "ကျင်ကျင်", new String[] { "ကျင်", "ကျင်" });
}
/** consonant + medial + consonant + asat + sign */
public void testCMCAF() throws Exception {
assertAnalyzesTo(a, "ကျင်းကျင်း", new String[] { "ကျင်း", "ကျင်း" });
}
/** consonant + medial + vowel */
public void testCMV() throws Exception {
assertAnalyzesTo(a, "ကျာကျာ", new String[] { "ကျာ", "ကျာ" });
}
/** consonant + medial + vowel + sign */
public void testCMVF() throws Exception {
assertAnalyzesTo(a, "ကျားကျား", new String[] { "ကျား", "ကျား" });
}
/** consonant + medial + vowel + vowel + asat */
public void testCMVVA() throws Exception {
assertAnalyzesTo(a, "ကျော်ကျော်", new String[] { "ကျော်", "ကျော်" });
}
/** consonant + medial + vowel + vowel + consonant + asat */
public void testCMVVCA() throws Exception {
assertAnalyzesTo(a, "ကြောင်ကြောင်", new String[] { "ကြောင်", "ကြောင်"});
}
/** consonant + medial + vowel + vowel + consonant + asat + sign */
public void testCMVVCAF() throws Exception {
assertAnalyzesTo(a, "ကြောင်းကြောင်း", new String[] { "ကြောင်း", "ကြောင်း"});
}
/** independent vowel */
public void testI() throws Exception {
  // Each independent vowel letter forms a syllable on its own, so the two-letter
  // input must yield two one-letter tokens (a tokenizer never emits empty terms).
  assertAnalyzesTo(a, "ဪဪ", new String[] { "ဪ", "ဪ" });
}
/** independent vowel */
public void testE() throws Exception {
  // Each independent vowel letter forms a syllable on its own, so the two-letter
  // input must yield two one-letter tokens (a tokenizer never emits empty terms).
  assertAnalyzesTo(a, "ဣဣ", new String[] { "ဣ", "ဣ" });
}
}

View File

@ -46,7 +46,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
TokenStream result = new CJKBigramFilter(source);
return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET));
}
@ -60,7 +60,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
analyzer2 = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false, true));
// we put this before the CJKBigramFilter, because the normalization might combine
// some halfwidth katakana forms, which will affect the bigramming.
TokenStream result = new ICUNormalizer2Filter(source);

View File

@ -68,7 +68,7 @@ class SimpleTextPointsWriter extends PointsWriter {
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@ -79,7 +79,7 @@ class SimpleTextPointsWriter extends PointsWriter {
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointNumBytes(),
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
maxMBSortInHeap,
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
values.size(fieldInfo.name),
singleValuePerDoc) {

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;
import org.apache.lucene.util.BytesRef;
/** {@link PointsReader} whose order of points can be changed.
 * This class is useful for codecs to optimize flush.
 * <p>
 * Values are addressed by their position {@code i} in the reader's current
 * order; {@link #swap(int, int)} changes that order.
 * @lucene.internal */
public abstract class MutablePointsReader extends PointsReader {
/** Sole constructor. */
protected MutablePointsReader() {}
/**
 * Set {@code packedValue} with a reference to the packed bytes of the i-th value.
 * @param i position of the value in the current order
 * @param packedValue receives the reference to the value's packed bytes
 */
public abstract void getValue(int i, BytesRef packedValue);
/**
 * Get the k-th byte of the i-th value.
 * @param i position of the value in the current order
 * @param k index of the byte within the value's packed bytes
 * @return the requested byte
 */
public abstract byte getByteAt(int i, int k);
/**
 * Return the doc ID of the i-th value.
 * @param i position of the value in the current order
 * @return the doc ID associated with that value
 */
public abstract int getDocID(int i);
/**
 * Swap the i-th and j-th values.
 * @param i position of the first value
 * @param j position of the second value
 */
public abstract void swap(int i, int j);
}

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.bkd.BKDWriter;
/** Abstract API to write points
*
@ -35,9 +34,8 @@ public abstract class PointsWriter implements Closeable {
protected PointsWriter() {
}
/** Write all values contained in the provided reader. {@code maxMBSortInHeap} is the maximum
* transient heap that can be used to sort values, before spilling to disk for offline sorting */
public abstract void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException;
/** Write all values contained in the provided reader */
public abstract void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException;
/** Default naive merge implementation for one field: it just re-indexes all the values
* from the incoming segment. The default codec overrides this for 1D fields and uses
@ -147,10 +145,7 @@ public abstract class PointsWriter implements Closeable {
public int getDocCount(String fieldName) {
return finalDocCount;
}
},
// TODO: also let merging of > 1D fields tap into IW's indexing buffer size, somehow (1D fields do an optimized merge sort
// and don't need heap)
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
});
}
/** Default merge implementation to merge incoming points readers by visiting all their points and

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.FieldInfo;
@ -39,9 +40,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
/** Writes dimensional values
*
* @lucene.experimental */
/** Writes dimensional values */
public class Lucene60PointsWriter extends PointsWriter implements Closeable {
/** Output used to write the BKD tree data file */
@ -52,13 +51,15 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
final SegmentWriteState writeState;
final int maxPointsInLeafNode;
final double maxMBSortInHeap;
private boolean finished;
/** Full constructor */
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode) throws IOException {
public Lucene60PointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
assert writeState.fieldInfos.hasPointValues();
this.writeState = writeState;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxMBSortInHeap = maxMBSortInHeap;
String dataFileName = IndexFileNames.segmentFileName(writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene60PointsFormat.DATA_EXTENSION);
@ -80,11 +81,11 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
/** Uses the default values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap} (16.0) */
public Lucene60PointsWriter(SegmentWriteState writeState) throws IOException {
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@ -98,6 +99,14 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
values.size(fieldInfo.name),
singleValuePerDoc)) {
if (values instanceof MutablePointsReader) {
final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointsReader) values);
if (fp != -1) {
indexFPs.put(fieldInfo.name, fp);
}
return;
}
values.intersect(fieldInfo.name, new IntersectVisitor() {
@Override
public void visit(int docID) {
@ -173,8 +182,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
// NOTE: not used, since BKDWriter.merge does a merge sort:
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
maxMBSortInHeap,
totMaxSize,
singleValuePerDoc)) {
List<BKDReader> bkdReaders = new ArrayList<>();

View File

@ -257,7 +257,7 @@ public class Field implements IndexableField {
/**
* The value of the field as a String, or null. If null, the Reader value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
* binaryValue() must be set.
*/
@Override
public String stringValue() {
@ -271,7 +271,7 @@ public class Field implements IndexableField {
/**
* The value of the field as a Reader, or null. If null, the String value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
* binaryValue() must be set.
*/
@Override
public Reader readerValue() {
@ -420,7 +420,7 @@ public class Field implements IndexableField {
/**
* Expert: sets the token stream to be used for indexing and causes
* isIndexed() and isTokenized() to return true. May be combined with stored
* values from stringValue() or getBinaryValue()
* values from stringValue() or binaryValue()
*/
public void setTokenStream(TokenStream tokenStream) {
if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {

View File

@ -153,7 +153,7 @@ class DocumentsWriterPerThread {
final Allocator byteBlockAllocator;
final IntBlockPool.Allocator intBlockAllocator;
private final AtomicLong pendingNumDocs;
final LiveIndexWriterConfig indexWriterConfig;
private final LiveIndexWriterConfig indexWriterConfig;
private final boolean enableTestPoints;
private final IndexWriter indexWriter;

View File

@ -762,7 +762,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* {@link #getConfig()}.
*
* <p>
* <b>NOTE:</b> after this writer is created, the given configuration instance
* <b>NOTE:</b> after this writer is created, the given configuration instance
* cannot be passed to another writer.
*
* @param d

View File

@ -168,14 +168,9 @@ public class LiveIndexWriterConfig {
/**
* Determines the amount of RAM that may be used for buffering added documents
* and deletions before beginning to flush them to the Directory. For
* faster indexing performance it's best to use as large a RAM buffer as you can.
* <p>
* Note that this setting is not a hard limit on memory usage during indexing, as
* transient and non-trivial memory well beyond this buffer size may be used,
* for example due to segment merges or writing points to new segments.
* For application stability the available memory in the JVM
* should be significantly larger than the RAM buffer used for indexing.
* and deletions before they are flushed to the Directory. Generally for
* faster indexing performance it's best to flush by RAM usage instead of
* document count and use as large a RAM buffer as you can.
* <p>
* When this is set, the writer will flush whenever buffered documents and
* deletions use this much RAM. Pass in
@ -183,6 +178,14 @@ public class LiveIndexWriterConfig {
* due to RAM usage. Note that if flushing by document count is also enabled,
* then the flush will be triggered by whichever comes first.
* <p>
* The maximum RAM limit is inherently determined by the JVM's available
* memory. Yet, an {@link IndexWriter} session can consume a significantly
* larger amount of memory than the given RAM limit since this limit is just
* an indicator of when to flush memory-resident documents to the Directory.
* Flushes are likely to happen concurrently while other threads are adding
* documents to the writer. For application stability the available memory in the JVM
* should be significantly larger than the RAM buffer used for indexing.
* <p>
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
* approximate. Specifically, if you delete by Query, Lucene currently has no
* way to measure the RAM usage of individual Queries so the accounting will

View File

@ -18,13 +18,13 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.bkd.BKDWriter;
/** Buffers up pending byte[][] value(s) per doc, then flushes when segment flushes. */
class PointValuesWriter {
@ -35,8 +35,7 @@ class PointValuesWriter {
private int numPoints;
private int numDocs;
private int lastDocID = -1;
private final byte[] packedValue;
private final LiveIndexWriterConfig indexWriterConfig;
private final int packedBytesLength;
public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
@ -44,8 +43,7 @@ class PointValuesWriter {
this.bytes = new ByteBlockPool(docWriter.byteBlockAllocator);
docIDs = new int[16];
iwBytesUsed.addAndGet(16 * Integer.BYTES);
packedValue = new byte[fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()];
indexWriterConfig = docWriter.indexWriterConfig;
packedBytesLength = fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes();
}
// TODO: if exactly the same value is added to exactly the same doc, should we dedup?
@ -53,9 +51,10 @@ class PointValuesWriter {
if (value == null) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": point value must not be null");
}
if (value.length != fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()) {
if (value.length != packedBytesLength) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": this field's value has length=" + value.length + " but should be " + (fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()));
}
if (docIDs.length == numPoints) {
docIDs = ArrayUtil.grow(docIDs, numPoints+1);
iwBytesUsed.addAndGet((docIDs.length - numPoints) * Integer.BYTES);
@ -66,21 +65,32 @@ class PointValuesWriter {
numDocs++;
lastDocID = docID;
}
numPoints++;
}
public void flush(SegmentWriteState state, PointsWriter writer) throws IOException {
PointsReader reader = new MutablePointsReader() {
final int[] ords = new int[numPoints];
{
for (int i = 0; i < numPoints; ++i) {
ords[i] = i;
}
}
writer.writeField(fieldInfo,
new PointsReader() {
@Override
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
final BytesRef scratch = new BytesRef();
final byte[] packedValue = new byte[packedBytesLength];
for(int i=0;i<numPoints;i++) {
bytes.readBytes(packedValue.length * i, packedValue, 0, packedValue.length);
visitor.visit(docIDs[i], packedValue);
getValue(i, scratch);
assert scratch.length == packedValue.length;
System.arraycopy(scratch.bytes, scratch.offset, packedValue, 0, packedBytesLength);
visitor.visit(getDocID(i), packedValue);
}
}
@ -120,14 +130,46 @@ class PointValuesWriter {
@Override
public long size(String fieldName) {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
return numPoints;
}
@Override
public int getDocCount(String fieldName) {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
return numDocs;
}
},
Math.max(indexWriterConfig.getRAMBufferSizeMB()/8.0, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP));
@Override
public void swap(int i, int j) {
  // Only the ordinal indirection is permuted; docIDs and the byte pool are left untouched.
  final int ordAtJ = ords[j];
  ords[j] = ords[i];
  ords[i] = ordAtJ;
}
@Override
public int getDocID(int i) {
  // Translate the current position into the ordinal under which the point was buffered.
  final int ord = ords[i];
  return docIDs[ord];
}
@Override
public void getValue(int i, BytesRef packedValue) {
  // Resolve the buffered ordinal first, then locate the value in the byte pool.
  final int ord = ords[i];
  // Widen to long before multiplying so the pool offset cannot overflow an int.
  final long start = (long) packedBytesLength * ord;
  packedValue.length = packedBytesLength;
  bytes.setRawBytesRef(packedValue, start);
}
@Override
public byte getByteAt(int i, int k) {
  final int ord = ords[i];
  // Widen to long before multiplying so the pool offset cannot overflow an int.
  final long valueStart = (long) packedBytesLength * ord;
  return bytes.readByte(valueStart + k);
}
};
writer.writeField(fieldInfo, reader);
}
}

View File

@ -29,16 +29,54 @@ import org.apache.lucene.search.TermStatistics;
* <p>
* Subclasses should implement {@link #get(String)} to return an appropriate
* Similarity (for example, using field-specific parameter values) for the field.
* <p>
* For Lucene 6, you should pass a default similarity that is used for all non
* field-specific methods. From Lucene 7 on, this is no longer required.
*
* @lucene.experimental
*/
public abstract class PerFieldSimilarityWrapper extends Similarity {
/** Default similarity used for query norm and coordination factors. */
protected final Similarity defaultSim;
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
* Constructor taking a default similarity for all non-field specific calculations.
* @param defaultSim is used for all non field-specific calculations, like
* {@link #queryNorm(float)} and {@link #coord(int, int)}.
*/
public PerFieldSimilarityWrapper() {}
public PerFieldSimilarityWrapper(Similarity defaultSim) {
this.defaultSim = defaultSim;
}
/**
 * Compatibility constructor kept for the 6.x series. It builds a per-field
 * similarity whose non field-specific methods return a constant (1).
 * <p>
 * From Lucene 7 on, this will become the default constructor again, because
 * coordination factors and query normalization will be removed.
 * @deprecated specify a default similarity for non field-specific calculations.
 */
@Deprecated
public PerFieldSimilarityWrapper() {
  // Placeholder similarity: its field-specific methods are never supposed to be reached
  // (each one throws AssertionError); it only serves the inherited queryNorm and coord.
  this(new Similarity() {
    @Override
    public SimScorer simScorer(SimWeight stats, LeafReaderContext leaf) throws IOException {
      throw new AssertionError();
    }

    @Override
    public SimWeight computeWeight(CollectionStatistics collection, TermStatistics... terms) {
      throw new AssertionError();
    }

    @Override
    public long computeNorm(FieldInvertState invertState) {
      throw new AssertionError();
    }
  });
}
@Override
public final long computeNorm(FieldInvertState state) {
@ -59,6 +97,16 @@ public abstract class PerFieldSimilarityWrapper extends Similarity {
return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context);
}
/** Coordination factor, taken from the default similarity rather than a per-field one. */
@Override
public final float coord(int overlap, int maxOverlap) {
  final float factor = defaultSim.coord(overlap, maxOverlap);
  return factor;
}
/** Query normalization, taken from the default similarity rather than a per-field one. */
@Override
public final float queryNorm(float valueForNormalization) {
  final float norm = defaultSim.queryNorm(valueForNormalization);
  return norm;
}
/**
* Returns a {@link Similarity} for scoring a field.
*/

View File

@ -459,69 +459,26 @@ public final class ArrayUtil {
* greater than or equal to it.
* This runs in linear time on average and in {@code n log(n)} time in the
* worst case.*/
public static <T> void select(T[] arr, int from, int to, int k, Comparator<T> comparator) {
if (k < from) {
throw new IllegalArgumentException("k must be >= from");
}
if (k >= to) {
throw new IllegalArgumentException("k must be < to");
}
final int maxDepth = 2 * MathUtil.log(to - from, 2);
quickSelect(arr, from, to, k, comparator, maxDepth);
public static <T> void select(T[] arr, int from, int to, int k, Comparator<? super T> comparator) {
new IntroSelector() {
T pivot;
@Override
protected void swap(int i, int j) {
ArrayUtil.swap(arr, i, j);
}
private static <T> void quickSelect(T[] arr, int from, int to, int k, Comparator<T> comparator, int maxDepth) {
assert from <= k;
assert k < to;
if (to - from == 1) {
return;
}
if (--maxDepth < 0) {
Arrays.sort(arr, from, to, comparator);
return;
@Override
protected void setPivot(int i) {
pivot = arr[i];
}
final int mid = (from + to) >>> 1;
// heuristic: we use the median of the values at from, to-1 and mid as a pivot
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
swap(arr, from, to - 1);
}
if (comparator.compare(arr[to - 1], arr[mid]) > 0) {
swap(arr, to - 1, mid);
if (comparator.compare(arr[from], arr[to - 1]) > 0) {
swap(arr, from, to - 1);
@Override
protected int comparePivot(int j) {
return comparator.compare(pivot, arr[j]);
}
}.select(from, to, k);
}
T pivot = arr[to - 1];
int left = from + 1;
int right = to - 2;
for (;;) {
while (comparator.compare(pivot, arr[left]) > 0) {
++left;
}
while (left < right && comparator.compare(pivot, arr[right]) <= 0) {
--right;
}
if (left < right) {
swap(arr, left, right);
--right;
} else {
break;
}
}
swap(arr, left, to - 1);
if (left == k) {
return;
} else if (left < k) {
quickSelect(arr, left + 1, to, k, comparator, maxDepth);
} else {
quickSelect(arr, from, left, k, comparator, maxDepth);
}
}
}

View File

@ -378,5 +378,34 @@ public final class ByteBlockPool {
}
} while (true);
}
/**
 * Make the given {@link BytesRef} expose the {@code ref.length} bytes that
 * start at {@code offset}. The caller must have set {@code ref.length}
 * beforehand, since it is read here and never written.
 * <p>
 * When the requested range fits within a single block, {@code ref} simply
 * points into that internal block (no copy). When it crosses a block
 * boundary, the bytes are copied into a freshly allocated array instead.
 * <p>
 * Unlike {@link #setBytesRef(BytesRef, int)}, this does not expect the length
 * to be encoded with the data.
 */
public void setRawBytesRef(BytesRef ref, final long offset) {
  final int block = (int) (offset >> BYTE_BLOCK_SHIFT);
  final int start = (int) (offset & BYTE_BLOCK_MASK);
  if (start + ref.length > BYTE_BLOCK_SIZE) {
    // the value straddles two blocks: hand out a private copy
    ref.bytes = new byte[ref.length];
    ref.offset = 0;
    readBytes(offset, ref.bytes, 0, ref.length);
    return;
  }
  // the value lives entirely in one block: share the internal buffer
  ref.bytes = buffers[block];
  ref.offset = start;
}
/** Return the single byte stored at the absolute position {@code offset}. */
public byte readByte(long offset) {
  // high bits select the block, low bits the position inside it
  final byte[] block = buffers[(int) (offset >> BYTE_BLOCK_SHIFT)];
  return block[(int) (offset & BYTE_BLOCK_MASK)];
}
}

View File

@ -33,8 +33,8 @@ public abstract class InPlaceMergeSorter extends Sorter {
}
void mergeSort(int from, int to) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
} else {
final int mid = (from + to) >>> 1;
mergeSort(from, mid);

View File

@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Comparator;
/** Implementation of the quick select algorithm.
 *  <p>It uses the median of the first, middle and last values as a pivot and
 *  falls back to a heap sort when the number of recursion levels exceeds
 *  {@code 2 lg(n)}, as a consequence it runs in linear time on average and in
 *  {@code n log(n)} time in the worst case.</p>
 *  <p>Subclasses provide access to the data through {@link #swap(int, int)},
 *  {@link #setPivot(int)} and {@link #comparePivot(int)}.</p>
 *  @lucene.internal */
public abstract class IntroSelector extends Selector {

  /** Validates the arguments, then runs quick select with a recursion budget
   *  of {@code 2 * log2(to - from)} levels. */
  @Override
  public final void select(int from, int to, int k) {
    checkArgs(from, to, k);
    final int maxDepth = 2 * MathUtil.log(to - from, 2);
    quickSelect(from, to, k, maxDepth);
  }

  // heap sort
  // Fall-back used once the recursion budget is exhausted: the whole range is
  // sorted, which puts slot k in its final place regardless of its value.
  // TODO: use median of median instead to have linear worst-case rather than
  // n*log(n)
  void slowSelect(int from, int to, int k) {
    new Sorter() {

      @Override
      protected void swap(int i, int j) {
        IntroSelector.this.swap(i, j);
      }

      @Override
      protected int compare(int i, int j) {
        return IntroSelector.this.compare(i, j);
      }

      @Override
      public void sort(int from, int to) {
        heapSort(from, to);
      }
    }.sort(from, to);
  }

  /** Recursive quick select over {@code [from, to)}; {@code maxDepth} is the
   *  number of recursion levels left before switching to {@link #slowSelect}. */
  private void quickSelect(int from, int to, int k, int maxDepth) {
    assert from <= k;
    assert k < to;
    if (to - from == 1) {
      return;
    }
    if (--maxDepth < 0) {
      slowSelect(from, to, k);
      return;
    }

    final int mid = (from + to) >>> 1;
    // heuristic: we use the median of the values at from, to-1 and mid as a pivot
    if (compare(from, to - 1) > 0) {
      swap(from, to - 1);
    }
    if (compare(to - 1, mid) > 0) {
      swap(to - 1, mid);
      if (compare(from, to - 1) > 0) {
        swap(from, to - 1);
      }
    }

    // the median now sits in the last slot; from here on only comparePivot is
    // used, so the saved pivot is not disturbed by the default compare()
    setPivot(to - 1);

    int left = from + 1;
    int right = to - 2;

    for (;;) {
      // this scan cannot run past to - 1: the pivot compares equal to itself there
      while (comparePivot(left) > 0) {
        ++left;
      }

      while (left < right && comparePivot(right) <= 0) {
        --right;
      }

      if (left < right) {
        swap(left, right);
        --right;
      } else {
        break;
      }
    }
    // move the pivot to its final position
    swap(left, to - 1);

    if (left == k) {
      return;
    } else if (left < k) {
      quickSelect(left + 1, to, k, maxDepth);
    } else {
      quickSelect(from, left, k, maxDepth);
    }
  }

  /** Compare entries found in slots <code>i</code> and <code>j</code>.
   *  The contract for the returned value is the same as
   *  {@link Comparator#compare(Object, Object)}.
   *  <p>This default implementation goes through {@link #setPivot(int)} and
   *  {@link #comparePivot(int)}, so calling it replaces the current pivot. */
  protected int compare(int i, int j) {
    setPivot(i);
    return comparePivot(j);
  }

  /** Save the value at slot <code>i</code> so that it can later be used as a
   *  pivot, see {@link #comparePivot(int)}. */
  protected abstract void setPivot(int i);

  /** Compare the pivot with the slot at <code>j</code>, similarly to
   *  {@link #compare(int, int) compare(i, j)}. */
  protected abstract int comparePivot(int j);
}

View File

@ -16,7 +16,6 @@
*/
package org.apache.lucene.util;
/**
* {@link Sorter} implementation based on a variant of the quicksort algorithm
* called <a href="http://en.wikipedia.org/wiki/Introsort">introsort</a>: when
@ -38,8 +37,8 @@ public abstract class IntroSorter extends Sorter {
}
void quicksort(int from, int to, int maxDepth) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
return;
} else if (--maxDepth < 0) {
heapSort(from, to);
@ -84,11 +83,18 @@ public abstract class IntroSorter extends Sorter {
quicksort(left + 1, to, maxDepth);
}
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
// Don't rely on the slow default impl of setPivot/comparePivot since
// quicksort relies on these methods to be fast for good performance
@Override
protected abstract void setPivot(int i);
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
@Override
protected abstract int comparePivot(int j);
@Override
protected int compare(int i, int j) {
  // Expressed through the pivot primitives: slot i becomes the pivot, which is
  // then compared against slot j. Any previously saved pivot is replaced.
  setPivot(i);
  final int cmp = comparePivot(j);
  return cmp;
}
}

View File

@ -38,6 +38,7 @@ public abstract class MSBRadixSorter extends Sorter {
// we store one histogram per recursion level
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
private final int[] commonPrefix;
private final int maxLength;
@ -47,6 +48,7 @@ public abstract class MSBRadixSorter extends Sorter {
*/
protected MSBRadixSorter(int maxLength) {
this.maxLength = maxLength;
this.commonPrefix = new int[Math.min(24, maxLength)];
}
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
@ -116,14 +118,14 @@ public abstract class MSBRadixSorter extends Sorter {
@Override
public void sort(int from, int to) {
checkRange(from, to);
sort(from, to, 0);
sort(from, to, 0, 0);
}
private void sort(int from, int to, int k) {
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
private void sort(int from, int to, int k, int l) {
if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
introSort(from, to, k);
} else {
radixSort(from, to, k);
radixSort(from, to, k, l);
}
}
@ -131,28 +133,30 @@ public abstract class MSBRadixSorter extends Sorter {
getFallbackSorter(k).sort(from, to);
}
private void radixSort(int from, int to, int k) {
int[] histogram = histograms[k];
/**
* @param k the character number to compare
* @param l the level of recursion
*/
private void radixSort(int from, int to, int k, int l) {
int[] histogram = histograms[l];
if (histogram == null) {
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
histogram = histograms[l] = new int[HISTOGRAM_SIZE];
} else {
Arrays.fill(histogram, 0);
}
buildHistogram(from, to, k, histogram);
// short-circuit: if all keys have the same byte at offset k, then recurse directly
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
if (histogram[i] == to - from) {
// everything is in the same bucket, recurse
if (i > 0) {
sort(from, to, k + 1);
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, k, histogram);
if (commonPrefixLength > 0) {
// if there are no more chars to compare or if all entries fell into the
// first bucket (which means strings are shorter than k) then we are done
// otherwise recurse
if (k + commonPrefixLength < maxLength
&& histogram[0] < to - from) {
radixSort(from, to, k + commonPrefixLength, l);
}
return;
} else if (histogram[i] != 0) {
break;
}
}
assert assertHistogram(commonPrefixLength, histogram);
int[] startOffsets = histogram;
int[] endOffsets = this.endOffsets;
@ -167,24 +171,83 @@ public abstract class MSBRadixSorter extends Sorter {
int h = endOffsets[i];
final int bucketLen = h - prev;
if (bucketLen > 1) {
sort(from + prev, from + h, k + 1);
sort(from + prev, from + h, k + 1, l + 1);
}
prev = h;
}
}
}
// Sanity check, only meant to be called from an assert statement: a histogram with
// exactly one non-empty bucket must go together with a non-empty common prefix, and
// any other histogram with no common prefix at all.
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
  int nonEmptyBuckets = 0;
  for (int bucket = 0; bucket < histogram.length; ++bucket) {
    if (histogram[bucket] > 0) {
      ++nonEmptyBuckets;
    }
  }
  if (nonEmptyBuckets != 1) {
    assert commonPrefixLength == 0 : commonPrefixLength;
  } else {
    assert commonPrefixLength >= 1;
  }
  return true;
}
/** Map the k-th byte of entry {@code i} to a histogram slot between 0 and
 *  {@link #HISTOGRAM_SIZE}: the value returned by {@code byteAt} shifted by one. */
private int getBucket(int i, int k) {
  final int b = byteAt(i, k);
  return b + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
 * and return a common prefix length for all visited values.
 * <p>The histogram is only written to, never cleared, so it is expected to be zeroed
 * by the caller.
 * <p>When the returned length is greater than 0, all entries of {@code [from, to)}
 * agree on that many bytes starting at offset {@code k} and the histogram has a single
 * non-empty bucket holding {@code to - from}. The length is capped by the size of the
 * {@code commonPrefix} buffer and by {@code maxLength - k}, so the real common prefix
 * may be longer. When 0 is returned, the histogram holds the complete per-bucket counts
 * for offset {@code k}.
 * @see #buildHistogram */
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
final int[] commonPrefix = this.commonPrefix;
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
// seed the candidate prefix with the bytes of the first entry; a -1 byte is recorded
// and then ends the candidate prefix
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(from, k + j);
commonPrefix[j] = b;
if (b == -1) {
commonPrefixLength = j + 1;
break;
}
}
// shrink the candidate prefix to what every following entry shares with it
int i;
outer: for (i = from + 1; i < to; ++i) {
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(i, k + j);
if (b != commonPrefix[j]) {
commonPrefixLength = j;
if (commonPrefixLength == 0) { // we have no common prefix
// entries from..i-1 all had commonPrefix[0] at offset k, entry i has b:
// record both and let buildHistogram count the remaining entries
histogram[commonPrefix[0] + 1] = i - from;
histogram[b + 1] = 1;
break outer;
}
break;
}
}
}
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;
buildHistogram(i + 1, to, k, histogram);
} else {
// every entry was visited and shares at least its first byte with the others
assert commonPrefixLength > 0;
histogram[commonPrefix[0] + 1] = to - from;
}
return commonPrefixLength;
}
/** Build an histogram of the k-th characters of values occurring between
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
private void buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
return histogram;
}
/** Accumulate values of the histogram so that it does not store counts but

View File

@ -0,0 +1,278 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
/** Radix selector.
 *  <p>This implementation works similarly to a MSB radix sort except that it
 *  only recurses into the sub partition that contains the desired value.
 *  @lucene.internal */
public abstract class RadixSelector extends Selector {

  // after that many levels of recursion we fall back to introselect anyway
  // this is used as a protection against the fact that radix sort performs
  // worse when there are long common prefixes (probably because of cache
  // locality)
  private static final int LEVEL_THRESHOLD = 8;
  // size of histograms: 256 + 1 to indicate that the string is finished
  private static final int HISTOGRAM_SIZE = 257;
  // ranges of at most this many entries are handed over to the fall-back selector
  private static final int LENGTH_THRESHOLD = 100;

  // a single histogram is shared by all recursion levels: it is refilled on
  // entry of radixSelect and no longer read once the selector descends into a bucket
  private final int[] histogram = new int[HISTOGRAM_SIZE];
  // scratch buffer holding the bytes of the first entry while looking for a common prefix
  private final int[] commonPrefix;

  private final int maxLength;

  /**
   * Sole constructor.
   * @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown.
   */
  protected RadixSelector(int maxLength) {
    this.maxLength = maxLength;
    this.commonPrefix = new int[Math.min(24, maxLength)];
  }

  /** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
   * its length is less than or equal to {@code k}. This may only be called
   * with a value of {@code k} between {@code 0} included and
   * {@code maxLength} excluded. */
  protected abstract int byteAt(int i, int k);

  /** Get a fall-back selector which may assume that the first {@code d} bytes
   *  of all compared strings are equal. This fallback selector is used when
   *  the range becomes narrow or when the maximum level of recursion has
   *  been exceeded. */
  protected Selector getFallbackSelector(int d) {
    return new IntroSelector() {

      // bytes of the current pivot, starting at offset d
      private final BytesRefBuilder pivot = new BytesRefBuilder();

      @Override
      protected void swap(int i, int j) {
        RadixSelector.this.swap(i, j);
      }

      @Override
      protected int compare(int i, int j) {
        // the first d bytes are known to be equal, start comparing at offset d
        for (int o = d; o < maxLength; ++o) {
          final int b1 = byteAt(i, o);
          final int b2 = byteAt(j, o);
          if (b1 != b2) {
            return b1 - b2;
          } else if (b1 == -1) {
            // both entries end here
            break;
          }
        }
        return 0;
      }

      @Override
      protected void setPivot(int i) {
        pivot.setLength(0);
        for (int o = d; o < maxLength; ++o) {
          final int b = byteAt(i, o);
          if (b == -1) {
            break;
          }
          pivot.append((byte) b);
        }
      }

      @Override
      protected int comparePivot(int j) {
        for (int o = 0; o < pivot.length(); ++o) {
          final int b1 = pivot.byteAt(o) & 0xff;
          final int b2 = byteAt(j, d + o);
          if (b1 != b2) {
            return b1 - b2;
          }
        }
        if (d + pivot.length() == maxLength) {
          // the pivot has the maximum length, there are no more bytes to look at
          return 0;
        }
        // the pivot ends here: this is 0 if entry j ends here too (byteAt returns -1)
        // and negative if entry j is longer
        return -1 - byteAt(j, d + pivot.length());
      }
    };
  }

  @Override
  public void select(int from, int to, int k) {
    checkArgs(from, to, k);
    select(from, to, k, 0, 0);
  }

  /**
   * @param d the character number to compare
   * @param l the level of recursion
   */
  private void select(int from, int to, int k, int d, int l) {
    // The threshold applies to the recursion level l, not to the byte offset d:
    // skipping a common prefix advances d without increasing the level, and must
    // not by itself force the fall-back selector.
    if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
      getFallbackSelector(d).select(from, to, k);
    } else {
      radixSelect(from, to, k, d, l);
    }
  }

  /**
   * @param d the character number to compare
   * @param l the level of recursion
   */
  private void radixSelect(int from, int to, int k, int d, int l) {
    final int[] histogram = this.histogram;
    Arrays.fill(histogram, 0);

    final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, d, histogram);
    if (commonPrefixLength > 0) {
      // if there are no more chars to compare or if all entries fell into the
      // first bucket (which means strings are shorter than d) then we are done
      // otherwise recurse
      if (d + commonPrefixLength < maxLength
          && histogram[0] < to - from) {
        radixSelect(from, to, k, d + commonPrefixLength, l);
      }
      return;
    }
    assert assertHistogram(commonPrefixLength, histogram);

    // walk the buckets in order until reaching the one whose range contains k
    int bucketFrom = from;
    for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) {
      final int bucketTo = bucketFrom + histogram[bucket];

      if (bucketTo > k) {
        partition(from, to, bucket, bucketFrom, bucketTo, d);

        if (bucket != 0 && d + 1 < maxLength) {
          // all elements in bucket 0 are equal so we only need to recurse if bucket != 0
          select(bucketFrom, bucketTo, k, d + 1, l + 1);
        }
        return;
      }
      bucketFrom = bucketTo;
    }
    throw new AssertionError("Unreachable code");
  }

  // only used from assert
  private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
    int numberOfUniqueBytes = 0;
    for (int freq : histogram) {
      if (freq > 0) {
        numberOfUniqueBytes++;
      }
    }
    if (numberOfUniqueBytes == 1) {
      assert commonPrefixLength >= 1;
    } else {
      assert commonPrefixLength == 0;
    }
    return true;
  }

  /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
  private int getBucket(int i, int k) {
    return byteAt(i, k) + 1;
  }

  /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
   *  and return a common prefix length for all visited values.
   *  <p>The histogram must be zeroed by the caller. A return value greater than 0
   *  means that all entries agree on that many bytes from offset {@code k} on, and
   *  that the histogram has a single non-empty bucket; a return value of 0 means that
   *  the histogram holds the complete per-bucket counts for offset {@code k}.
   *  @see #buildHistogram */
  private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
    final int[] commonPrefix = this.commonPrefix;
    int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
    // seed the candidate prefix with the bytes of the first entry
    for (int j = 0; j < commonPrefixLength; ++j) {
      final int b = byteAt(from, k + j);
      commonPrefix[j] = b;
      if (b == -1) {
        commonPrefixLength = j + 1;
        break;
      }
    }

    // shrink the candidate prefix to what every following entry shares with it
    int i;
    outer: for (i = from + 1; i < to; ++i) {
      for (int j = 0; j < commonPrefixLength; ++j) {
        final int b = byteAt(i, k + j);
        if (b != commonPrefix[j]) {
          commonPrefixLength = j;
          if (commonPrefixLength == 0) { // we have no common prefix
            histogram[commonPrefix[0] + 1] = i - from;
            histogram[b + 1] = 1;
            break outer;
          }
          break;
        }
      }
    }

    if (i < to) {
      // the loop got broken because there is no common prefix
      assert commonPrefixLength == 0;
      buildHistogram(i + 1, to, k, histogram);
    } else {
      assert commonPrefixLength > 0;
      histogram[commonPrefix[0] + 1] = to - from;
    }

    return commonPrefixLength;
  }

  /** Build an histogram of the k-th characters of values occurring between
   *  offsets {@code from} and {@code to}, using {@link #getBucket}. */
  private void buildHistogram(int from, int to, int k, int[] histogram) {
    for (int i = from; i < to; ++i) {
      histogram[getBucket(i, k)]++;
    }
  }

  /** Reorder elements so that all of them that fall into {@code bucket} are
   *  between offsets {@code bucketFrom} and {@code bucketTo}. */
  private void partition(int from, int to, int bucket, int bucketFrom, int bucketTo, int d) {
    int left = from;
    int right = to - 1;

    // next free slot of the target range
    int slot = bucketFrom;

    for (;;) {
      int leftBucket = getBucket(left, d);
      int rightBucket = getBucket(right, d);

      // advance left until it reaches an entry that belongs after the target range
      while (leftBucket <= bucket && left < bucketFrom) {
        if (leftBucket == bucket) {
          swap(left, slot++);
        } else {
          ++left;
        }
        leftBucket = getBucket(left, d);
      }

      // move right backwards until it reaches an entry that belongs before the target range
      while (rightBucket >= bucket && right >= bucketTo) {
        if (rightBucket == bucket) {
          swap(right, slot++);
        } else {
          --right;
        }
        rightBucket = getBucket(right, d);
      }

      if (left < bucketFrom && right >= bucketTo) {
        swap(left++, right--);
      } else {
        assert left == bucketFrom;
        assert right == bucketTo - 1;
        break;
      }
    }
  }
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
/** Base class for selection algorithms, ie. algorithms that move the value a
 *  sorted range would hold at a given position into that position without
 *  fully sorting the range. */
public abstract class Selector {

  /** Reorder the slots of {@code [from, to)} so that position {@code k} holds
   *  the element it would hold if the whole range were sorted, with the other
   *  elements partitioned around it: {@code [from, k)} only contains elements
   *  that are less than or equal to the element at {@code k}, and
   *  {@code (k, to)} only contains elements that are greater than or equal to
   *  it. */
  public abstract void select(int from, int to, int k);

  /** Reject a {@code k} that lies outside of {@code [from, to)}; the lower
   *  bound is checked first. */
  void checkArgs(int from, int to, int k) {
    final String violation;
    if (k < from) {
      violation = "k must be >= from";
    } else if (k >= to) {
      violation = "k must be < to";
    } else {
      return;
    }
    throw new IllegalArgumentException(violation);
  }

  /** Exchange the values stored in slots <code>i</code> and <code>j</code>. */
  protected abstract void swap(int i, int j);
}

View File

@ -23,7 +23,7 @@ import java.util.Comparator;
* @lucene.internal */
public abstract class Sorter {
static final int INSERTION_SORT_THRESHOLD = 20;
static final int BINARY_SORT_THRESHOLD = 20;
/** Sole constructor, used for inheritance. */
protected Sorter() {}
@ -36,6 +36,20 @@ public abstract class Sorter {
/** Swap values at slots <code>i</code> and <code>j</code>. */
protected abstract void swap(int i, int j);
private int pivotIndex;
/** Remember slot <code>i</code> as the pivot for subsequent
 *  {@link #comparePivot(int)} calls. This default implementation records the
 *  slot index only, not the value stored in it. */
protected void setPivot(int i) {
  this.pivotIndex = i;
}
/** Compare the pivot with the slot at <code>j</code>, with the same contract
 *  as {@link #compare(int, int) compare(i, j)}. This default implementation
 *  simply compares the remembered pivot slot with slot <code>j</code>. */
protected int comparePivot(int j) {
  final int pivotSlot = pivotIndex;
  return compare(pivotSlot, j);
}
/** Sort the slice which starts at <code>from</code> (inclusive) and ends at
* <code>to</code> (exclusive). */
public abstract void sort(int from, int to);
@ -163,54 +177,41 @@ public abstract class Sorter {
}
}
void insertionSort(int from, int to) {
for (int i = from + 1; i < to; ++i) {
for (int j = i; j > from; --j) {
if (compare(j - 1, j) > 0) {
swap(j - 1, j);
} else {
break;
}
}
}
}
/**
* A binary sort implementation. This performs {@code O(n*log(n))} comparisons
* and {@code O(n^2)} swaps. It is typically used by more sophisticated
* implementations as a fall-back when the numbers of items to sort has become
* less than {@value #BINARY_SORT_THRESHOLD}.
*/
void binarySort(int from, int to) {
binarySort(from, to, from + 1);
}
void binarySort(int from, int to, int i) {
for ( ; i < to; ++i) {
setPivot(i);
int l = from;
int h = i - 1;
while (l <= h) {
final int mid = (l + h) >>> 1;
final int cmp = compare(i, mid);
final int cmp = comparePivot(mid);
if (cmp < 0) {
h = mid - 1;
} else {
l = mid + 1;
}
}
switch (i - l) {
case 2:
swap(l + 1, l + 2);
swap(l, l + 1);
break;
case 1:
swap(l, l + 1);
break;
case 0:
break;
default:
for (int j = i; j > l; --j) {
swap(j - 1, j);
}
break;
}
}
}
/**
* Use heap sort to sort items between {@code from} inclusive and {@code to}
* exclusive. This runs in {@code O(n*log(n))} and is used as a fall-back by
* {@link IntroSorter}.
*/
void heapSort(int from, int to) {
if (to - from <= 1) {
return;

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.function.IntFunction;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
@ -111,7 +112,8 @@ public class BKDWriter implements Closeable {
final byte[] scratchDiff;
final byte[] scratch1;
final byte[] scratch2;
final BytesRef scratchBytesRef = new BytesRef();
final BytesRef scratchBytesRef1 = new BytesRef();
final BytesRef scratchBytesRef2 = new BytesRef();
final int[] commonPrefixLengths;
protected final FixedBitSet docsSeen;
@ -173,7 +175,6 @@ public class BKDWriter implements Closeable {
packedBytesLength = numDims * bytesPerDim;
scratchDiff = new byte[bytesPerDim];
scratchBytesRef.length = packedBytesLength;
scratch1 = new byte[packedBytesLength];
scratch2 = new byte[packedBytesLength];
commonPrefixLengths = new int[numDims];
@ -204,7 +205,7 @@ public class BKDWriter implements Closeable {
// all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X
// what that level would consume, so we multiply by 0.5 to convert from
// bytes to points here. Each dimension has its own sorted partition, so
// we must divide by numDims as well.
// we must divide by numDims as well.
maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
@ -416,15 +417,25 @@ public class BKDWriter implements Closeable {
}
}
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
/** Write a field from a {@link MutablePointsReader}. This way of writing
 *  points is faster than regular writes with {@link BKDWriter#add} since
 *  there is opportunity for reordering points before writing them to
 *  disk. This method does not use transient disk in order to reorder points.
 *  <p>Single-dimension fields and multi-dimension fields are handled by two
 *  separate code paths.
 */
public long writeField(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
  return numDims == 1
      ? writeField1Dim(out, fieldName, reader)
      : writeFieldNDims(out, fieldName, reader);
}
/* In the 2+D case, we recursively pick the split dimension, compute the
* median value and partition other values around it. */
private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge");
throw new IllegalStateException("cannot mix add and writeField");
}
// Catch user silliness:
@ -435,6 +446,81 @@ public class BKDWriter implements Closeable {
// Mark that we already finished:
heapPointWriter = null;
long countPerLeaf = pointCount = reader.size(fieldName);
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
int numLeaves = Math.toIntExact(innerNodeCount);
checkMaxLeafNodeCount(numLeaves);
final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)];
final long[] leafBlockFPs = new long[numLeaves];
// compute the min/max for this slice
Arrays.fill(minPackedValue, (byte) 0xff);
Arrays.fill(maxPackedValue, (byte) 0);
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
reader.getValue(i, scratchBytesRef1);
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset) < 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
}
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset) > 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
}
}
docsSeen.set(reader.getDocID(i));
}
build(1, numLeaves, reader, 0, Math.toIntExact(pointCount), out,
minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
new int[maxPointsInLeafNode]);
long indexFP = out.getFilePointer();
writeIndex(out, leafBlockFPs, splitPackedValues);
return indexFP;
}
/* One-dimensional case: the points are sorted in place, then streamed in
 * that order into the same one-dimensional writer that is used at merge time. */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException {
  final int count = Math.toIntExact(reader.size(fieldName));
  MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, count);

  final OneDimensionBKDWriter sortedWriter = new OneDimensionBKDWriter(out);

  final IntersectVisitor feeder = new IntersectVisitor() {

    @Override
    public void visit(int docID) throws IOException {
      // values are always needed here, so the doc-only callback must never be used
      throw new IllegalStateException();
    }

    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
      sortedWriter.add(packedValue, docID);
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      // presumably this makes the reader hand over every value individually -- TODO confirm
      return Relation.CELL_CROSSES_QUERY;
    }
  };
  reader.intersect(fieldName, feeder);

  return sortedWriter.finish();
}
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
assert docMaps == null || readers.size() == docMaps.size();
BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());
@ -453,72 +539,14 @@ public class BKDWriter implements Closeable {
}
}
if (queue.size() == 0) {
return -1;
}
int leafCount = 0;
List<Long> leafBlockFPs = new ArrayList<>();
List<byte[]> leafBlockStartValues = new ArrayList<>();
// Target halfway between min and max allowed for the leaf:
int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode);
//System.out.println("POINTS PER: " + pointsPerLeafBlock);
byte[] lastPackedValue = new byte[bytesPerDim];
byte[] firstPackedValue = new byte[bytesPerDim];
long valueCount = 0;
// Buffer up each leaf block's docs and values
int[] leafBlockDocIDs = new int[maxPointsInLeafNode];
byte[][] leafBlockPackedValues = new byte[maxPointsInLeafNode][];
for(int i=0;i<maxPointsInLeafNode;i++) {
leafBlockPackedValues[i] = new byte[packedBytesLength];
}
Arrays.fill(commonPrefixLengths, bytesPerDim);
OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
while (queue.size() != 0) {
MergeReader reader = queue.top();
// System.out.println("iter reader=" + reader);
// NOTE: doesn't work with subclasses (e.g. SimpleText!)
int docID = reader.docID;
leafBlockDocIDs[leafCount] = docID;
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
docsSeen.set(docID);
if (valueCount == 0) {
System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength);
}
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue, 0);
valueCount++;
if (pointCount > totalPointCount) {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
}
if (leafCount == 0) {
if (leafBlockFPs.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
leafBlockStartValues.add(Arrays.copyOf(reader.state.scratchPackedValue, bytesPerDim));
}
Arrays.fill(commonPrefixLengths, bytesPerDim);
System.arraycopy(reader.state.scratchPackedValue, 0, firstPackedValue, 0, bytesPerDim);
} else {
// Find per-dim common prefix:
for(int dim=0;dim<numDims;dim++) {
int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (firstPackedValue[offset+j] != reader.state.scratchPackedValue[offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
leafCount++;
oneDimWriter.add(reader.state.scratchPackedValue, reader.docID);
if (reader.next()) {
queue.updateTop();
@ -526,35 +554,78 @@ public class BKDWriter implements Closeable {
// This segment was exhausted
queue.pop();
}
}
return oneDimWriter.finish();
}
private class OneDimensionBKDWriter {
final IndexOutput out;
final List<Long> leafBlockFPs = new ArrayList<>();
final List<byte[]> leafBlockStartValues = new ArrayList<>();
final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
final int[] leafDocs = new int[maxPointsInLeafNode];
long valueCount;
int leafCount;
OneDimensionBKDWriter(IndexOutput out) {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
}
if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge");
}
// Catch user silliness:
if (heapPointWriter == null && tempInput == null) {
throw new IllegalStateException("already finished");
}
// Mark that we already finished:
heapPointWriter = null;
this.out = out;
lastPackedValue = new byte[packedBytesLength];
}
// for asserts
final byte[] lastPackedValue;
int lastDocID;
void add(byte[] packedValue, int docID) throws IOException {
assert valueInOrder(valueCount + leafCount,
0, lastPackedValue, packedValue, 0, docID, lastDocID);
System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
leafDocs[leafCount] = docID;
docsSeen.set(docID);
leafCount++;
if (valueCount > totalPointCount) {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
}
if (leafCount == maxPointsInLeafNode) {
// We write a block once we hit exactly the max count ... this is different from
// when we flush a new segment, where we write between max/2 and max per leaf block,
// so merged segments will behave differently from newly flushed segments:
if (leafCount == pointsPerLeafBlock || queue.size() == 0) {
leafBlockFPs.add(out.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
writeLeafBlockDocs(out, leafBlockDocIDs, 0, leafCount);
writeCommonPrefixes(out, commonPrefixLengths, firstPackedValue);
final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
final BytesRef scratch = new BytesRef();
{
scratch.length = packedBytesLength;
scratch.offset = 0;
}
@Override
public BytesRef apply(int i) {
scratch.bytes = leafBlockPackedValues[i];
return scratch;
}
};
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
writeLeafBlock();
leafCount = 0;
}
assert (lastDocID = docID) >= 0; // only assign when asserts are enabled
}
public long finish() throws IOException {
if (leafCount > 0) {
writeLeafBlock();
leafCount = 0;
}
if (valueCount == 0) {
return -1;
}
pointCount = valueCount;
@ -575,6 +646,60 @@ public class BKDWriter implements Closeable {
return indexFP;
}
/**
 * Writes the {@code leafCount} values buffered in {@code leafValues}/{@code leafDocs} to
 * {@code out} as a single leaf block, and records the block's file pointer and (for every
 * block but the first) its first value.
 *
 * <p>Also maintains the enclosing writer's {@code minPackedValue} (first value of the first
 * block), {@code maxPackedValue} (last value of the most recent block) and the running
 * {@code valueCount}. The caller is responsible for resetting {@code leafCount} afterwards.
 *
 * @throws IOException if writing to the output fails
 */
private void writeLeafBlock() throws IOException {
  assert leafCount != 0;

  // Byte offset of the last buffered value inside leafValues.
  final int lastValueOffset = (leafCount - 1) * packedBytesLength;

  final boolean firstBlock = valueCount == 0;
  if (firstBlock) {
    System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
  }
  System.arraycopy(leafValues, lastValueOffset, maxPackedValue, 0, packedBytesLength);

  valueCount += leafCount;

  if (leafBlockFPs.size() > 0) {
    // Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
    final byte[] blockStart = Arrays.copyOf(leafValues, packedBytesLength);
    leafBlockStartValues.add(blockStart);
  }
  final long blockFP = out.getFilePointer();
  leafBlockFPs.add(blockFP);
  checkMaxLeafNodeCount(leafBlockFPs.size());

  // Per-dim common prefix: only the first and the last buffered value are compared.
  Arrays.fill(commonPrefixLengths, bytesPerDim);
  for (int dim = 0; dim < numDims; dim++) {
    final int firstOffset = dim * bytesPerDim;
    final int lastOffset = lastValueOffset + firstOffset;
    final int limit = commonPrefixLengths[dim];
    int shared = 0;
    while (shared < limit && leafValues[firstOffset + shared] == leafValues[lastOffset + shared]) {
      shared++;
    }
    commonPrefixLengths[dim] = shared;
  }

  writeLeafBlockDocs(out, leafDocs, 0, leafCount);
  writeCommonPrefixes(out, commonPrefixLengths, leafValues);

  // A single reusable view over leafValues; only its offset moves between calls.
  final BytesRef view = new BytesRef();
  view.length = packedBytesLength;
  view.bytes = leafValues;
  final IntFunction<BytesRef> packedValues = index -> {
    view.offset = packedBytesLength * index;
    return view;
  };

  assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength),
      Arrays.copyOfRange(leafValues, lastValueOffset, leafCount * packedBytesLength),
      packedValues, leafDocs, 0);
  writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
}
}
// TODO: there must be a simpler way?
private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
//System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length);
@ -686,6 +811,7 @@ public class BKDWriter implements Closeable {
}
private PointWriter sort(int dim) throws IOException {
assert dim >= 0 && dim < numDims;
if (heapPointWriter != null) {
@ -1110,6 +1236,132 @@ public class BKDWriter implements Closeable {
}
}
/**
 * Recursively reorders the provided reader and writes the bkd-tree on the fly.
 *
 * <p>A node whose {@code nodeID} is at least {@code leafNodeOffset} is written as a leaf block;
 * any other node is an inner node that partitions {@code [from, to)} around its midpoint,
 * records the split, and recurses into children {@code nodeID * 2} and {@code nodeID * 2 + 1}.
 *
 * @param nodeID id of the node being built
 * @param leafNodeOffset first node id that is treated as a leaf
 * @param reader points to reorder in place and read values/doc IDs from
 * @param from first index (inclusive) of the range owned by this node
 * @param to end index (exclusive) of the range owned by this node
 * @param out output that leaf blocks are written to
 * @param minPackedValue lower bounds of the current cell
 * @param maxPackedValue upper bounds of the current cell
 * @param splitPackedValues receives, at {@code nodeID * (1 + bytesPerDim)}, the split dimension
 *        followed by the split value of each inner node
 * @param leafBlockFPs receives the file pointer of each leaf block at index {@code nodeID - leafNodeOffset}
 * @param spareDocIds scratch array the leaf's doc IDs are staged in; presumably sized for at least
 *        {@code maxPointsInLeafNode} entries -- confirm at the caller
 * @throws IOException if writing to {@code out} fails
 */
private void build(int nodeID, int leafNodeOffset,
MutablePointsReader reader, int from, int to,
IndexOutput out,
byte[] minPackedValue, byte[] maxPackedValue,
byte[] splitPackedValues,
long[] leafBlockFPs,
int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) {
// leaf node
final int count = to - from;
assert count <= maxPointsInLeafNode;
// Compute common prefixes
// (every value in the range is compared against the first one, held in scratchBytesRef1)
Arrays.fill(commonPrefixLengths, bytesPerDim);
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim=0;dim<numDims;dim++) {
final int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
// Dimensions whose values are entirely a shared prefix keep a null bit set and are skipped below.
FixedBitSet[] usedBytes = new FixedBitSet[numDims];
for (int dim = 0; dim < numDims; ++dim) {
if (commonPrefixLengths[dim] < bytesPerDim) {
usedBytes[dim] = new FixedBitSet(256);
}
}
// NOTE(review): this loop starts at from + 1, so the byte of the first value is never recorded
// and the cardinality below can be undercounted by one -- confirm whether that is intentional.
for (int i = from + 1; i < to; ++i) {
for (int dim=0;dim<numDims;dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
// Falls back to dimension 0 when no dimension has a bit set to inspect.
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < numDims; ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
sortedDim = dim;
sortedDimCardinality = cardinality;
}
}
}
// sort by sortedDim
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
// Write doc IDs
int[] docIDs = spareDocIds;
for (int i = from; i < to; ++i) {
docIDs[i - from] = reader.getDocID(i);
}
writeLeafBlockDocs(out, docIDs, 0, count);
// Write the common prefixes:
// (the scratch refs were handed to sortByDim above, so the first value is re-read here)
reader.getValue(from, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
writeCommonPrefixes(out, commonPrefixLengths, scratch1);
// Write the full values:
// The function returns the shared scratchBytesRef1, so each result is only valid until the next call.
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
@Override
public BytesRef apply(int i) {
reader.getValue(from + i, scratchBytesRef1);
return scratchBytesRef1;
}
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
// inner node
// compute the split dimension and partition around it
final int splitDim = split(minPackedValue, maxPackedValue);
// midpoint of [from, to), rounded up
final int mid = (from + to + 1) >>> 1;
// Number of leading bytes the cell's min and max share on the split dimension;
// passed to partition as commonPrefixLen.
int commonPrefixLen = bytesPerDim;
for (int i = 0; i < bytesPerDim; ++i) {
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
commonPrefixLen = i;
break;
}
}
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value
// Layout per node: one byte for the split dimension followed by bytesPerDim bytes of split value.
final int address = nodeID * (1+bytesPerDim);
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
// Child cell bounds: copies of the parent's bounds with the split dimension replaced by the
// value at mid (still held in scratchBytesRef1).
byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
// recurse
// left child covers [from, mid), right child covers [mid, to)
build(nodeID * 2, leafNodeOffset, reader, from, mid, out,
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out,
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
}
}
/** The array (sized numDims) of PathSlice describe the cell we have currently recursed to. */
private void build(int nodeID, int leafNodeOffset,
PathSlice[] slices,
@ -1217,7 +1469,8 @@ public class BKDWriter implements Closeable {
return scratch;
}
};
assert valuesInOrderAndBounds(count, minPackedValue, maxPackedValue, packedValues);
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
heapSource.docIDs, Math.toIntExact(source.start));
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
@ -1321,12 +1574,16 @@ public class BKDWriter implements Closeable {
}
// only called from assert
private boolean valuesInOrderAndBounds(int count, byte[] minPackedValue, byte[] maxPackedValue, IntFunction<BytesRef> values) throws IOException {
byte[] lastPackedValue = new byte[bytesPerDim];
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue,
IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException {
byte[] lastPackedValue = new byte[packedBytesLength];
int lastDoc = -1;
for (int i=0;i<count;i++) {
BytesRef packedValue = values.apply(i);
assert packedValue.length == packedBytesLength;
assert numDims != 1 || valueInOrder(i, lastPackedValue, packedValue.bytes, packedValue.offset);
assert valueInOrder(i, sortedDim, lastPackedValue, packedValue.bytes, packedValue.offset,
docs[docsOffset + i], lastDoc);
lastDoc = docs[docsOffset + i];
// Make sure this value does in fact fall within this leaf cell:
assert valueInBounds(packedValue, minPackedValue, maxPackedValue);
@ -1335,11 +1592,19 @@ public class BKDWriter implements Closeable {
}
// only called from assert
private boolean valueInOrder(long ord, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset) {
if (ord > 0 && StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, packedValueOffset) > 0) {
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset,
int doc, int lastDoc) {
int dimOffset = sortedDim * bytesPerDim;
if (ord > 0) {
int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, dimOffset, packedValue, packedValueOffset + dimOffset);
if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
}
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, bytesPerDim);
if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
}
}
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength);
return true;
}

View File

@ -0,0 +1,186 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.MSBRadixSorter;
import org.apache.lucene.util.RadixSelector;
import org.apache.lucene.util.Selector;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.PackedInts;
/**
 * Static helpers that sort and partition, in place, the points exposed by a
 * {@link MutablePointsReader}. Ties on the compared bytes are always broken by doc ID.
 */
final class MutablePointsReaderUtils {

  MutablePointsReaderUtils() {}

  /**
   * Sort the given {@link MutablePointsReader} based on its packed value then doc ID.
   *
   * @param maxDoc used to derive how many bits of the doc ID take part in the sort key
   * @param packedBytesLength number of bytes of each packed value
   * @param reader points to reorder in place
   * @param from first index (inclusive) to sort
   * @param to end index (exclusive) to sort
   */
  static void sort(int maxDoc, int packedBytesLength,
                   MutablePointsReader reader, int from, int to) {
    final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
    // The radix key is the packed value followed by the doc ID, most significant byte first.
    new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) {

      @Override
      protected void swap(int i, int j) {
        reader.swap(i, j);
      }

      @Override
      protected int byteAt(int i, int k) {
        if (k < packedBytesLength) {
          return Byte.toUnsignedInt(reader.getByteAt(i, k));
        } else {
          // When bitsPerDocId is not a multiple of 8 the shift would go negative for the
          // last key byte; clamping to 0 makes that byte the low 8 bits of the doc ID.
          final int shift = bitsPerDocId - ((k - packedBytesLength + 1) << 3);
          return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
        }
      }

      @Override
      protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
        return new IntroSorter() {

          final BytesRef pivot = new BytesRef();
          final BytesRef scratch = new BytesRef();
          int pivotDoc;

          @Override
          protected void swap(int i, int j) {
            reader.swap(i, j);
          }

          @Override
          protected void setPivot(int i) {
            reader.getValue(i, pivot);
            pivotDoc = reader.getDocID(i);
          }

          @Override
          protected int comparePivot(int j) {
            // Only the value bytes from position k onwards are compared here.
            if (k < packedBytesLength) {
              reader.getValue(j, scratch);
              int cmp = StringHelper.compare(packedBytesLength - k, pivot.bytes, pivot.offset + k, scratch.bytes, scratch.offset + k);
              if (cmp != 0) {
                return cmp;
              }
            }
            // Integer.compare rather than subtraction: same sign, no reliance on the
            // operands' range to avoid overflow.
            return Integer.compare(pivotDoc, reader.getDocID(j));
          }
        };
      }

    }.sort(from, to);
  }

  /**
   * Sort points on the given dimension, breaking ties by doc ID.
   *
   * @param sortedDim dimension to sort on
   * @param bytesPerDim number of bytes per dimension
   * @param commonPrefixLengths per-dimension count of leading bytes that are skipped when comparing
   * @param reader points to reorder in place
   * @param from first index (inclusive) to sort
   * @param to end index (exclusive) to sort
   * @param scratch1 scratch buffer, overwritten with the pivot value
   * @param scratch2 scratch buffer, overwritten with each value compared against the pivot
   */
  static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
                        MutablePointsReader reader, int from, int to,
                        BytesRef scratch1, BytesRef scratch2) {

    // No need for a fancy radix sort here, this is called on the leaves only so
    // there are not many values to sort
    final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
    final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
    new IntroSorter() {

      final BytesRef pivot = scratch1;
      int pivotDoc = -1;

      @Override
      protected void swap(int i, int j) {
        reader.swap(i, j);
      }

      @Override
      protected void setPivot(int i) {
        reader.getValue(i, pivot);
        pivotDoc = reader.getDocID(i);
      }

      @Override
      protected int comparePivot(int j) {
        reader.getValue(j, scratch2);
        int cmp = StringHelper.compare(numBytesToCompare, pivot.bytes, pivot.offset + offset, scratch2.bytes, scratch2.offset + offset);
        if (cmp == 0) {
          cmp = Integer.compare(pivotDoc, reader.getDocID(j));
        }
        return cmp;
      }
    }.sort(from, to);
  }

  /**
   * Partition points around {@code mid}. All values on the left must be less
   * than or equal to it and all values on the right must be greater than or
   * equal to it.
   *
   * @param maxDoc used to derive how many bits of the doc ID take part in the selection key
   * @param splitDim dimension to partition on
   * @param bytesPerDim number of bytes per dimension
   * @param commonPrefixLen number of leading bytes of {@code splitDim} that are skipped when comparing
   * @param reader points to reorder in place
   * @param from first index (inclusive) of the range
   * @param to end index (exclusive) of the range
   * @param mid index to partition around
   * @param scratch1 scratch buffer, overwritten with the pivot value
   * @param scratch2 scratch buffer, overwritten with each value compared against the pivot
   */
  static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
                        MutablePointsReader reader, int from, int to, int mid,
                        BytesRef scratch1, BytesRef scratch2) {
    final int offset = splitDim * bytesPerDim + commonPrefixLen;
    final int cmpBytes = bytesPerDim - commonPrefixLen;
    final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
    // The radix key is the non-shared bytes of the split dimension followed by the doc ID.
    new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {

      @Override
      protected Selector getFallbackSelector(int k) {
        return new IntroSelector() {

          final BytesRef pivot = scratch1;
          int pivotDoc;

          @Override
          protected void swap(int i, int j) {
            reader.swap(i, j);
          }

          @Override
          protected void setPivot(int i) {
            reader.getValue(i, pivot);
            pivotDoc = reader.getDocID(i);
          }

          @Override
          protected int comparePivot(int j) {
            // Only the key bytes from position k onwards are compared here.
            if (k < cmpBytes) {
              reader.getValue(j, scratch2);
              int cmp = StringHelper.compare(cmpBytes - k, pivot.bytes, pivot.offset + offset + k, scratch2.bytes, scratch2.offset + offset + k);
              if (cmp != 0) {
                return cmp;
              }
            }
            return Integer.compare(pivotDoc, reader.getDocID(j));
          }
        };
      }

      @Override
      protected void swap(int i, int j) {
        reader.swap(i, j);
      }

      @Override
      protected int byteAt(int i, int k) {
        if (k < cmpBytes) {
          return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
        } else {
          // Same clamping as in sort(): the last key byte is the low 8 bits of the doc ID.
          final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
          return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
        }
      }
    }.select(from, to, mid);
  }
}

View File

@ -41,8 +41,9 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
if (random().nextBoolean()) {
// randomize parameters
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
// sneaky impersonation!
@ -52,7 +53,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override

View File

@ -80,11 +80,8 @@ public class TestCustomNorms extends LuceneTestCase {
}
public class MySimProvider extends PerFieldSimilarityWrapper {
Similarity delegate = new ClassicSimilarity();
@Override
public float queryNorm(float sumOfSquaredWeights) {
return delegate.queryNorm(sumOfSquaredWeights);
public MySimProvider() {
super(new ClassicSimilarity());
}
@Override
@ -92,14 +89,9 @@ public class TestCustomNorms extends LuceneTestCase {
if (floatTestField.equals(field)) {
return new FloatEncodingBoostSimilarity();
} else {
return delegate;
return defaultSim;
}
}
@Override
public float coord(int overlap, int maxOverlap) {
return delegate.coord(overlap, maxOverlap);
}
}
public static class FloatEncodingBoostSimilarity extends Similarity {

View File

@ -154,12 +154,8 @@ public class TestNorms extends LuceneTestCase {
public class MySimProvider extends PerFieldSimilarityWrapper {
Similarity delegate = new ClassicSimilarity();
@Override
public float queryNorm(float sumOfSquaredWeights) {
return delegate.queryNorm(sumOfSquaredWeights);
public MySimProvider() {
super(new ClassicSimilarity());
}
@Override
@ -167,14 +163,9 @@ public class TestNorms extends LuceneTestCase {
if (byteTestField.equals(field)) {
return new ByteEncodingBoostSimilarity();
} else {
return delegate;
return defaultSim;
}
}
@Override
public float coord(int overlap, int maxOverlap) {
return delegate.coord(overlap, maxOverlap);
}
}

View File

@ -71,23 +71,13 @@ public class TestDocValuesScoring extends LuceneTestCase {
final Similarity base = searcher1.getSimilarity(true);
// boosting
IndexSearcher searcher2 = newSearcher(ir, false);
searcher2.setSimilarity(new PerFieldSimilarityWrapper() {
searcher2.setSimilarity(new PerFieldSimilarityWrapper(base) {
final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");
@Override
public Similarity get(String field) {
return "foo".equals(field) ? fooSim : base;
}
@Override
public float coord(int overlap, int maxOverlap) {
return base.coord(overlap, maxOverlap);
}
@Override
public float queryNorm(float sumOfSquaredWeights) {
return base.queryNorm(sumOfSquaredWeights);
}
});
// in this case, we searched on field "foo". first document should have 2x the score.

View File

@ -1156,8 +1156,9 @@ public class TestPointQueries extends LuceneTestCase {
private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 5.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
return new FilterCodec("Lucene62", Codec.getDefault()) {
@ -1166,7 +1167,7 @@ public class TestPointQueries extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override

View File

@ -89,8 +89,12 @@ public class TestSimilarityProvider extends LuceneTestCase {
}
private class ExampleSimilarityProvider extends PerFieldSimilarityWrapper {
private Similarity sim1 = new Sim1();
private Similarity sim2 = new Sim2();
private final Similarity sim1 = new Sim1();
private final Similarity sim2 = new Sim2();
public ExampleSimilarityProvider() {
super(new Sim1());
}
@Override
public Similarity get(String field) {

View File

@ -45,7 +45,26 @@ public class TestByteBlockPool extends LuceneTestCase {
for (BytesRef expected : list) {
ref.grow(expected.length);
ref.setLength(expected.length);
switch (random().nextInt(3)) {
case 0:
// copy bytes
pool.readBytes(position, ref.bytes(), 0, ref.length());
break;
case 1:
// copy bytes one by one
for (int i = 0; i < ref.length(); ++i) {
ref.setByteAt(i, pool.readByte(position + i));
}
break;
case 2:
BytesRef scratch = new BytesRef();
scratch.length = ref.length();
pool.setRawBytesRef(scratch, position);
System.arraycopy(scratch.bytes, scratch.offset, ref.bytes(), 0, ref.length());
break;
default:
fail();
}
assertEquals(expected, ref.get());
position += ref.length();
}

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
/**
 * Randomized checks that {@link IntroSelector} places the k-th smallest element at index k,
 * leaves smaller-or-equal elements before it and greater-or-equal elements after it, and does
 * not touch anything outside the selected range.
 */
public class TestIntroSelector extends LuceneTestCase {

  /** Exercises {@code select} on 100 random arrays. */
  public void testSelect() {
    int round = 0;
    while (round < 100) {
      doTestSelect(false);
      ++round;
    }
  }

  /** Exercises {@code slowSelect} on 100 random arrays. */
  public void testSlowSelect() {
    int round = 0;
    while (round < 100) {
      doTestSelect(true);
      ++round;
    }
  }

  /**
   * Runs one randomized selection and verifies the result against a sorted copy.
   *
   * @param slow whether to call {@code slowSelect} instead of {@code select}
   */
  private void doTestSelect(boolean slow) {
    final int from = random().nextInt(5);
    final int to = from + TestUtil.nextInt(random(), 1, 10000);

    final int max;
    if (random().nextBoolean()) {
      max = random().nextInt(100);
    } else {
      max = random().nextInt(100000);
    }

    final int length = from + to + random().nextInt(5);
    final Integer[] arr = new Integer[length];
    for (int slot = 0; slot < length; ++slot) {
      arr[slot] = TestUtil.nextInt(random(), 0, max);
    }
    final int k = TestUtil.nextInt(random(), from, to - 1);

    // Reference: fully sort the selected range of a copy.
    final Integer[] expected = arr.clone();
    Arrays.sort(expected, from, to);

    final Integer[] actual = arr.clone();
    final IntroSelector selector = new IntroSelector() {

      Integer currentPivot;

      @Override
      protected void swap(int i, int j) {
        ArrayUtil.swap(actual, i, j);
      }

      @Override
      protected void setPivot(int i) {
        currentPivot = actual[i];
      }

      @Override
      protected int comparePivot(int j) {
        return currentPivot.compareTo(actual[j]);
      }
    };

    if (!slow) {
      selector.select(from, to, k);
    } else {
      selector.slowSelect(from, to, k);
    }

    assertEquals(expected[k], actual[k]);
    for (int idx = 0; idx < actual.length; ++idx) {
      final boolean outsideRange = idx < from || idx >= to;
      if (outsideRange) {
        // Elements outside [from, to) must be the very same objects, in place.
        assertSame(arr[idx], actual[idx]);
        continue;
      }
      final int value = actual[idx].intValue();
      final int kth = actual[k].intValue();
      if (idx <= k) {
        assertTrue(value <= kth);
      } else {
        assertTrue(value >= kth);
      }
    }
  }
}

View File

@ -17,6 +17,8 @@
package org.apache.lucene.util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class TestMSBRadixSorter extends LuceneTestCase {
@ -41,9 +43,12 @@ public class TestMSBRadixSorter extends LuceneTestCase {
break;
}
final int finalMaxLength = maxLength;
new MSBRadixSorter(maxLength) {
@Override
protected int byteAt(int i, int k) {
assertTrue(k < finalMaxLength);
BytesRef ref = refs[i];
if (ref.length <= k) {
return -1;
@ -114,4 +119,67 @@ public class TestMSBRadixSorter extends LuceneTestCase {
testRandom(TestUtil.nextInt(random(), 1, 30), 2);
}
}
/**
 * Sorts a large set of distinct strings that are built by concatenating fragments drawn from
 * a small pool of random substrings, so that many strings share long prefixes.
 *
 * <p>Each substring gets a random weight; the weights are normalized into cumulative
 * probabilities and a fragment is picked by finding the first cumulative bound that reaches a
 * uniform random draw.
 */
public void testRandom2() {
  // how large our alphabet is
  int letterCount = TestUtil.nextInt(random(), 2, 10);

  // how many substring fragments to use
  int substringCount = TestUtil.nextInt(random(), 2, 10);
  Set<BytesRef> substringsSet = new HashSet<>();

  // how many strings to make
  int stringCount = atLeast(10000);

  while (substringsSet.size() < substringCount) {
    int length = TestUtil.nextInt(random(), 2, 10);
    byte[] bytes = new byte[length];
    for (int i = 0; i < length; i++) {
      bytes[i] = (byte) random().nextInt(letterCount);
    }
    substringsSet.add(new BytesRef(bytes));
  }

  BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
  double[] chance = new double[substrings.length];
  double sum = 0.0;
  for (int i = 0; i < substrings.length; i++) {
    chance[i] = random().nextDouble();
    sum += chance[i];
  }

  // give each substring a random chance of occurring:
  // after this loop chance[i] is the CUMULATIVE probability of substrings[0..i]
  double accum = 0.0;
  for (int i = 0; i < substrings.length; i++) {
    accum += chance[i] / sum;
    chance[i] = accum;
  }

  Set<BytesRef> stringsSet = new HashSet<>();
  int iters = 0;
  // The iteration cap keeps the loop finite when too few distinct strings can be generated.
  while (stringsSet.size() < stringCount && iters < stringCount * 5) {
    int count = TestUtil.nextInt(random(), 1, 5);
    BytesRefBuilder b = new BytesRefBuilder();
    for (int i = 0; i < count; i++) {
      double v = random().nextDouble();
      // chance[] is already cumulative, so compare v against each bound directly; summing the
      // bounds again would make later substrings (nearly) unreachable. The last substring is
      // the fallback in case rounding left the final bound slightly below v.
      int pick = substrings.length - 1;
      for (int j = 0; j < substrings.length; j++) {
        if (chance[j] >= v) {
          pick = j;
          break;
        }
      }
      b.append(substrings[pick]);
    }
    stringsSet.add(b.toBytesRef());
    iters++;
  }

  test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
}
}

View File

@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
/**
 * Randomized checks that {@link RadixSelector} places the k-th smallest {@link BytesRef} at
 * index k, partitions the selected range around it, and leaves everything outside the range
 * untouched.
 */
public class TestRadixSelector extends LuceneTestCase {

  /** Runs the fully random scenario 100 times. */
  public void testSelect() {
    int round = 0;
    while (round < 100) {
      doTestSelect();
      ++round;
    }
  }

  /** One run over an array of independent random byte strings. */
  private void doTestSelect() {
    final int from = random().nextInt(5);
    final int to = from + TestUtil.nextInt(random(), 1, 10000);
    final int maxLen = TestUtil.nextInt(random(), 1, 12);
    final BytesRef[] arr = randomRefs(from, to, maxLen);
    doTest(arr, from, to, maxLen);
  }

  /** Runs the shared-prefix scenario 100 times. */
  public void testSharedPrefixes() {
    int round = 0;
    while (round < 100) {
      doTestSharedPrefixes();
      ++round;
    }
  }

  /** One run where every string is overwritten with a prefix of the first string. */
  private void doTestSharedPrefixes() {
    final int from = random().nextInt(5);
    final int to = from + TestUtil.nextInt(random(), 1, 10000);
    final int maxLen = TestUtil.nextInt(random(), 1, 12);
    final BytesRef[] arr = randomRefs(from, to, maxLen);

    final BytesRef first = arr[0];
    final int sharedPrefixLength = Math.min(first.length, TestUtil.nextInt(random(), 1, maxLen));
    for (int idx = 1; idx < arr.length; ++idx) {
      final BytesRef target = arr[idx];
      final int copyLength = Math.min(sharedPrefixLength, target.length);
      System.arraycopy(first.bytes, first.offset, target.bytes, target.offset, copyLength);
    }
    doTest(arr, from, to, maxLen);
  }

  /**
   * Builds an array of {@code from + to} plus up to four extra random byte strings, each
   * between 0 and {@code maxLen} bytes long.
   */
  private BytesRef[] randomRefs(int from, int to, int maxLen) {
    final BytesRef[] refs = new BytesRef[from + to + random().nextInt(5)];
    for (int idx = 0; idx < refs.length; ++idx) {
      final byte[] content = new byte[TestUtil.nextInt(random(), 0, maxLen)];
      random().nextBytes(content);
      refs[idx] = new BytesRef(content);
    }
    return refs;
  }

  /**
   * Selects a random k in {@code [from, to)} and verifies the outcome against a sorted copy.
   * The selector is randomly given either the real maximum length or {@code Integer.MAX_VALUE}.
   */
  private void doTest(BytesRef[] arr, int from, int to, int maxLen) {
    final int k = TestUtil.nextInt(random(), from, to - 1);

    final BytesRef[] expected = arr.clone();
    Arrays.sort(expected, from, to);

    final BytesRef[] actual = arr.clone();
    final int enforcedMaxLen;
    if (random().nextBoolean()) {
      enforcedMaxLen = maxLen;
    } else {
      enforcedMaxLen = Integer.MAX_VALUE;
    }

    final RadixSelector selector = new RadixSelector(enforcedMaxLen) {

      @Override
      protected void swap(int i, int j) {
        ArrayUtil.swap(actual, i, j);
      }

      @Override
      protected int byteAt(int i, int byteIndex) {
        assertTrue(byteIndex < enforcedMaxLen);
        final BytesRef ref = actual[i];
        // -1 signals that the string ends before byteIndex.
        if (byteIndex < ref.length) {
          return Byte.toUnsignedInt(ref.bytes[ref.offset + byteIndex]);
        }
        return -1;
      }
    };
    selector.select(from, to, k);

    assertEquals(expected[k], actual[k]);
    for (int idx = 0; idx < actual.length; ++idx) {
      final boolean outsideRange = idx < from || idx >= to;
      if (outsideRange) {
        // Elements outside [from, to) must be the very same objects, in place.
        assertSame(arr[idx], actual[idx]);
        continue;
      }
      final int cmp = actual[idx].compareTo(actual[k]);
      if (idx <= k) {
        assertTrue(cmp <= 0);
      } else {
        assertTrue(cmp >= 0);
      }
    }
  }
}

View File

@ -0,0 +1,270 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
public class TestMutablePointsReaderUtils extends LuceneTestCase {
/** Runs the randomized sort check five times. */
public void testSort() {
  int remaining = 5;
  while (remaining-- > 0) {
    doTestSort();
  }
}
/**
 * Sorts random single-dimensional points with {@link MutablePointsReaderUtils#sort} and
 * compares the result with {@link Arrays#sort} applied to the same points, ordered by packed
 * value with ties broken by doc ID.
 */
private void doTestSort() {
  final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
  final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
  final Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
  final DummyPointsReader reader = new DummyPointsReader(points);
  MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
  final Comparator<Point> byValueThenDoc = new Comparator<Point>() {
    @Override
    public int compare(Point o1, Point o2) {
      final int byValue = o1.packedValue.compareTo(o2.packedValue);
      if (byValue != 0) {
        return byValue;
      }
      return Integer.compare(o1.doc, o2.doc);
    }
  };
  Arrays.sort(points, byValueThenDoc);
  // the reader works on its own copy of the array, so the two arrays must be distinct objects
  assertNotSame(points, reader.points);
  assertArrayEquals(points, reader.points);
}
/** Runs the randomized sort-by-dimension check five times. */
public void testSortByDim() {
  int remaining = 5;
  while (remaining-- > 0) {
    doTestSortByDim();
  }
}
/**
 * Creates random multi-dimensional points, forces every dimension of every point to share a
 * random-length prefix with the first point, sorts the points on one randomly chosen
 * dimension with {@link MutablePointsReaderUtils#sortByDim} and checks that consecutive
 * points are ordered by the bytes of that dimension, with ties broken by doc ID.
 */
private void doTestSortByDim() {
  final int numDims = TestUtil.nextInt(random(), 1, 8);
  final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
  final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
  Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
  int[] commonPrefixLengths = new int[numDims];
  for (int i = 0; i < commonPrefixLengths.length; ++i) {
    commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
  }
  // make the common prefix lengths handed to sortByDim true for the data: copy the prefix of
  // each dimension of the first point over the same dimension of every other point
  BytesRef firstValue = points[0].packedValue;
  for (int i = 1; i < points.length; ++i) {
    for (int dim = 0; dim < numDims; ++dim) {
      int offset = dim * bytesPerDim;
      BytesRef packedValue = points[i].packedValue;
      System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
    }
  }
  DummyPointsReader reader = new DummyPointsReader(points);
  final int sortedDim = random().nextInt(numDims);
  MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
      new BytesRef(), new BytesRef());
  // start of the sorted dimension inside each packed value; the same for every point
  final int offset = sortedDim * bytesPerDim;
  for (int i = 1; i < points.length; ++i) {
    BytesRef previousValue = reader.points[i - 1].packedValue;
    BytesRef currentValue = reader.points[i].packedValue;
    int cmp = StringHelper.compare(bytesPerDim,
        previousValue.bytes, previousValue.offset + offset,
        currentValue.bytes, currentValue.offset + offset);
    if (cmp == 0) {
      // Integer.compare rather than subtraction: same idiom as the comparator in doTestSort
      // and immune to overflow
      cmp = Integer.compare(reader.points[i - 1].doc, reader.points[i].doc);
    }
    assertTrue("points at " + (i - 1) + " and " + i + " are out of order", cmp <= 0);
  }
}
/** Runs the randomized partition check five times. */
public void testPartition() {
  int remaining = 5;
  while (remaining-- > 0) {
    doTestPartition();
  }
}
/**
 * Creates random multi-dimensional points that share a random-length prefix on a randomly
 * chosen split dimension, partitions them around a random pivot index with
 * {@link MutablePointsReaderUtils#partition} and checks that, on the split dimension with
 * ties broken by doc ID, no point before the pivot is greater than the pivot and no point
 * after it is smaller.
 */
private void doTestPartition() {
  final int numDims = TestUtil.nextInt(random(), 1, 8);
  final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
  final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
  Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
  int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
  final int splitDim = random().nextInt(numDims);
  // start of the split dimension inside each packed value
  final int offset = splitDim * bytesPerDim;
  // make the common prefix length handed to partition true for the data
  BytesRef firstValue = points[0].packedValue;
  for (int i = 1; i < points.length; ++i) {
    BytesRef packedValue = points[i].packedValue;
    System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
  }
  DummyPointsReader reader = new DummyPointsReader(points);
  final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
  MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
      new BytesRef(), new BytesRef());
  BytesRef pivotValue = reader.points[pivot].packedValue;
  for (int i = 0; i < points.length; ++i) {
    BytesRef value = reader.points[i].packedValue;
    int cmp = StringHelper.compare(bytesPerDim,
        value.bytes, value.offset + offset,
        pivotValue.bytes, pivotValue.offset + offset);
    if (cmp == 0) {
      // Integer.compare rather than subtraction: same idiom as the comparator in doTestSort
      // and immune to overflow
      cmp = Integer.compare(reader.points[i].doc, reader.points[pivot].doc);
    }
    if (i < pivot) {
      assertTrue("point at " + i + " is greater than the pivot at " + pivot, cmp <= 0);
    } else if (i > pivot) {
      assertTrue("point at " + i + " is smaller than the pivot at " + pivot, cmp >= 0);
    } else {
      assertEquals(0, cmp);
    }
  }
}
/**
 * Creates between 1 and 100000 points. Each point is given {@code numDims * bytesPerDim}
 * random bytes as its value and a random doc ID in {@code [0, maxDoc)}.
 */
private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
  final int packedBytesLength = numDims * bytesPerDim;
  final Point[] points = new Point[TestUtil.nextInt(random(), 1, 100000)];
  for (int slot = 0; slot < points.length; ++slot) {
    final byte[] value = new byte[packedBytesLength];
    random().nextBytes(value);
    final int doc = random().nextInt(maxDoc);
    points[slot] = new Point(value, doc);
  }
  return points;
}
/** A test point: a packed value stored at a non-zero offset plus the ID of its document. */
private static class Point {
  final BytesRef packedValue;
  final int doc;

  /**
   * @param packedValue the bytes of the point; they are copied into a new, one byte larger
   *                    array so that the value starts at offset 1
   * @param doc the doc ID of the point
   */
  Point(byte[] packedValue, int doc) {
    // use a non-zero offset to make sure MutablePointsReaderUtils does not ignore it
    this.packedValue = new BytesRef(packedValue.length + 1);
    // random byte in front of the value: code that ignores the offset would read it
    this.packedValue.bytes[0] = (byte) random().nextInt(256);
    this.packedValue.offset = 1;
    this.packedValue.length = packedValue.length;
    // copy the actual value behind the leading byte; without this copy every point would
    // carry an all-zero value and the tests would only ever order points by doc ID
    System.arraycopy(packedValue, 0, this.packedValue.bytes, this.packedValue.offset, packedValue.length);
    this.doc = doc;
  }

  /** Two points are equal when both their packed values and their doc IDs are equal. */
  @Override
  public boolean equals(Object obj) {
    if (obj == null || obj instanceof Point == false) {
      return false;
    }
    Point that = (Point) obj;
    return packedValue.equals(that.packedValue) && doc == that.doc;
  }

  @Override
  public int hashCode() {
    return 31 * packedValue.hashCode() + doc;
  }

  @Override
  public String toString() {
    return "value=" + packedValue + " doc=" + doc;
  }
}
/**
 * {@link MutablePointsReader} backed by an in-memory array of points. Only the accessors
 * {@code getValue}, {@code getByteAt}, {@code getDocID}, {@code swap} and
 * {@code ramBytesUsed} are implemented; every other method throws
 * {@link UnsupportedOperationException}.
 */
private static class DummyPointsReader extends MutablePointsReader {

  private final Point[] points;

  DummyPointsReader(Point[] points) {
    // work on a copy so that swaps do not reorder the array passed in by the caller
    this.points = points.clone();
  }

  // ---- accessors backed by the array ----

  @Override
  public void getValue(int i, BytesRef packedValue) {
    // point the given ref at the stored bytes; nothing is copied
    final BytesRef source = points[i].packedValue;
    packedValue.bytes = source.bytes;
    packedValue.offset = source.offset;
    packedValue.length = source.length;
  }

  @Override
  public byte getByteAt(int i, int k) {
    final BytesRef source = points[i].packedValue;
    final int position = source.offset + k;
    return source.bytes[position];
  }

  @Override
  public int getDocID(int i) {
    final Point point = points[i];
    return point.doc;
  }

  @Override
  public void swap(int i, int j) {
    ArrayUtil.swap(points, i, j);
  }

  @Override
  public long ramBytesUsed() {
    return 0;
  }

  // ---- unsupported operations ----

  @Override
  public void close() throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void checkIntegrity() throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public byte[] getMinPackedValue(String fieldName) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public byte[] getMaxPackedValue(String fieldName) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public int getNumDimensions(String fieldName) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public int getBytesPerDimension(String fieldName) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public long size(String fieldName) {
    throw new UnsupportedOperationException();
  }

  @Override
  public int getDocCount(String fieldName) {
    throw new UnsupportedOperationException();
  }
}
}

View File

@ -259,13 +259,11 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(new PerFieldSimilarityWrapper() {
final Similarity sim = new ClassicSimilarity();
iwc.setSimilarity(new PerFieldSimilarityWrapper(new ClassicSimilarity()) {
@Override
public Similarity get(String name) {
assertEquals("field", name);
return sim;
return defaultSim;
}
});
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

View File

@ -106,6 +106,7 @@ io.netty.netty-all.version = 4.0.36.Final
org.apache.curator.version = 2.8.0
/org.apache.curator/curator-client = ${org.apache.curator.version}
/org.apache.curator/curator-framework = ${org.apache.curator.version}
/org.apache.curator/curator-recipes = ${org.apache.curator.version}
/org.apache.derby/derby = 10.9.1.0

View File

@ -91,7 +91,7 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
final SweetSpotSimilarity ssB = new SweetSpotSimilarity();
ssB.setLengthNormFactors(5,8,0.1f, false);
Similarity sp = new PerFieldSimilarityWrapper() {
Similarity sp = new PerFieldSimilarityWrapper(ss) {
@Override
public Similarity get(String field) {
if (field.equals("bar"))

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.search.IndexSearcher;
/**
 * Base class for comparison operators useful within an "if"/conditional.
 * <p>
 * A subclass supplies the actual comparison in
 * {@link #compare(int, FunctionValues, FunctionValues)}; this class wires the two operand
 * sources together and provides {@code equals}, {@code hashCode} and {@code description}
 * based on the operation name and the two operands.
 */
public abstract class ComparisonBoolFunction extends BoolFunction {

  private final ValueSource lhs;
  private final ValueSource rhs;
  private final String name;

  /**
   * @param lhs source of the left-hand operand
   * @param rhs source of the right-hand operand
   * @param name name of the operation, as returned by {@link #name()}
   */
  public ComparisonBoolFunction(ValueSource lhs, ValueSource rhs, String name) {
    this.lhs = lhs;
    this.rhs = rhs;
    this.name = name;
  }

  /** Perform the comparison, returning true or false */
  public abstract boolean compare(int doc, FunctionValues lhs, FunctionValues rhs);

  /** Uniquely identify the operation (ie "gt", "lt" "gte", etc) */
  public String name() {
    return this.name;
  }

  /**
   * Returns boolean values whose result for a document is
   * {@link #compare(int, FunctionValues, FunctionValues)} applied to the values of both
   * operands; a value exists for a document only when both operands have one.
   */
  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final FunctionValues lhsVal = this.lhs.getValues(context, readerContext);
    final FunctionValues rhsVal = this.rhs.getValues(context, readerContext);
    final String compLabel = this.name();

    return new BoolDocValues(this) {
      @Override
      public boolean boolVal(int doc) {
        return compare(doc, lhsVal, rhsVal);
      }

      @Override
      public String toString(int doc) {
        return compLabel + "(" + lhsVal.toString(doc) + "," + rhsVal.toString(doc) + ")";
      }

      @Override
      public boolean exists(int doc) {
        return lhsVal.exists(doc) && rhsVal.exists(doc);
      }
    };
  }

  /**
   * Two functions are equal when they are of the exact same class and have equal names and
   * equal operands. Returns {@code false} for {@code null}.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    // null check first: Object.equals must return false for null rather than throw
    if (o == null || this.getClass() != o.getClass()) return false;
    ComparisonBoolFunction other = (ComparisonBoolFunction) o;
    return name().equals(other.name())
        && lhs.equals(other.lhs)
        && rhs.equals(other.rhs);
  }

  @Override
  public int hashCode() {
    int h = this.getClass().hashCode();
    h = h * 31 + this.name().hashCode();
    h = h * 31 + lhs.hashCode();
    h = h * 31 + rhs.hashCode();
    return h;
  }

  /** Returns the operation name followed by the descriptions of both operands in parentheses. */
  @Override
  public String description() {
    return name() + "(" + lhs.description() + "," + rhs.description() + ")";
  }

  /** Delegates weight creation to both operands. */
  @Override
  public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    lhs.createWeight(context, searcher);
    rhs.createWeight(context, searcher);
  }
}

View File

@ -38,5 +38,10 @@ public class FileMetaData {
this.length = length;
this.checksum = checksum;
}
/** Returns a short text form of this meta data that reports the length only. */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder("FileMetaData(length=");
  text.append(length);
  text.append(")");
  return text.toString();
}
}

View File

@ -118,6 +118,8 @@ class SimpleCopyJob extends CopyJob {
return highPriority ? -1 : 1;
} else if (ord < other.ord) {
return -1;
} else if (ord > other.ord) {
return 1;
} else {
return 0;
}

View File

@ -87,8 +87,9 @@ public class TestGeo3DPoint extends LuceneTestCase {
private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode);
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
return new FilterCodec("Lucene62", Codec.getDefault()) {
@ -97,7 +98,7 @@ public class TestGeo3DPoint extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode);
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override

View File

@ -126,6 +126,7 @@ public final class AssertingPointsFormat extends PointsFormat {
assert false: "point values are out of order";
}
System.arraycopy(packedValue, 0, lastDocValue, 0, bytesPerDim);
lastDocID = docID;
}
in.visit(docID, packedValue);
}
@ -254,11 +255,11 @@ public final class AssertingPointsFormat extends PointsFormat {
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (fieldInfo.getPointDimensionCount() == 0) {
throw new IllegalArgumentException("writing field=\"" + fieldInfo.name + "\" but pointDimensionalCount is 0");
}
in.writeField(fieldInfo, values, maxMBSortInHeap);
in.writeField(fieldInfo, values);
}
@Override

View File

@ -56,11 +56,11 @@ class CrankyPointsFormat extends PointsFormat {
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
if (random.nextInt(100) == 0) {
throw new IOException("Fake IOException");
}
delegate.writeField(fieldInfo, values, maxMBSortInHeap);
delegate.writeField(fieldInfo, values);
}
@Override

View File

@ -67,6 +67,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
/**
* Abstract class to do basic tests for a geospatial impl (high level
@ -1247,7 +1248,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, pointsInLeaf);
return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override

View File

@ -92,6 +92,7 @@ public class RandomCodec extends AssertingCodec {
// which is less effective for testing.
// TODO: improve how we randomize this...
private final int maxPointsInLeafNode;
private final double maxMBSortInHeap;
private final int bkdSplitRandomSeed;
@Override
@ -102,9 +103,9 @@ public class RandomCodec extends AssertingCodec {
// Randomize how BKDWriter chooses its splits:
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode) {
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values, double maxMBSortInHeap) throws IOException {
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
@ -184,6 +185,7 @@ public class RandomCodec extends AssertingCodec {
int lowFreqCutoff = TestUtil.nextInt(random, 2, 100);
maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
maxMBSortInHeap = 5.0 + (3*random.nextDouble());
bkdSplitRandomSeed = random.nextInt();
add(avoidCodecs,
@ -251,7 +253,8 @@ public class RandomCodec extends AssertingCodec {
public String toString() {
return super.toString() + ": " + previousMappings.toString() +
", docValues:" + previousDVMappings.toString() +
", maxPointsInLeafNode=" + maxPointsInLeafNode;
", maxPointsInLeafNode=" + maxPointsInLeafNode +
", maxMBSortInHeap=" + maxMBSortInHeap;
}
/** Just like {@link BKDWriter} except it evilly picks random ways to split cells on

View File

@ -31,27 +31,21 @@ import java.util.Random;
* for the same field.
*/
public class RandomSimilarity extends PerFieldSimilarityWrapper {
final ClassicSimilarity defaultSim = new ClassicSimilarity();
final List<Similarity> knownSims;
Map<String,Similarity> previousMappings = new HashMap<>();
final Map<String,Similarity> previousMappings = new HashMap<>();
final int perFieldSeed;
final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord
final boolean shouldQueryNorm;
public RandomSimilarity(Random random) {
perFieldSeed = random.nextInt();
coordType = random.nextInt(3);
shouldQueryNorm = random.nextBoolean();
knownSims = new ArrayList<>(allSims);
Collections.shuffle(knownSims, random);
}
super(new ClassicSimilarity() {
final int coordType = random.nextInt(3); // 0 = no coord, 1 = coord, 2 = crazy coord
final boolean shouldQueryNorm = random.nextBoolean();
@Override
public float coord(int overlap, int maxOverlap) {
if (coordType == 0) {
return 1.0f;
} else if (coordType == 1) {
return defaultSim.coord(overlap, maxOverlap);
return super.coord(overlap, maxOverlap);
} else {
return overlap / ((float)maxOverlap + 1);
}
@ -60,12 +54,31 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
@Override
public float queryNorm(float sumOfSquaredWeights) {
if (shouldQueryNorm) {
return defaultSim.queryNorm(sumOfSquaredWeights);
return super.queryNorm(sumOfSquaredWeights);
} else {
return 1.0f;
}
}
@Override
public synchronized String toString() {
final String coordMethod;
if (coordType == 0) {
coordMethod = "no";
} else if (coordType == 1) {
coordMethod = "yes";
} else {
coordMethod = "crazy";
}
return "queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod;
}
});
perFieldSeed = random.nextInt();
knownSims = new ArrayList<>(allSims);
Collections.shuffle(knownSims, random);
}
@Override
public synchronized Similarity get(String field) {
assert field != null;
@ -138,14 +151,6 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
@Override
public synchronized String toString() {
final String coordMethod;
if (coordType == 0) {
coordMethod = "no";
} else if (coordType == 1) {
coordMethod = "yes";
} else {
coordMethod = "crazy";
}
return "RandomSimilarity(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString();
return "RandomSimilarity(" + defaultSim + "): " + previousMappings.toString();
}
}

View File

@ -66,6 +66,19 @@ New Features
* SOLR-9275: XML QueryParser support (defType=xmlparser) now extensible via configuration.
(Christine Poerschke)
* SOLR-9038: Solr core snapshots: The current commit can be snapshotted which retains the commit and associates it with
a name. The core admin API can create snapshots, list them, and delete them. Snapshot names can be referenced in
doing a core backup, and in replication. Snapshot metadata is stored in a new snapshot_metadata/ dir.
(Hrishikesh Gadre via David Smiley)
* SOLR-9279: New boolean comparison function queries comparing numeric arguments: gt, gte, lt, lte, eq
(Doug Turnbull, David Smiley)
* SOLR-9200: Add Delegation Token Support to Solr.
(Gregory Chanan)
* SOLR-9252: Feature selection and logistic regression on text (Cao Manh Dat, Joel Bernstein)
Bug Fixes
----------------------
@ -132,6 +145,15 @@ Bug Fixes
* SOLR-9334: CloudSolrClient.collectionStateCache is unbounded (noble)
* SOLR-9339: NPE in CloudSolrClient when the response is null (noble)
* SOLR-8596: Web UI doesn't correctly generate queries which include local parameters (Alexandre Rafalovitch, janhoy)
* SOLR-8645: managed-schema is now syntax highlighted in cloud->Tree view (Alexandre Rafalovitch via janhoy)
* SOLR-8379: UI Cloud->Tree view now shows .txt files correctly (Alexandre Rafalovitch via janhoy)
* SOLR-9308: Fix distributed RTG to forward request params, fixes fq and non-default fl params (hossman)
* SOLR-9179: NPE in IndexSchema using IBM JDK (noble, Colvin Cowie)
@ -143,6 +165,9 @@ Optimizations
* SOLR-9264: Optimize ZkController.publishAndWaitForDownStates to not read all collection states and
watch relevant collections instead. (Hrishikesh Gadre, shalin)
* SOLR-9335: Solr cache/search/update stats counters now use LongAdder, which is supposed to have higher throughput
under high contention. (Varun Thacker)
Other Changes
----------------------
@ -166,6 +191,15 @@ Other Changes
* SOLR-9163: Sync up basic_configs and data_driven_schema_configs, removing almost all differences
except what is required for schemaless. (yonik)
* SOLR-9340: Change ZooKeeper disconnect and session expiry related logging from INFO to WARN to
make debugging easier (Varun Thacker)
* SOLR-9358: [AngularUI] In Cloud->Tree file view area, collapse metadata by default (janhoy)
* SOLR-9256: asserting hasNext() contract in JdbcDataSource in DataImportHandler (Kristine Jetzke via Mikhail Khludnev)
* SOLR-9209: extracting JdbcDataSource.createResultSetIterator() for extension (Kristine Jetzke via Mikhail Khludnev)
================== 6.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -280,10 +280,14 @@ public class JdbcDataSource extends
resultSetIterator.close();
resultSetIterator = null;
}
resultSetIterator = new ResultSetIterator(query);
resultSetIterator = createResultSetIterator(query);
return resultSetIterator.getIterator();
}
/**
 * Creates the iterator used to read the results of the given query. Protected so that a
 * subclass can supply its own {@code ResultSetIterator}.
 *
 * @param query the query handed to the new iterator
 * @return a new {@code ResultSetIterator} for {@code query}
 */
protected ResultSetIterator createResultSetIterator(String query) {
  final ResultSetIterator iterator = new ResultSetIterator(query);
  return iterator;
}
/**
* Logs the given message together with the exception through {@code LOG.warn}.
*
* @param msg the message to log
* @param e the exception to log along with the message
*/
private void logError(String msg, Exception e) {
LOG.warn(msg, e);
}

View File

@ -510,6 +510,45 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
DriverManager.deregisterDriver(driver);
}
}
/**
* Runs a query whose result set is empty and calls {@code hasNext()} twice on the returned
* iterator, then verifies the recorded mock expectations.
* NOTE(review): the second hasNext() call presumably checks that the iterator does not touch
* the already closed result set or statement again - confirm against the mock control's strictness.
*/
@Test
public void testEmptyResultSet() throws Exception {
// the data source is looked up through JNDI and hands out the mocked connection
MockInitialContextFactory.bind("java:comp/env/jdbc/JndiDB", dataSource);
props.put(JdbcDataSource.JNDI_NAME, "java:comp/env/jdbc/JndiDB");
EasyMock.expect(dataSource.getConnection()).andReturn(connection);
jdbcDataSource.init(context, props);
// record phase: calls expected on the connection and on the statement it creates
connection.setAutoCommit(false);
Statement statement = mockControl.createMock(Statement.class);
EasyMock.expect(connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY))
.andReturn(statement);
statement.setFetchSize(500);
statement.setMaxRows(0);
EasyMock.expect(statement.execute("query")).andReturn(true);
// the result set has no columns and no rows: next() returns false on the first call
ResultSet resultSet = mockControl.createMock(ResultSet.class);
EasyMock.expect(statement.getResultSet()).andReturn(resultSet);
ResultSetMetaData metaData = mockControl.createMock(ResultSetMetaData.class);
EasyMock.expect(resultSet.getMetaData()).andReturn(metaData);
EasyMock.expect(metaData.getColumnCount()).andReturn(0);
EasyMock.expect(resultSet.next()).andReturn(false);
// the result set is expected to be closed, no further results are reported, and the
// statement is expected to be closed as well
resultSet.close();
EasyMock.expect(statement.getMoreResults()).andReturn(false);
EasyMock.expect(statement.getUpdateCount()).andReturn(-1);
statement.close();
// replay phase: exercise the data source against the recorded expectations
mockControl.replay();
Iterator<Map<String,Object>> resultSetIterator = jdbcDataSource.getData("query");
// hasNext() is called twice on purpose; the return values are not checked
resultSetIterator.hasNext();
resultSetIterator.hasNext();
mockControl.verify();
}
@Test
@Ignore("Needs a Mock database server to work")
public void testBasic() throws Exception {

View File

@ -16,6 +16,15 @@
*/
package org.apache.solr.hadoop;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
import org.apache.solr.cloud.ZkController;
@ -35,15 +44,6 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
* Extracts SolrCloud information from ZooKeeper.
*/
@ -78,8 +78,7 @@ final class ZooKeeperInspector {
}
SolrZkClient zkClient = getZkClient(zkHost);
try {
ZkStateReader zkStateReader = new ZkStateReader(zkClient);
try (ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
try {
// first check for alias
collection = checkForAlias(zkClient, collection);

View File

@ -134,6 +134,10 @@
<dependency org="antlr" name="antlr" rev="${/antlr/antlr}" conf="test.MiniKdc"/>
<dependency org="net.sf.ehcache" name="ehcache-core" rev="${/net.sf.ehcache/ehcache-core}" conf="test.MiniKdc"/>
<dependency org="org.apache.curator" name="curator-framework" rev="${/org.apache.curator/curator-framework}" conf="compile"/>
<dependency org="org.apache.curator" name="curator-client" rev="${/org.apache.curator/curator-client}" conf="compile"/>
<dependency org="org.apache.curator" name="curator-recipes" rev="${/org.apache.curator/curator-recipes}" conf="compile"/>
<!-- StatsComponents percentiles Dependencies-->
<dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/>
<!-- SQL Parser -->

View File

@ -15,21 +15,26 @@
* limitations under the License.
*/
package org.apache.solr.core;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
* A wrapper for an IndexDeletionPolicy instance.
* <p>
@ -52,9 +57,11 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
private final Map<Long, Long> reserves = new ConcurrentHashMap<>();
private volatile IndexCommit latestCommit;
private final ConcurrentHashMap<Long, AtomicInteger> savedCommits = new ConcurrentHashMap<>();
private final SolrSnapshotMetaDataManager snapshotMgr;
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy) {
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy, SolrSnapshotMetaDataManager snapshotMgr) {
this.deletionPolicy = deletionPolicy;
this.snapshotMgr = snapshotMgr;
}
/**
@ -134,7 +141,6 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
}
}
/**
* Internal use for Lucene... do not explicitly call.
*/
@ -186,6 +192,7 @@ public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
Long reserve = reserves.get(gen);
if (reserve != null && System.nanoTime() < reserve) return;
if (savedCommits.containsKey(gen)) return;
if (snapshotMgr.isSnapshotted(gen)) return;
delegate.delete();
}

View File

@ -81,6 +81,7 @@ import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.handler.IndexFetcher;
import org.apache.solr.handler.ReplicationHandler;
import org.apache.solr.handler.RequestHandlerBase;
@ -184,6 +185,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
private final Map<String,UpdateRequestProcessorChain> updateProcessorChains;
private final Map<String, SolrInfoMBean> infoRegistry;
private final IndexDeletionPolicyWrapper solrDelPolicy;
private final SolrSnapshotMetaDataManager snapshotMgr;
private final DirectoryFactory directoryFactory;
private IndexReaderFactory indexReaderFactory;
private final Codec codec;
@ -414,7 +416,19 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
} else {
delPolicy = new SolrDeletionPolicy();
}
return new IndexDeletionPolicyWrapper(delPolicy);
return new IndexDeletionPolicyWrapper(delPolicy, snapshotMgr);
}
/**
 * Obtains the snapshot meta-data directory, located under this core's data directory, from
 * the directory factory and creates the snapshot meta-data manager on top of it.
 *
 * @return the newly created manager
 * @throws IllegalStateException wrapping the {@link IOException} raised while obtaining the
 *         directory or creating the manager
 */
private SolrSnapshotMetaDataManager initSnapshotMetaDataManager() {
  final String dirName = getDataDir() + SolrSnapshotMetaDataManager.SNAPSHOT_METADATA_DIR + "/";
  final Directory snapshotDir;
  try {
    snapshotDir = directoryFactory.get(dirName, DirContext.DEFAULT, getSolrConfig().indexConfig.lockType);
    return new SolrSnapshotMetaDataManager(this, snapshotDir);
  } catch (IOException ioe) {
    throw new IllegalStateException(ioe);
  }
}
private void initListeners() {
@ -739,6 +753,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
initListeners();
this.snapshotMgr = initSnapshotMetaDataManager();
this.solrDelPolicy = initDeletionPolicy(delPolicy);
this.codec = initCodec(solrConfig, this.schema);
@ -1242,6 +1257,17 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
}
}
// Close the snapshots meta-data directory.
Directory snapshotsDir = snapshotMgr.getSnapshotsDir();
try {
this.directoryFactory.release(snapshotsDir);
} catch (Throwable e) {
SolrException.log(log,e);
if (e instanceof Error) {
throw (Error) e;
}
}
if (coreStateClosed) {
try {
@ -2343,6 +2369,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
return solrDelPolicy;
}
/**
* Returns the snapshot meta-data manager held by this core.
*
* @return the {@linkplain SolrSnapshotMetaDataManager} instance
* managing the persistent snapshots for this Solr core.
*/
public SolrSnapshotMetaDataManager getSnapshotMetaDataManager() {
return snapshotMgr;
}
/**
* @return the {@code ruleExpiryLock} instance held by this core
*/
public ReentrantLock getRuleExpiryLock() {
return ruleExpiryLock;
}

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class provides functionality required to handle the data files corresponding to Solr snapshots.
*/
public class SolrSnapshotManager {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Removes the index files of the {@linkplain IndexCommit} that has the requested generation number.
 * Files that are referenced by a commit backing one of the supplied snapshots are left in place.
 * Nothing happens when the directory holds no commit with that generation.
 *
 * @param dir The index directory storing the snapshot.
 * @param snapshots The snapshots used to compute the per-file reference counts.
 * @param gen The generation number for the {@linkplain IndexCommit}
 * @throws IOException in case of I/O errors.
 */
public static void deleteIndexFiles ( Directory dir, Collection<SnapshotMetaData> snapshots, long gen ) throws IOException {
  List<IndexCommit> allCommits = DirectoryReader.listCommits(dir);
  Map<String, Integer> fileRefCounts = buildRefCounts(snapshots, allCommits);

  // Only the first commit carrying the requested generation is processed.
  IndexCommit target = allCommits.stream()
      .filter(candidate -> candidate.getGeneration() == gen)
      .findFirst()
      .orElse(null);
  if (target != null) {
    deleteIndexFiles(dir, fileRefCounts, target);
  }
}
/**
 * Walks over every {@linkplain IndexCommit} in the given directory and deletes the files of those
 * commits whose generation does not belong to any of the supplied snapshots. Files shared with a
 * snapshotted commit are kept.
 *
 * @param dir The index directory to search for.
 * @param snapshots The snapshots whose commits (and files) must survive.
 * @throws IOException in case of I/O errors.
 */
public static void deleteNonSnapshotIndexFiles (Directory dir, Collection<SnapshotMetaData> snapshots) throws IOException {
  List<IndexCommit> allCommits = DirectoryReader.listCommits(dir);
  Map<String, Integer> fileRefCounts = buildRefCounts(snapshots, allCommits);
  Set<Long> protectedGenerations = snapshots.stream()
      .map(SnapshotMetaData::getGenerationNumber)
      .collect(Collectors.toSet());

  for (IndexCommit candidate : allCommits) {
    boolean snapshotted = protectedGenerations.contains(candidate.getGeneration());
    if (snapshotted) {
      continue;
    }
    deleteIndexFiles(dir, fileRefCounts, candidate);
  }
}
/**
 * Computes, for every index file, how many of the supplied snapshots reference it. A snapshot
 * contributes only when a commit with its generation number is present in <code>commits</code>;
 * each file of that commit is then counted once per snapshot.
 *
 * @param snapshots A collection of user configured snapshots
 * @param commits A list of {@linkplain IndexCommit} instances
 * @return A map from file name to reference count. Files that are not part of any snapshotted
 *         commit have no entry.
 * @throws IOException in case of I/O error.
 */
@VisibleForTesting
static Map<String, Integer> buildRefCounts (Collection<SnapshotMetaData> snapshots, List<IndexCommit> commits) throws IOException {
  Map<Long, IndexCommit> commitsByGen = commits.stream().collect(
      Collectors.toMap(IndexCommit::getGeneration, Function.identity()));

  Map<String, Integer> refCounts = new HashMap<>();
  for (SnapshotMetaData snapshot : snapshots) {
    IndexCommit commit = commitsByGen.get(snapshot.getGenerationNumber());
    if (commit == null) {
      // No commit with this generation in the provided list; nothing to count.
      continue;
    }
    for (String fileName : commit.getFileNames()) {
      refCounts.merge(fileName, 1, Integer::sum);
    }
  }
  return refCounts;
}
/**
 * Deletes every file of the given <code>indexCommit</code> whose reference count is zero, i.e.
 * which has no entry (or a zero entry) in <code>refCounts</code>. Deletion is best effort: a
 * failure to delete one file is logged as a warning and the remaining files are still processed.
 *
 * @param dir The index directory containing the {@linkplain IndexCommit} to be deleted.
 * @param refCounts A map containing the reference count for index files in the specified directory.
 * @param indexCommit The {@linkplain IndexCommit} whose files need to be deleted.
 * @throws IOException in case of I/O errors.
 */
private static void deleteIndexFiles ( Directory dir, Map<String, Integer> refCounts, IndexCommit indexCommit ) throws IOException {
  log.info("Deleting index files for index commit with generation {} in directory {}", indexCommit.getGeneration(), dir);
  for (String fileName : indexCommit.getFileNames()) {
    // A non-zero count means the file is still needed and must stay.
    int ref = refCounts.getOrDefault(fileName, 0);
    log.debug("Reference count for file {} is {}", fileName, ref);
    if (ref != 0) {
      continue;
    }
    try {
      dir.deleteFile(fileName);
    } catch (IOException e) {
      log.warn("Unable to delete file {} in directory {} due to exception {}", fileName, dir, e.getMessage());
    }
  }
}
}

View File

@ -0,0 +1,416 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.snapshots;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Collectors;
import com.google.common.base.Preconditions;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class is responsible for managing the persistent snapshot meta-data for the Solr indexes. The
* persistent snapshots are implemented by relying on Lucene {@linkplain IndexDeletionPolicy}
* abstraction to configure a specific {@linkplain IndexCommit} to be retained. The
* {@linkplain IndexDeletionPolicyWrapper} in Solr uses this class to create/delete the Solr index
* snapshots.
*/
public class SolrSnapshotMetaDataManager {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String SNAPSHOT_METADATA_DIR = "snapshot_metadata";
/**
 * An immutable description of a single snapshot: its name, the path of the index directory that
 * holds the snapshotted commit, and the generation number of that commit.
 *
 * All fields are final so that instances handed out by the enclosing manager can be read
 * safely from any thread.
 */
public static class SnapshotMetaData {
  private final String name;
  private final String indexDirPath;
  private final long generationNumber;

  /**
   * @param name The name of the snapshot.
   * @param indexDirPath The path of the index directory containing the snapshotted commit.
   * @param generationNumber The generation number of the snapshotted commit.
   */
  public SnapshotMetaData(String name, String indexDirPath, long generationNumber) {
    this.name = name;
    this.indexDirPath = indexDirPath;
    this.generationNumber = generationNumber;
  }

  /** @return the name of the snapshot. */
  public String getName() {
    return name;
  }

  /** @return the path of the index directory containing the snapshotted commit. */
  public String getIndexDirPath() {
    return indexDirPath;
  }

  /** @return the generation number of the snapshotted commit. */
  public long getGenerationNumber() {
    return generationNumber;
  }

  @Override
  public String toString() {
    return "SnapshotMetaData[name=" + name
        + ", indexDirPath=" + indexDirPath
        + ", generation=" + generationNumber
        + "]";
  }
}
/** Prefix used for the save file. */
public static final String SNAPSHOTS_PREFIX = "snapshots_";
private static final int VERSION_START = 0;
private static final int VERSION_CURRENT = VERSION_START;
private static final String CODEC_NAME = "solr-snapshots";
// The generation number appended to SNAPSHOTS_PREFIX to name the next meta-data file written by persist()
private long nextWriteGen;
private final Directory dir;
/** Used to map snapshot name to snapshot meta-data. */
protected final Map<String,SnapshotMetaData> nameToDetailsMapping = new LinkedHashMap<>();
/** Used to figure out the *current* index data directory path */
private final SolrCore solrCore;
/**
* A constructor. Delegates to the three-argument constructor with
* {@linkplain OpenMode#CREATE_OR_APPEND}, i.e. existing meta-data is loaded if present and
* a new structure is started otherwise.
*
* @param solrCore The Solr core whose snapshots are managed; used for its name (logging),
* its current index directory path and its directory factory.
* @param dir The directory where the snapshot meta-data should be stored. Enables updating
* the existing meta-data.
* @throws IOException in case of errors.
*/
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir) throws IOException {
this(solrCore, dir, OpenMode.CREATE_OR_APPEND);
}
/**
* A constructor.
*
* @param dir The directory where the snapshot meta-data is stored.
* @param mode CREATE If previous meta-data should be erased.
* APPEND If previous meta-data should be read and updated.
* CREATE_OR_APPEND Creates a new meta-data structure if one does not exist
* Updates the existing structure if one exists.
* @throws IOException in case of errors.
*/
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir, OpenMode mode) throws IOException {
this.solrCore = solrCore;
this.dir = dir;
if (mode == OpenMode.CREATE) {
deleteSnapshotMetadataFiles();
}
loadFromSnapshotMetadataFile();
if (mode == OpenMode.APPEND && nextWriteGen == 0) {
throw new IllegalStateException("no snapshots stored in this directory");
}
}
/**
* Returns the {@linkplain Directory} that was passed to the constructor and in which the
* snapshot meta-data files are read and written.
*
* @return The snapshot meta-data directory
*/
public Directory getSnapshotsDir() {
return dir;
}
/**
 * This method creates a new snapshot meta-data entry and persists the updated meta-data.
 *
 * If persisting fails, the new entry is rolled back so that the in-memory state never
 * advertises a snapshot whose creation was reported as failed, and the original exception
 * is propagated to the caller.
 *
 * @param name The name of the snapshot.
 * @param indexDirPath The directory path where the index files are stored.
 * @param gen The generation number for the {@linkplain IndexCommit} being snapshotted.
 * @throws IOException in case of I/O errors.
 * @throws SolrException if a snapshot with the specified name already exists.
 */
public synchronized void snapshot(String name, String indexDirPath, long gen) throws IOException {
  Preconditions.checkNotNull(name);

  log.info("Creating the snapshot named {} for core {} associated with index commit with generation {} in directory {}"
      , name, solrCore.getName(), gen, indexDirPath);

  if (nameToDetailsMapping.containsKey(name)) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "A snapshot with name " + name + " already exists");
  }

  SnapshotMetaData d = new SnapshotMetaData(name, indexDirPath, gen);
  nameToDetailsMapping.put(name, d);

  boolean success = false;
  try {
    persist();
    success = true;
  } finally {
    if (!success) {
      try {
        release(name);
      } catch (Exception e) {
        // release() re-inserts the entry when its own persist() fails. Remove it again, otherwise
        // a snapshot that was never successfully created would remain visible in memory.
        nameToDetailsMapping.remove(name);
        // Not rethrown so that the original exception keeps propagating, but no longer silent.
        log.warn("Unable to roll back the meta-data of the failed snapshot {} for core {}",
            name, solrCore.getName(), e);
      }
    }
  }
}
/**
 * Removes the snapshot with the given name (if there is one) and persists the change. When
 * persisting fails, the removed entry is put back before the exception propagates.
 *
 * @param name The name of the snapshot to be deleted.
 * @return The meta-data of the removed snapshot, or an empty {@linkplain Optional} if no
 *         snapshot with that name exists.
 * @throws IOException in case of I/O error
 */
public synchronized Optional<SnapshotMetaData> release(String name) throws IOException {
  log.info("Deleting the snapshot named {} for core {}", name, solrCore.getName());

  Preconditions.checkNotNull(name);
  SnapshotMetaData removed = nameToDetailsMapping.remove(name);
  if (removed == null) {
    return Optional.empty();
  }

  boolean persisted = false;
  try {
    persist();
    persisted = true;
  } finally {
    if (!persisted) {
      // Undo the in-memory removal since it could not be written out.
      nameToDetailsMapping.put(name, removed);
    }
  }
  return Optional.of(removed);
}
/**
 * Tells whether a snapshot exists for the given generation number in the *current* index
 * directory of the core. The core is only asked for its index directory when at least one
 * snapshot is registered.
 *
 * @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
 * @return true if the snapshot is created.
 *         false otherwise.
 */
public synchronized boolean isSnapshotted(long genNumber) {
  if (nameToDetailsMapping.isEmpty()) {
    return false;
  }
  return isSnapshotted(solrCore.getIndexDir(), genNumber);
}
/**
 * Tells whether any registered snapshot refers to the given generation number in the given
 * index directory.
 *
 * @param indexDirPath The index directory path to be checked.
 * @param genNumber The generation number for the {@linkplain IndexCommit} to be checked.
 * @return true if the snapshot is created.
 *         false otherwise.
 */
public synchronized boolean isSnapshotted(String indexDirPath, long genNumber) {
  for (SnapshotMetaData candidate : nameToDetailsMapping.values()) {
    if (candidate.getIndexDirPath().equals(indexDirPath) && candidate.getGenerationNumber() == genNumber) {
      return true;
    }
  }
  return false;
}
/**
 * Looks up the meta-data registered under the given snapshot name.
 *
 * @param name The name of the snapshot
 * @return The snapshot meta-data, or an empty {@linkplain Optional} if no such snapshot exists.
 */
public synchronized Optional<SnapshotMetaData> getSnapshotMetaData(String name) {
  SnapshotMetaData found = nameToDetailsMapping.get(name);
  if (found == null) {
    return Optional.empty();
  }
  return Optional.of(found);
}
/**
 * @return A new list holding the names of the snapshots created so far. The list is a copy,
 *         so callers can use it without holding this object's lock.
 */
public synchronized List<String> listSnapshots() {
  List<String> names = new ArrayList<>(nameToDetailsMapping.size());
  names.addAll(nameToDetailsMapping.keySet());
  return names;
}
/**
 * Collects the snapshots whose index directory path equals the given path.
 *
 * @param indexDirPath The index directory path.
 * @return a new collection with the snapshots stored in the specified directory.
 */
public synchronized Collection<SnapshotMetaData> listSnapshotsInIndexDir(String indexDirPath) {
  List<SnapshotMetaData> matches = new ArrayList<>();
  for (SnapshotMetaData candidate : nameToDetailsMapping.values()) {
    if (indexDirPath.equals(candidate.getIndexDirPath())) {
      matches.add(candidate);
    }
  }
  return matches;
}
/**
 * Resolves a snapshot name to the {@linkplain IndexCommit} it refers to. The index directory
 * recorded in the snapshot meta-data is obtained from the core's directory factory, its commits
 * are listed, and the one with the recorded generation number is returned. The directory is
 * released again before this method returns.
 *
 * @param commitName The name of persisted commit
 * @return the {@linkplain IndexCommit}, or an empty {@linkplain Optional} when either no snapshot
 *         with that name exists or the directory no longer contains the commit.
 * @throws IOException in case of I/O error.
 */
public Optional<IndexCommit> getIndexCommitByName(String commitName) throws IOException {
  Optional<SnapshotMetaData> metaData = getSnapshotMetaData(commitName);
  if (!metaData.isPresent()) {
    log.warn("Commit with name {} is not persisted for core {}", commitName, solrCore.getName());
    return Optional.empty();
  }

  SnapshotMetaData snapshot = metaData.get();
  String indexDirPath = snapshot.getIndexDirPath();
  long gen = snapshot.getGenerationNumber();

  Directory d = solrCore.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
  try {
    Optional<IndexCommit> match = DirectoryReader.listCommits(d)
        .stream()
        .filter(ic -> ic.getGeneration() == gen)
        .findAny();
    if (!match.isPresent()) {
      log.warn("Unable to find commit with generation {} in the directory {}", gen, indexDirPath);
    }
    return match;
  } finally {
    solrCore.getDirectoryFactory().release(d);
  }
}
/**
* Writes the complete in-memory snapshot mapping to a new file named
* {@link #SNAPSHOTS_PREFIX} followed by the current value of {@code nextWriteGen}, syncs that
* file, deletes the file of the previous generation (best effort) and finally increments
* {@code nextWriteGen}.
*
* File layout: codec header, entry count (VInt), then for each entry the snapshot name,
* the index directory path and the commit generation (VLong).
*
* @throws IOException if the file cannot be created, written, closed or synced.
*/
private synchronized void persist() throws IOException {
String fileName = SNAPSHOTS_PREFIX + nextWriteGen;
IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT);
boolean success = false;
try {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(nameToDetailsMapping.size());
for(Entry<String,SnapshotMetaData> ent : nameToDetailsMapping.entrySet()) {
out.writeString(ent.getKey());
out.writeString(ent.getValue().getIndexDirPath());
out.writeVLong(ent.getValue().getGenerationNumber());
}
success = true;
} finally {
if (!success) {
// Writing failed: close quietly and remove the partial file so the original exception propagates.
IOUtils.closeWhileHandlingException(out);
IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
} else {
// Writing succeeded: a failure to close is reported to the caller.
IOUtils.close(out);
}
}
// The previous generation is removed only after the new file has been synced.
dir.sync(Collections.singletonList(fileName));
if (nextWriteGen > 0) {
String lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen-1);
// exception OK: likely it didn't exist
IOUtils.deleteFilesIgnoringExceptions(dir, lastSaveFile);
}
nextWriteGen++;
}
/**
 * Deletes every file in the meta-data directory whose name starts with {@link #SNAPSHOTS_PREFIX}.
 *
 * @throws IOException if listing the directory or deleting a file fails.
 */
private synchronized void deleteSnapshotMetadataFiles() throws IOException {
  for (String candidate : dir.listAll()) {
    if (!candidate.startsWith(SNAPSHOTS_PREFIX)) {
      continue;
    }
    dir.deleteFile(candidate);
  }
}
/**
 * Reads the snapshot meta-data information from the given {@link Directory}.
 *
 * Every file whose name starts with {@link #SNAPSHOTS_PREFIX} and whose generation is newer than
 * the newest generation loaded so far is read. A file is adopted only if it could be read
 * completely; a file that fails with an {@linkplain IOException} is logged and skipped, so a
 * truncated or corrupt newer file can no longer replace (and cause the deletion of) an older
 * intact one. If no file could be loaded although at least one read failed, the first failure
 * is thrown.
 *
 * After a successful load the other files that were examined are deleted (best effort) and
 * {@code nextWriteGen} is set past the highest generation seen, so that the next write never
 * reuses the name of a broken file that could not be deleted. As a consequence, the loaded
 * file may outlive the next write if a newer broken file existed.
 *
 * @throws IOException if the directory cannot be listed, a file cannot be opened or closed, or
 *         no meta-data file could be read successfully.
 */
private synchronized void loadFromSnapshotMetadataFile() throws IOException {
  log.info("Loading from snapshot metadata file...");
  long genLoaded = -1;
  long maxGenSeen = -1;
  IOException ioe = null;
  List<String> snapshotFiles = new ArrayList<>();
  for (String file : dir.listAll()) {
    if (!file.startsWith(SNAPSHOTS_PREFIX)) {
      continue;
    }
    long gen = Long.parseLong(file.substring(SNAPSHOTS_PREFIX.length()));
    if (genLoaded != -1 && gen <= genLoaded) {
      continue;
    }
    snapshotFiles.add(file);
    maxGenSeen = Math.max(maxGenSeen, gen);

    // Insertion-ordered so that the order stored in the file is carried over to the live mapping.
    Map<String, SnapshotMetaData> snapshotMetaDataMapping = new LinkedHashMap<>();
    boolean loaded = false;
    IndexInput in = dir.openInput(file, IOContext.DEFAULT);
    try {
      CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
      int count = in.readVInt();
      for (int i = 0; i < count; i++) {
        String name = in.readString();
        String indexDirPath = in.readString();
        long commitGen = in.readVLong();
        snapshotMetaDataMapping.put(name, new SnapshotMetaData(name, indexDirPath, commitGen));
      }
      loaded = true;
    } catch (IOException ioe2) {
      log.warn("Unable to read snapshot metadata file {}; ignoring it", file, ioe2);
      // Save first exception & throw in the end
      if (ioe == null) {
        ioe = ioe2;
      }
    } finally {
      in.close();
    }

    // Adopt the file only when it was read completely; partial content must not win.
    if (loaded) {
      genLoaded = gen;
      nameToDetailsMapping.clear();
      nameToDetailsMapping.putAll(snapshotMetaDataMapping);
    }
  }

  if (genLoaded == -1) {
    // Nothing was loaded...
    if (ioe != null) {
      // ... not for lack of trying:
      throw ioe;
    }
  } else {
    if (snapshotFiles.size() > 1) {
      // Remove any broken / old snapshot files:
      String curFileName = SNAPSHOTS_PREFIX + genLoaded;
      for (String file : snapshotFiles) {
        if (!curFileName.equals(file)) {
          IOUtils.deleteFilesIgnoringExceptions(dir, file);
        }
      }
    }
    // Skip past every generation seen, including broken files whose deletion may have failed.
    nextWriteGen = 1 + maxGenSeen;
  }
}
}

View File

@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Core classes for Solr's persistent snapshots functionality
*/
package org.apache.solr.core.snapshots;

View File

@ -81,6 +81,9 @@ import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.apache.solr.handler.ReplicationHandler.*;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
@ -454,9 +457,18 @@ public class IndexFetcher {
// let the system know we are changing dir's and the old one
// may be closed
if (indexDir != null) {
LOG.info("removing old index directory " + indexDir);
solrCore.getDirectoryFactory().doneWithDirectory(indexDir);
SolrSnapshotMetaDataManager snapshotsMgr = solrCore.getSnapshotMetaDataManager();
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
// Delete the old index directory only if no snapshot exists in that directory.
if(snapshots.isEmpty()) {
LOG.info("removing old index directory " + indexDir);
solrCore.getDirectoryFactory().remove(indexDir);
} else {
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
}
}
}

View File

@ -87,6 +87,7 @@ import org.apache.solr.core.SolrDeletionPolicy;
import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;
@ -505,12 +506,25 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
numberToKeep = Integer.MAX_VALUE;
}
IndexCommit indexCommit = null;
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
if (commitName != null) {
SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
if(commit.isPresent()) {
indexCommit = commit.get();
} else {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to find an index commit with name " + commitName +
" for core " + core.getName());
}
} else {
IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
IndexCommit indexCommit = delPolicy.getLatestCommit();
indexCommit = delPolicy.getLatestCommit();
if (indexCommit == null) {
indexCommit = req.getSearcher().getIndexReader().getIndexCommit();
}
}
String location = params.get(CoreAdminParams.BACKUP_LOCATION);
String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
@ -532,7 +546,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
}
// small race here before the commit point is saved
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME));
SnapShooter snapShooter = new SnapShooter(repo, core, location, params.get(NAME), commitName);
snapShooter.validateCreateSnapshot();
snapShooter.createSnapAsync(indexCommit, numberToKeep, (nl) -> snapShootDetails = nl);

View File

@ -16,13 +16,17 @@
*/
package org.apache.solr.handler;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.concurrent.atomic.LongAdder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.PluginBag;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
@ -35,10 +39,6 @@ import org.apache.solr.util.stats.TimerContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.concurrent.atomic.AtomicLong;
import static org.apache.solr.core.RequestParams.USEPARAM;
/**
@ -53,10 +53,10 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
protected boolean httpCaching = true;
// Statistics
private final AtomicLong numRequests = new AtomicLong();
private final AtomicLong numServerErrors = new AtomicLong();
private final AtomicLong numClientErrors = new AtomicLong();
private final AtomicLong numTimeouts = new AtomicLong();
private final LongAdder numRequests = new LongAdder();
private final LongAdder numServerErrors = new LongAdder();
private final LongAdder numClientErrors = new LongAdder();
private final LongAdder numTimeouts = new LongAdder();
private final Timer requestTimes = new Timer();
private final long handlerStart;
@ -144,7 +144,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
@Override
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests.incrementAndGet();
numRequests.increment();
TimerContext timer = requestTimes.time();
try {
if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM));
@ -158,7 +158,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
Object partialResults = header.get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY);
boolean timedOut = partialResults == null ? false : (Boolean)partialResults;
if( timedOut ) {
numTimeouts.incrementAndGet();
numTimeouts.increment();
rsp.setHttpCaching(false);
}
}
@ -185,9 +185,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
SolrException.log(log, e);
if (isServerError) {
numServerErrors.incrementAndGet();
numServerErrors.increment();
} else {
numClientErrors.incrementAndGet();
numClientErrors.increment();
}
}
}

View File

@ -19,6 +19,7 @@ package org.apache.solr.handler;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Locale;
import java.util.concurrent.Callable;
@ -32,6 +33,9 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -63,6 +67,7 @@ public class RestoreCore implements Callable<Boolean> {
String restoreIndexName = "restore." + dateFormat.format(new Date());
String restoreIndexPath = core.getDataDir() + restoreIndexName;
String indexDirPath = core.getIndexDir();
Directory restoreIndexDir = null;
Directory indexDir = null;
try {
@ -71,7 +76,7 @@ public class RestoreCore implements Callable<Boolean> {
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Prefer local copy.
indexDir = core.getDirectoryFactory().get(core.getIndexDir(),
indexDir = core.getDirectoryFactory().get(indexDirPath,
DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
//Move all files from backupDir to restoreIndexDir
@ -130,7 +135,16 @@ public class RestoreCore implements Callable<Boolean> {
}
if (success) {
core.getDirectoryFactory().doneWithDirectory(indexDir);
SolrSnapshotMetaDataManager snapshotsMgr = core.getSnapshotMetaDataManager();
Collection<SnapshotMetaData> snapshots = snapshotsMgr.listSnapshotsInIndexDir(indexDirPath);
// Delete the old index directory only if no snapshot exists in that directory.
if (snapshots.isEmpty()) {
core.getDirectoryFactory().remove(indexDir);
} else {
SolrSnapshotManager.deleteNonSnapshotIndexFiles(indexDir, snapshots);
}
}
return true;

View File

@ -26,12 +26,14 @@ import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.function.Consumer;
import com.google.common.base.Preconditions;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
@ -39,6 +41,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.BackupRepository.PathType;
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
@ -59,6 +62,7 @@ public class SnapShooter {
private URI baseSnapDirPath = null;
private URI snapshotDirPath = null;
private BackupRepository backupRepo = null;
private String commitName; // can be null
@Deprecated
public SnapShooter(SolrCore core, String location, String snapshotName) {
@ -71,14 +75,14 @@ public class SnapShooter {
} else {
snapDirStr = core.getCoreDescriptor().getInstanceDir().resolve(location).normalize().toString();
}
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName);
initialize(new LocalFileSystemRepository(), core, snapDirStr, snapshotName, null);
}
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
initialize(backupRepo, core, location, snapshotName);
public SnapShooter(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
initialize(backupRepo, core, location, snapshotName, commitName);
}
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName) {
private void initialize(BackupRepository backupRepo, SolrCore core, String location, String snapshotName, String commitName) {
this.solrCore = Preconditions.checkNotNull(core);
this.backupRepo = Preconditions.checkNotNull(backupRepo);
this.baseSnapDirPath = backupRepo.createURI(Preconditions.checkNotNull(location)).normalize();
@ -90,6 +94,7 @@ public class SnapShooter {
directoryName = "snapshot." + fmt.format(new Date());
}
this.snapshotDirPath = backupRepo.createURI(location, directoryName);
this.commitName = commitName;
}
public BackupRepository getBackupRepository() {
@ -145,10 +150,19 @@ public class SnapShooter {
}
public NamedList createSnapshot() throws Exception {
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
RefCounted<SolrIndexSearcher> searcher = solrCore.getSearcher();
try {
if (commitName != null) {
SolrSnapshotMetaDataManager snapshotMgr = solrCore.getSnapshotMetaDataManager();
Optional<IndexCommit> commit = snapshotMgr.getIndexCommitByName(commitName);
if(commit.isPresent()) {
return createSnapshot(commit.get());
}
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to find an index commit with name " + commitName +
" for core " + solrCore.getName());
} else {
//TODO should we try solrCore.getDeletionPolicy().getLatestCommit() first?
IndexDeletionPolicyWrapper deletionPolicy = solrCore.getDeletionPolicy();
IndexCommit indexCommit = searcher.get().getIndexReader().getIndexCommit();
deletionPolicy.saveCommitPoint(indexCommit.getGeneration());
try {
@ -156,6 +170,7 @@ public class SnapShooter {
} finally {
deletionPolicy.releaseCommitPoint(indexCommit.getGeneration());
}
}
} finally {
searcher.decref();
}

View File

@ -121,6 +121,9 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("outerHashJoin", OuterHashJoinStream.class)
.withFunctionName("intersect", IntersectStream.class)
.withFunctionName("complement", ComplementStream.class)
.withFunctionName("sort", SortStream.class)
.withFunctionName("train", TextLogitStream.class)
.withFunctionName("features", FeaturesSelectionStream.class)
.withFunctionName("daemon", DaemonStream.class)
.withFunctionName("sort", SortStream.class)
.withFunctionName("select", SelectStream.class)

View File

@ -34,6 +34,7 @@ import java.util.concurrent.Future;
import com.google.common.collect.Lists;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
@ -59,9 +60,13 @@ import org.apache.solr.core.CachingDirectoryFactory;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager.SnapshotMetaData;
import org.apache.solr.handler.RestoreCore;
import org.apache.solr.handler.SnapShooter;
import org.apache.solr.handler.admin.CoreAdminHandler.CoreAdminOp;
@ -794,20 +799,24 @@ enum CoreAdminOperation implements CoreAdminOp {
+ " parameter or as a default repository property");
}
// An optional parameter to describe the snapshot to be backed-up. If this
// parameter is not supplied, the latest index commit is backed-up.
String commitName = params.get(CoreAdminParams.COMMIT_NAME);
try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
SnapShooter snapShooter = new SnapShooter(repository, core, location, name);
SnapShooter snapShooter = new SnapShooter(repository, core, location, name, commitName);
// validateCreateSnapshot will create parent dirs instead of throw; that choice is dubious.
// But we want to throw. One reason is that
// this dir really should, in fact must, already exist here if triggered via a collection backup on a shared
// file system. Otherwise, perhaps the FS location isn't shared -- we want an error.
if (!snapShooter.getBackupRepository().exists(snapShooter.getLocation())) {
throw new SolrException(ErrorCode.BAD_REQUEST,
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Directory to contain snapshots doesn't exist: " + snapShooter.getLocation());
}
snapShooter.validateCreateSnapshot();
snapShooter.createSnapshot();
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR,
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Failed to backup core=" + cname + " because " + e, e);
}
}),
@ -845,6 +854,92 @@ enum CoreAdminOperation implements CoreAdminOp {
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to restore core=" + core.getName());
}
}
}),
CREATESNAPSHOT_OP(CREATESNAPSHOT, it -> {
  // Registers a named snapshot for a core's index commit and reports what was recorded.
  final CoreContainer container = it.handler.getCoreContainer();
  final SolrParams reqParams = it.req.getParams();

  // Both parameters are mandatory; required() rejects the request when one is absent.
  final String commitName = reqParams.required().get(CoreAdminParams.COMMIT_NAME);
  final String cname = reqParams.required().get(CoreAdminParams.CORE);

  try (SolrCore core = container.getCore(cname)) {
    if (core == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
    }

    final String indexDirPath = core.getIndexDir();

    // Prefer the commit tracked by the deletion policy; when it has none, fall back to
    // the commit the currently registered searcher was opened on.
    IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
    if (commit == null) {
      final RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
      try {
        commit = searcherRef.get().getIndexReader().getIndexCommit();
      } finally {
        searcherRef.decref();
      }
    }

    final SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
    snapshotMgr.snapshot(commitName, indexDirPath, commit.getGeneration());

    it.rsp.add("core", core.getName());
    it.rsp.add("commitName", commitName);
    it.rsp.add("indexDirPath", indexDirPath);
    it.rsp.add("generation", commit.getGeneration());
  }
}),
DELETESNAPSHOT_OP(DELETESNAPSHOT, it -> {
  // Releases a named snapshot and, when it lives outside the current index directory,
  // deletes the index files that belonged to it.
  final CoreContainer container = it.handler.getCoreContainer();
  final SolrParams reqParams = it.req.getParams();
  final String commitName = reqParams.required().get(CoreAdminParams.COMMIT_NAME);
  final String cname = reqParams.required().get(CoreAdminParams.CORE);

  try (SolrCore core = container.getCore(cname)) {
    if (core == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
    }

    final SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
    final Optional<SnapshotMetaData> released = snapshotMgr.release(commitName);
    if (released.isPresent()) {
      final SnapshotMetaData snapshot = released.get();
      final long gen = snapshot.getGenerationNumber();
      final String snapshotDirPath = snapshot.getIndexDirPath();

      // If the directory storing the snapshot is not the same as the *current* core
      // index directory, then delete the files corresponding to this snapshot.
      // Otherwise we leave the index files related to snapshot as is (assuming the
      // underlying Solr IndexDeletionPolicy will clean them up appropriately).
      final boolean inCurrentIndexDir = snapshotDirPath.equals(core.getIndexDir());
      if (!inCurrentIndexDir) {
        final DirectoryFactory dirFactory = core.getDirectoryFactory();
        final Directory dir = dirFactory.get(snapshotDirPath, DirContext.DEFAULT, DirectoryFactory.LOCK_TYPE_NONE);
        try {
          SolrSnapshotManager.deleteIndexFiles(dir, snapshotMgr.listSnapshotsInIndexDir(snapshotDirPath), gen);
        } finally {
          // always hand the directory back to the factory, even if deletion failed
          dirFactory.release(dir);
        }
      }
    }
  }
}),
LISTSNAPSHOTS_OP(LISTSNAPSHOTS, it -> {
  // Lists every named snapshot of a core together with its generation and index directory.
  final CoreContainer container = it.handler.getCoreContainer();
  final SolrParams reqParams = it.req.getParams();
  final String cname = reqParams.required().get(CoreAdminParams.CORE);

  try (SolrCore core = container.getCore(cname)) {
    if (core == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
    }

    final SolrSnapshotMetaDataManager snapshotMgr = core.getSnapshotMetaDataManager();
    final NamedList<NamedList<String>> snapshots = new NamedList<>();
    for (String snapshotName : snapshotMgr.listSnapshots()) {
      final Optional<SnapshotMetaData> metadata = snapshotMgr.getSnapshotMetaData(snapshotName);
      if (!metadata.isPresent()) {
        // no metadata available for this name; leave it out of the listing
        continue;
      }
      final SnapshotMetaData snapshot = metadata.get();
      final NamedList<String> props = new NamedList<>();
      props.add("generation", String.valueOf(snapshot.getGenerationNumber()));
      props.add("indexDirPath", snapshot.getIndexDirPath());
      snapshots.add(snapshotName, props);
    }
    it.rsp.add("snapshots", snapshots);
  }
});
final CoreAdminParams.CoreAdminAction action;

View File

@ -249,7 +249,8 @@ public class RealTimeGetComponent extends SearchComponent
docid = segid + ctx.docBase;
if (rb.getFilters() != null) {
for (Query q : rb.getFilters()) {
for (Query raw : rb.getFilters()) {
Query q = raw.rewrite(searcher.getIndexReader());
Scorer scorer = searcher.createWeight(q, false).scorer(ctx);
if (scorer == null || segid != scorer.iterator().advance(segid)) {
// filter doesn't match.
@ -448,7 +449,7 @@ public class RealTimeGetComponent extends SearchComponent
ZkController zkController = rb.req.getCore().getCoreDescriptor().getCoreContainer().getZkController();
// if shards=... then use that
if (zkController != null && params.get("shards") == null) {
if (zkController != null && params.get(ShardParams.SHARDS) == null) {
CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
String collection = cloudDescriptor.getCollectionName();
@ -470,32 +471,18 @@ public class RealTimeGetComponent extends SearchComponent
for (Map.Entry<String,List<String>> entry : sliceToId.entrySet()) {
String shard = entry.getKey();
String shardIdList = StrUtils.join(entry.getValue(), ',');
ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
ShardRequest sreq = createShardRequest(rb, entry.getValue());
// sreq.shards = new String[]{shard}; // TODO: would be nice if this would work...
sreq.shards = sliceToShards(rb, collection, shard);
sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq);
}
} else {
String shardIdList = StrUtils.join(reqIds.allIds, ',');
ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
ShardRequest sreq = createShardRequest(rb, reqIds.allIds);
sreq.shards = null; // ALL
sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq);
}
@ -503,6 +490,28 @@ public class RealTimeGetComponent extends SearchComponent
return ResponseBuilder.STAGE_DONE;
}
/**
 * Builds a ShardRequest that fetches the given ids, starting from a copy of the params of
 * the current request. The copy is redirected to the "/get" handler with distributed
 * processing turned off, and any "shards", "id" and "ids" values inherited from the
 * original request are replaced by a single comma-joined "ids" value.
 * The returned request has no shards assigned yet; the caller decides where it is sent.
 */
private ShardRequest createShardRequest(final ResponseBuilder rb, final List<String> ids) {
  final ModifiableSolrParams shardParams = new ModifiableSolrParams(rb.req.getParams());

  // TODO: how to avoid hardcoding this and hit the same handler?
  shardParams.set(ShardParams.SHARDS_QT,"/get");
  shardParams.set("distrib",false);

  // strip what was copied from the incoming request before installing our own id list
  shardParams.remove(ShardParams.SHARDS);
  shardParams.remove("id");
  shardParams.remove("ids");
  shardParams.set("ids", StrUtils.join(ids, ','));

  final ShardRequest shardRequest = new ShardRequest();
  shardRequest.purpose = 1;
  shardRequest.params = shardParams;
  return shardRequest;
}
private String[] sliceToShards(ResponseBuilder rb, String collection, String slice) {
String lookup = collection + '_' + slice; // seems either form may be filled in rb.slices?

View File

@ -0,0 +1,240 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.IOException;
import java.util.TreeSet;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
/**
 * Query parser plugin registered as {@code igain}. It produces an {@link AnalyticsQuery}
 * whose collector scores every term of a field by its information gain with respect to a
 * binary outcome (stored as numeric doc values) over the documents matched by the main query,
 * and adds the best {@code numTerms} terms to the response.
 *
 * Parameters (all required): {@code field}, {@code outcome}, {@code numTerms},
 * {@code positiveLabel}.
 */
public class IGainTermsQParserPlugin extends QParserPlugin {

  public static final String NAME = "igain";

  @Override
  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new IGainTermsQParser(qstr, localParams, params, req);
  }

  private static class IGainTermsQParser extends QParser {

    public IGainTermsQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
      super(qstr, localParams, params, req);
    }

    /**
     * @throws SyntaxError if a required parameter is missing or a numeric parameter is not an integer
     */
    @Override
    public Query parse() throws SyntaxError {
      String field = requireParam("field");
      String outcome = requireParam("outcome");
      int numTerms = requireIntParam("numTerms");
      int positiveLabel = requireIntParam("positiveLabel");

      return new IGainTermsQuery(field, outcome, positiveLabel, numTerms);
    }

    /** Returns the named parameter, failing with a SyntaxError instead of a later NPE when absent. */
    private String requireParam(String name) throws SyntaxError {
      String value = getParam(name);
      if (value == null) {
        throw new SyntaxError("Missing required parameter '" + name + "' for the " + NAME + " query parser");
      }
      return value;
    }

    /** Returns the named parameter parsed as an int, reporting bad input as a SyntaxError. */
    private int requireIntParam(String name) throws SyntaxError {
      String value = requireParam(name);
      try {
        return Integer.parseInt(value);
      } catch (NumberFormatException e) {
        throw new SyntaxError("Parameter '" + name + "' must be an integer but was '" + value + "'", e);
      }
    }
  }

  private static class IGainTermsQuery extends AnalyticsQuery {

    private final String field;
    private final String outcome;
    private final int numTerms;
    private final int positiveLabel;

    public IGainTermsQuery(String field, String outcome, int positiveLabel, int numTerms) {
      this.field = field;
      this.outcome = outcome;
      this.numTerms = numTerms;
      this.positiveLabel = positiveLabel;
    }

    @Override
    public DelegatingCollector getAnalyticsCollector(ResponseBuilder rb, IndexSearcher searcher) {
      return new IGainTermsCollector(rb, searcher, field, outcome, positiveLabel, numTerms);
    }
  }

  /**
   * Splits the collected documents into a positive and a negative set according to the outcome
   * doc value, then in {@link #finish()} walks the terms of the field and scores each one.
   */
  private static class IGainTermsCollector extends DelegatingCollector {

    private final String field;
    private final String outcome;
    private final IndexSearcher searcher;
    private final ResponseBuilder rb;
    private final int positiveLabel;
    private final int numTerms;
    private int count;

    private NumericDocValues leafOutcomeValue;
    // Both sets are indexed by global doc id (docBase + segment-local id).
    private final SparseFixedBitSet positiveSet;
    private final SparseFixedBitSet negativeSet;

    private int numPositiveDocs;

    public IGainTermsCollector(ResponseBuilder rb, IndexSearcher searcher, String field, String outcome, int positiveLabel, int numTerms) {
      this.rb = rb;
      this.searcher = searcher;
      this.field = field;
      this.outcome = outcome;
      this.positiveSet = new SparseFixedBitSet(searcher.getIndexReader().maxDoc());
      this.negativeSet = new SparseFixedBitSet(searcher.getIndexReader().maxDoc());

      this.numTerms = numTerms;
      this.positiveLabel = positiveLabel;
    }

    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      super.doSetNextReader(context);
      LeafReader reader = context.reader();
      // may be null when this segment has no doc values for the outcome field
      leafOutcomeValue = reader.getNumericDocValues(outcome);
    }

    @Override
    public void collect(int doc) throws IOException {
      super.collect(doc);
      ++count;
      // A segment without the outcome field yields no doc values at all; treat its documents
      // as having the value 0 rather than failing with a NullPointerException.
      long outcomeValue = leafOutcomeValue == null ? 0L : leafOutcomeValue.get(doc);
      if (outcomeValue == positiveLabel) {
        positiveSet.set(context.docBase + doc);
        numPositiveDocs++;
      } else {
        negativeSet.set(context.docBase + doc);
      }
    }

    /**
     * Scores the terms and writes three response entries: "featuredTerms" (term -> score),
     * "docFreq" (term -> number of collected docs containing it, for the featured terms only)
     * and "numDocs" (number of collected docs).
     */
    @Override
    public void finish() throws IOException {
      NamedList<Double> analytics = new NamedList<>();
      NamedList<Integer> topFreq = new NamedList<>();

      rb.rsp.add("featuredTerms", analytics);
      rb.rsp.add("docFreq", topFreq);
      rb.rsp.add("numDocs", count);

      // ordered by ascending score, so first() is always the weakest retained term
      TreeSet<TermWithScore> topTerms = new TreeSet<>();

      double numDocs = count;
      double pc = numPositiveDocs / numDocs;
      double entropyC = binaryEntropy(pc);

      // getTerms returns null when the field has no postings; there is nothing to score then
      Terms terms = MultiFields.getTerms(searcher.getIndexReader(), field);
      if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        PostingsEnum postingsEnum = null;
        while ((term = termsEnum.next()) != null) {
          // only doc ids are needed, not frequencies
          postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
          int xc = 0;
          int nc = 0;
          while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int docId = postingsEnum.docID();
            if (positiveSet.get(docId)) {
              xc++;
            } else if (negativeSet.get(docId)) {
              nc++;
            }
          }

          int docFreq = xc + nc;
          if (docFreq == 0) {
            // The term occurs in none of the collected documents. Scoring it would divide 0 by 0
            // and produce NaN, which sorts above every real score and could never be evicted.
            continue;
          }

          double entropyContainsTerm = binaryEntropy( (double) xc / docFreq );
          double entropyNotContainsTerm = binaryEntropy( (double) (numPositiveDocs - xc) / (numDocs - docFreq + 1) );
          double score = entropyC - ( (docFreq / numDocs) * entropyContainsTerm + (1.0 - docFreq / numDocs) * entropyNotContainsTerm);

          if (topTerms.size() < numTerms) {
            topTerms.add(new TermWithScore(term.utf8ToString(), score, docFreq));
          } else if (!topTerms.isEmpty() && topTerms.first().score < score) {
            topTerms.pollFirst();
            topTerms.add(new TermWithScore(term.utf8ToString(), score, docFreq));
          }
        }
      }

      for (TermWithScore topTerm : topTerms) {
        analytics.add(topTerm.term, topTerm.score);
        topFreq.add(topTerm.term, topTerm.docFreq);
      }

      if (this.delegate instanceof DelegatingCollector) {
        ((DelegatingCollector) this.delegate).finish();
      }
    }

    /** Entropy (natural log) of a two-outcome distribution with probability {@code prob}. */
    private double binaryEntropy(double prob) {
      if (prob == 0 || prob == 1) return 0;
      return (-1 * prob * Math.log(prob)) + (-1 * (1.0 - prob) * Math.log(1.0 - prob));
    }

  }

  /**
   * A term with its score and document frequency. Ordering is by score, then term;
   * equality and hashing consider the term only.
   */
  private static class TermWithScore implements Comparable<TermWithScore>{
    public final String term;
    public final double score;
    public final int docFreq;

    public TermWithScore(String term, double score, int docFreq) {
      this.term = term;
      this.score = score;
      this.docFreq = docFreq;
    }

    @Override
    public int hashCode() {
      return term.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
      if (obj == null) return false;
      if (obj.getClass() != getClass()) return false;
      TermWithScore other = (TermWithScore) obj;
      return other.term.equals(this.term);
    }

    @Override
    public int compareTo(TermWithScore o) {
      int cmp = Double.compare(this.score, o.score);
      if (cmp == 0) {
        return this.term.compareTo(o.term);
      } else {
        return cmp;
      }
    }
  }
}

View File

@ -23,7 +23,7 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
@ -61,11 +61,11 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
* of an LRUCache at the same time. Make sure everything is thread safe.
*/
private static class CumulativeStats {
AtomicLong lookups = new AtomicLong();
AtomicLong hits = new AtomicLong();
AtomicLong inserts = new AtomicLong();
AtomicLong evictions = new AtomicLong();
AtomicLong evictionsRamUsage = new AtomicLong();
LongAdder lookups = new LongAdder();
LongAdder hits = new LongAdder();
LongAdder inserts = new LongAdder();
LongAdder evictions = new LongAdder();
LongAdder evictionsRamUsage = new LongAdder();
}
private CumulativeStats stats;
@ -124,8 +124,8 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
iterator.remove();
evictions++;
evictionsRamUsage++;
stats.evictions.incrementAndGet();
stats.evictionsRamUsage.incrementAndGet();
stats.evictions.increment();
stats.evictionsRamUsage.increment();
} while (iterator.hasNext() && ramBytesUsed > maxRamBytes);
// must return false according to javadocs of removeEldestEntry if we're modifying
// the map ourselves
@ -135,7 +135,7 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
// this doesn't need to be synchronized because it will
// only be called in the context of a higher level synchronized block.
evictions++;
stats.evictions.incrementAndGet();
stats.evictions.increment();
return true;
}
}
@ -180,7 +180,7 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
public V put(K key, V value) {
synchronized (map) {
if (getState() == State.LIVE) {
stats.inserts.incrementAndGet();
stats.inserts.increment();
}
// increment local inserts regardless of state???
@ -232,10 +232,10 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
if (getState() == State.LIVE) {
// only increment lookups and hits if we are live.
lookups++;
stats.lookups.incrementAndGet();
stats.lookups.increment();
if (val!=null) {
hits++;
stats.hits.incrementAndGet();
stats.hits.increment();
}
}
return val;
@ -341,15 +341,15 @@ public class LRUCache<K,V> extends SolrCacheBase implements SolrCache<K,V>, Acco
}
lst.add("warmupTime", warmupTime);
long clookups = stats.lookups.get();
long chits = stats.hits.get();
long clookups = stats.lookups.longValue();
long chits = stats.hits.longValue();
lst.add("cumulative_lookups", clookups);
lst.add("cumulative_hits", chits);
lst.add("cumulative_hitratio", calcHitRatio(clookups, chits));
lst.add("cumulative_inserts", stats.inserts.get());
lst.add("cumulative_evictions", stats.evictions.get());
lst.add("cumulative_inserts", stats.inserts.longValue());
lst.add("cumulative_evictions", stats.evictions.longValue());
if (maxRamBytes != Long.MAX_VALUE) {
lst.add("cumulative_evictionsRamUsage", stats.evictionsRamUsage.get());
lst.add("cumulative_evictionsRamUsage", stats.evictionsRamUsage.longValue());
}
return lst;

View File

@ -16,6 +16,11 @@
*/
package org.apache.solr.search;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrInfoMBean;
@ -26,11 +31,6 @@ import org.apache.solr.search.join.GraphQParserPlugin;
import org.apache.solr.search.mlt.MLTQParserPlugin;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrInfoMBean {
/** internal use - name of the default parser */
public static final String DEFAULT_QTYPE = LuceneQParserPlugin.NAME;
@ -77,6 +77,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
map.put(GraphQParserPlugin.NAME, GraphQParserPlugin.class);
map.put(XmlQParserPlugin.NAME, XmlQParserPlugin.class);
map.put(GraphTermsQParserPlugin.NAME, GraphTermsQParserPlugin.class);
map.put(IGainTermsQParserPlugin.NAME, IGainTermsQParserPlugin.class);
map.put(TextLogisticRegressionQParserPlugin.NAME, TextLogisticRegressionQParserPlugin.class);
standardPlugins = Collections.unmodifiableMap(map);
}

View File

@ -0,0 +1,283 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.solr.client.solrj.io.ClassificationEvaluation;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
/**
 * Returns an AnalyticsQuery implementation that performs
 * one Gradient Descent iteration of a result set to train a
 * logistic regression model
 *
 * The TextLogitStream provides the parallel iterative framework for this class.
 *
 * Required parameters: {@code feature}, {@code terms}, {@code idfs}, {@code iteration},
 * {@code outcome}. Optional: {@code weights} (defaults to all 1.0), {@code positiveLabel} (1),
 * {@code threshold} (0.5), {@code alpha} (0.01).
 **/
public class TextLogisticRegressionQParserPlugin extends QParserPlugin {
  public static final String NAME = "tlogit";

  @Override
  public void init(NamedList args) {
  }

  @Override
  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new TextLogisticRegressionQParser(qstr, localParams, params, req);
  }

  private static class TextLogisticRegressionQParser extends QParser{

    TextLogisticRegressionQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
      super(qstr, localParams, params, req);
    }

    @Override
    public Query parse() {
      // required() reports a missing parameter as a request error instead of a bare NPE
      SolrParams required = params.required();

      String fs = required.get("feature");
      String[] terms = required.get("terms").split(",");
      String ws = params.get("weights");
      String dfsStr = required.get("idfs");
      int iteration = required.getInt("iteration");
      String outcome = required.get("outcome");
      int positiveLabel = params.getInt("positiveLabel", 1);
      double threshold = params.getDouble("threshold", 0.5);
      double alpha = params.getDouble("alpha", 0.01);

      double[] idfs = new double[terms.length];
      String[] idfsArr = dfsStr.split(",");
      for (int i = 0; i < idfsArr.length; i++) {
        idfs[i] = Double.parseDouble(idfsArr[i]);
      }

      // one weight per term plus the bias weight at index 0
      double[] weights = new double[terms.length+1];

      if(ws != null) {
        String[] wa = ws.split(",");
        for (int i = 0; i < wa.length; i++) {
          weights[i] = Double.parseDouble(wa[i]);
        }
      } else {
        Arrays.fill(weights, 1.0d);
      }

      TrainingParams input = new TrainingParams(fs, terms, idfs, outcome, weights, iteration, alpha, positiveLabel, threshold);

      return new TextLogisticRegressionQuery(input);
    }
  }

  private static class TextLogisticRegressionQuery extends AnalyticsQuery {
    private final TrainingParams trainingParams;

    public TextLogisticRegressionQuery(TrainingParams trainingParams) {
      this.trainingParams = trainingParams;
    }

    @Override
    public DelegatingCollector getAnalyticsCollector(ResponseBuilder rbsp, IndexSearcher indexSearcher) {
      return new TextLogisticRegressionCollector(rbsp, indexSearcher, trainingParams);
    }
  }

  /**
   * Records which collected documents are positive examples, then in {@link #finish()} builds a
   * feature vector per document from the postings of the training terms and runs one pass of
   * gradient descent over those vectors.
   */
  private static class TextLogisticRegressionCollector extends DelegatingCollector {
    private final TrainingParams trainingParams;
    private final ClassificationEvaluation classificationEvaluation;
    // working copy updated during the pass; trainingParams.weights keeps the incoming values
    private final double[] weights;

    private final ResponseBuilder rbsp;
    private NumericDocValues leafOutcomeValue;
    private double totalError;
    // Both sets are indexed by global doc id (docBase + segment-local id).
    private final SparseFixedBitSet positiveDocsSet;
    private final SparseFixedBitSet docsSet;
    private final IndexSearcher searcher;

    TextLogisticRegressionCollector(ResponseBuilder rbsp, IndexSearcher searcher,
                                    TrainingParams trainingParams) {
      this.trainingParams = trainingParams;
      this.weights = Arrays.copyOf(trainingParams.weights, trainingParams.weights.length);
      this.rbsp = rbsp;
      this.classificationEvaluation = new ClassificationEvaluation();
      this.searcher = searcher;
      // Size by maxDoc(), not numDocs(): doc ids range over [0, maxDoc) and numDocs() is smaller
      // whenever the index contains deleted documents.
      int maxDoc = searcher.getIndexReader().maxDoc();
      positiveDocsSet = new SparseFixedBitSet(maxDoc);
      docsSet = new SparseFixedBitSet(maxDoc);
    }

    @Override
    public void doSetNextReader(LeafReaderContext context) throws IOException {
      super.doSetNextReader(context);
      LeafReader leafReader = context.reader();
      // may be null when this segment has no doc values for the outcome field
      leafOutcomeValue = leafReader.getNumericDocValues(trainingParams.outcome);
    }

    // NOTE(review): unlike IGainTermsQParserPlugin's collector this does not forward the document
    // to the delegate; kept as is -- confirm that this is intended.
    @Override
    public void collect(int doc) throws IOException{
      // A segment without the outcome field yields no doc values at all; treat its documents
      // as having the value 0 rather than failing with a NullPointerException.
      int outcome = leafOutcomeValue == null ? 0 : (int) leafOutcomeValue.get(doc);
      if (trainingParams.positiveLabel == outcome) {
        positiveDocsSet.set(context.docBase + doc);
      }
      docsSet.set(context.docBase+doc);
    }

    /**
     * Runs the gradient descent pass and adds a "logit" entry to the response containing the
     * updated weights, the accumulated absolute error of the incoming weights, the
     * classification evaluation, the feature field and the positive label.
     */
    @Override
    public void finish() throws IOException {

      Map<Integer, double[]> docVectors = new HashMap<>();
      // getTerms returns null when the field has no postings; no vectors can be built then
      Terms terms = MultiFields.getTerms(searcher.getIndexReader(), trainingParams.feature);

      if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        PostingsEnum postingsEnum = null;
        int termIndex = 0;
        for (String termStr : trainingParams.terms) {
          BytesRef term = new BytesRef(termStr);
          if (termsEnum.seekExact(term)) {
            postingsEnum = termsEnum.postings(postingsEnum);
            while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
              int docId = postingsEnum.docID();
              if (docsSet.get(docId)) {
                double[] vector = docVectors.get(docId);
                if (vector == null) {
                  vector = new double[trainingParams.terms.length+1];
                  vector[0] = 1.0; // bias input
                  docVectors.put(docId, vector);
                }
                vector[termIndex + 1] = trainingParams.idfs[termIndex] * (1.0 + Math.log(postingsEnum.freq()));
              }
            }
          }
          termIndex++;
        }
      }

      for (Map.Entry<Integer, double[]> entry : docVectors.entrySet()) {
        double[] vector = entry.getValue();
        int outcome = 0;
        if (positiveDocsSet.get(entry.getKey())) {
          outcome = 1;
        }
        // gradient step uses the weights as updated so far in this pass
        double sig = sigmoid(dot(vector, weights));
        double error = sig - outcome;

        // error and evaluation are reported against the weights this iteration started with
        double lastSig = sigmoid(dot(vector, trainingParams.weights));
        totalError += Math.abs(lastSig - outcome);
        classificationEvaluation.count(outcome,  lastSig >= trainingParams.threshold ? 1 : 0);

        double step = error * trainingParams.alpha;
        for(int i = 0; i < vector.length; i++) {
          weights[i] -= vector[i] * step;
        }
      }

      NamedList<Object> analytics = new NamedList<>();
      rbsp.rsp.add("logit", analytics);

      List<Double> outWeights = new ArrayList<>(weights.length);
      for(double d : weights) {
        outWeights.add(d);
      }

      analytics.add("weights", outWeights);
      analytics.add("error", totalError);
      analytics.add("evaluation", classificationEvaluation.toMap());
      analytics.add("feature", trainingParams.feature);
      analytics.add("positiveLabel", trainingParams.positiveLabel);
      if(this.delegate instanceof DelegatingCollector) {
        ((DelegatingCollector)this.delegate).finish();
      }
    }

    private double sigmoid(double in) {
      return 1.0 / (1+Math.exp(-in));
    }

    /** Sum of the element-wise products of {@code vals} and {@code weights}, over the length of {@code vals}. */
    private double dot(double[] vals, double[] weights) {
      double d = 0.0d;
      for(int i = 0; i < vals.length; ++i) {
        d += vals[i] * weights[i];
      }
      return d;
    }

  }

  /** Immutable holder for the parsed request parameters of one training iteration. */
  private static class TrainingParams {
    public final String feature;
    public final String[] terms;
    public final double[] idfs;
    public final String outcome;
    public final double[] weights;
    public final int iteration;
    public final int positiveLabel;
    public final double threshold;
    public final double alpha;

    public TrainingParams(String feature, String[] terms, double[] idfs, String outcome, double[] weights, int iteration, double alpha, int positiveLabel, double threshold) {
      this.feature = feature;
      this.terms = terms;
      this.idfs = idfs;
      this.outcome = outcome;
      this.weights = weights;
      this.alpha = alpha;
      this.iteration = iteration;
      this.positiveLabel = positiveLabel;
      this.threshold = threshold;
    }
  }
}

View File

@ -64,6 +64,7 @@ import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.function.CollapseScoreFunction;
import org.apache.solr.search.function.OrdFieldSource;
import org.apache.solr.search.function.ReverseOrdFieldSource;
import org.apache.solr.search.function.SolrComparisonBoolFunction;
import org.apache.solr.search.function.distance.GeoDistValueSourceParser;
import org.apache.solr.search.function.distance.GeohashFunction;
import org.apache.solr.search.function.distance.GeohashHaversineFunction;
@ -826,6 +827,57 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
}
});
// Binary comparison functions. Each one parses a left and a right value source (in that
// order) and wraps them in a SolrComparisonBoolFunction whose predicate is applied to the
// sign of the comparison result.
addParser("gt", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    final ValueSource left = fp.parseValueSource();
    final ValueSource right = fp.parseValueSource();
    return new SolrComparisonBoolFunction(left, right, "gt", order -> order > 0);
  }
});

addParser("lt", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    final ValueSource left = fp.parseValueSource();
    final ValueSource right = fp.parseValueSource();
    return new SolrComparisonBoolFunction(left, right, "lt", order -> order < 0);
  }
});

addParser("gte", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    final ValueSource left = fp.parseValueSource();
    final ValueSource right = fp.parseValueSource();
    return new SolrComparisonBoolFunction(left, right, "gte", order -> order >= 0);
  }
});

addParser("lte", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    final ValueSource left = fp.parseValueSource();
    final ValueSource right = fp.parseValueSource();
    return new SolrComparisonBoolFunction(left, right, "lte", order -> order <= 0);
  }
});

addParser("eq", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    final ValueSource left = fp.parseValueSource();
    final ValueSource right = fp.parseValueSource();
    return new SolrComparisonBoolFunction(left, right, "eq", order -> order == 0);
  }
});
addParser("def", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {

View File

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.function;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.ComparisonBoolFunction;
/**
 * Refines {@link ComparisonBoolFunction} so that the two operands are compared as 'long' when
 * both FunctionValues are integral ({@link LongDocValues} or {@link IntDocValues}), and as
 * 'double' in every other case.
 */
public class SolrComparisonBoolFunction extends ComparisonBoolFunction {

  /** Turns the result of a {@code compare} call (negative, zero or positive) into the boolean outcome. */
  public interface Compare {
    boolean compare(int integer);
  }

  private final Compare cmp;

  public SolrComparisonBoolFunction(ValueSource lhs, ValueSource rhs, String name, Compare cmp) {
    super(lhs, rhs, name);
    this.cmp = cmp;
  }

  @Override
  public boolean compare(int doc, FunctionValues lhs, FunctionValues rhs) {
    // TODO consider a separate FunctionValues impl, one for Long, one for Double
    // Choose the safest numeric comparison: two integral operands are compared as longs so
    // that no precision is lost by casting to double; anything else is compared as doubles.
    final int order = isIntegral(lhs) && isIntegral(rhs)
        ? Long.compare(lhs.longVal(doc), rhs.longVal(doc))
        : Double.compare(lhs.doubleVal(doc), rhs.doubleVal(doc));
    return cmp.compare(order);
  }

  /** True when the values are backed by long or int doc values. */
  private static boolean isIntegral(FunctionValues values) {
    return values instanceof LongDocValues || values instanceof IntDocValues;
  }

  // note: don't override equals; the "name" will be unique and is already compared
}

View File

@ -132,16 +132,15 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
}
}
assert null != defaultSim;
final Similarity defaultSimilarity = defaultSim;
similarity = new PerFieldSimilarityWrapper() {
similarity = new PerFieldSimilarityWrapper(defaultSim) {
@Override
public Similarity get(String name) {
FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name);
if (fieldType == null) {
return defaultSimilarity;
return defaultSim;
} else {
Similarity similarity = fieldType.getSimilarity();
return similarity == null ? defaultSimilarity : similarity;
return similarity == null ? defaultSim : similarity;
}
}
};

View File

@ -17,18 +17,11 @@
package org.apache.solr.security;
import javax.servlet.FilterChain;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.Closeable;
import java.io.IOException;
import java.security.Principal;
import java.util.Map;
import org.apache.http.auth.BasicUserPrincipal;
/**
*
* @lucene.experimental
@ -43,31 +36,19 @@ public abstract class AuthenticationPlugin implements Closeable {
*/
public abstract void init(Map<String, Object> pluginConfig);
protected void forward(String user, ServletRequest req, ServletResponse rsp,
FilterChain chain) throws IOException, ServletException {
if(user != null) {
final Principal p = new BasicUserPrincipal(user);
req = new HttpServletRequestWrapper((HttpServletRequest) req) {
@Override
public Principal getUserPrincipal() {
return p;
}
};
}
chain.doFilter(req,rsp);
}
/**
* This method must authenticate the request. Upon a successful authentication, this
* This method attempts to authenticate the request. Upon a successful authentication, this
* must call the next filter in the filter chain and set the user principal of the request,
* or else, upon an error or an authentication failure, throw an exception.
*
* @param request the http request
* @param response the http response
* @param filterChain the servlet filter chain
* @return false if the request should not be processed by Solr (should not continue), i.e.
* the response and status code have already been sent.
* @throws Exception any exception thrown during the authentication, e.g. PrivilegedActionException
*/
public abstract void doAuthenticate(ServletRequest request, ServletResponse response,
public abstract boolean doAuthenticate(ServletRequest request, ServletResponse response,
FilterChain filterChain) throws Exception;

View File

@ -99,7 +99,7 @@ public class BasicAuthPlugin extends AuthenticationPlugin implements ConfigEdita
}
@Override
public void doAuthenticate(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws Exception {
public boolean doAuthenticate(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws Exception {
HttpServletRequest request = (HttpServletRequest) servletRequest;
HttpServletResponse response = (HttpServletResponse) servletResponse;
@ -127,6 +127,7 @@ public class BasicAuthPlugin extends AuthenticationPlugin implements ConfigEdita
}
};
filterChain.doFilter(wrapper, response);
return true;
}
} else {
@ -143,8 +144,10 @@ public class BasicAuthPlugin extends AuthenticationPlugin implements ConfigEdita
} else {
request.setAttribute(AuthenticationPlugin.class.getName(), zkAuthentication.getPromptHeaders());
filterChain.doFilter(request, response);
return true;
}
}
return false;
}
@Override

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.security;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.LinkedList;
import java.util.List;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import org.apache.curator.RetryPolicy;
import org.apache.curator.framework.AuthInfo;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.api.ACLProvider;
import org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.hadoop.security.authentication.server.AuthenticationHandler;
import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticationFilter;
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkACLProvider;
import org.apache.solr.common.cloud.ZkCredentialsProvider;
import org.apache.zookeeper.data.ACL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DelegationTokenKerberosFilter extends DelegationTokenAuthenticationFilter {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private CuratorFramework curatorFramework;
@Override
public void init(FilterConfig conf) throws ServletException {
if (conf != null && "zookeeper".equals(conf.getInitParameter("signer.secret.provider"))) {
SolrZkClient zkClient =
(SolrZkClient)conf.getServletContext().getAttribute(KerberosPlugin.DELEGATION_TOKEN_ZK_CLIENT);
conf.getServletContext().setAttribute("signer.secret.provider.zookeeper.curator.client",
getCuratorClient(zkClient));
}
super.init(conf);
}
@Override
public void doFilter(ServletRequest request, ServletResponse response,
FilterChain filterChain) throws IOException, ServletException {
// HttpClient 4.4.x throws NPE if query string is null and parsed through URLEncodedUtils.
// See HTTPCLIENT-1746 and HADOOP-12767
HttpServletRequest httpRequest = (HttpServletRequest)request;
String queryString = httpRequest.getQueryString();
final String nonNullQueryString = queryString == null ? "" : queryString;
HttpServletRequest requestNonNullQueryString = new HttpServletRequestWrapper(httpRequest){
@Override
public String getQueryString() {
return nonNullQueryString;
}
};
super.doFilter(requestNonNullQueryString, response, filterChain);
}
@Override
public void destroy() {
super.destroy();
if (curatorFramework != null) curatorFramework.close();
curatorFramework = null;
}
@Override
protected void initializeAuthHandler(String authHandlerClassName,
FilterConfig filterConfig) throws ServletException {
// set the internal authentication handler in order to record whether the request should continue
super.initializeAuthHandler(authHandlerClassName, filterConfig);
AuthenticationHandler authHandler = getAuthenticationHandler();
super.initializeAuthHandler(KerberosPlugin.RequestContinuesRecorderAuthenticationHandler.class.getName(), filterConfig);
KerberosPlugin.RequestContinuesRecorderAuthenticationHandler newAuthHandler =
(KerberosPlugin.RequestContinuesRecorderAuthenticationHandler)getAuthenticationHandler();
newAuthHandler.setAuthHandler(authHandler);
}
protected CuratorFramework getCuratorClient(SolrZkClient zkClient) {
// should we try to build a RetryPolicy off of the ZkController?
RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3);
if (zkClient == null) {
throw new IllegalArgumentException("zkClient required");
}
String zkHost = zkClient.getZkServerAddress();
String zkChroot = zkHost.substring(zkHost.indexOf("/"));
zkChroot = zkChroot.startsWith("/") ? zkChroot.substring(1) : zkChroot;
String zkNamespace = zkChroot + SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH;
String zkConnectionString = zkHost.substring(0, zkHost.indexOf("/"));
SolrZkToCuratorCredentialsACLs curatorToSolrZk = new SolrZkToCuratorCredentialsACLs(zkClient);
final int connectionTimeoutMs = 30000; // this value is currently hard coded, see SOLR-7561.
curatorFramework = CuratorFrameworkFactory.builder()
.namespace(zkNamespace)
.connectString(zkConnectionString)
.retryPolicy(retryPolicy)
.aclProvider(curatorToSolrZk.getACLProvider())
.authorization(curatorToSolrZk.getAuthInfos())
.sessionTimeoutMs(zkClient.getZkClientTimeout())
.connectionTimeoutMs(connectionTimeoutMs)
.build();
curatorFramework.start();
return curatorFramework;
}
/**
* Convert Solr Zk Credentials/ACLs to Curator versions
*/
protected static class SolrZkToCuratorCredentialsACLs {
private final ACLProvider aclProvider;
private final List<AuthInfo> authInfos;
public SolrZkToCuratorCredentialsACLs(SolrZkClient zkClient) {
this.aclProvider = createACLProvider(zkClient);
this.authInfos = createAuthInfo(zkClient);
}
public ACLProvider getACLProvider() { return aclProvider; }
public List<AuthInfo> getAuthInfos() { return authInfos; }
private ACLProvider createACLProvider(SolrZkClient zkClient) {
final ZkACLProvider zkACLProvider = zkClient.getZkACLProvider();
return new ACLProvider() {
@Override
public List<ACL> getDefaultAcl() {
return zkACLProvider.getACLsToAdd(null);
}
@Override
public List<ACL> getAclForPath(String path) {
List<ACL> acls = zkACLProvider.getACLsToAdd(path);
return acls;
}
};
}
private List<AuthInfo> createAuthInfo(SolrZkClient zkClient) {
List<AuthInfo> ret = new LinkedList<AuthInfo>();
// In theory the credentials to add could change here if zookeeper hasn't been initialized
ZkCredentialsProvider credentialsProvider =
zkClient.getZkClientConnectionStrategy().getZkCredentialsToAddAutomatically();
for (ZkCredentialsProvider.ZkCredentials zkCredentials : credentialsProvider.getCredentials()) {
ret.add(new AuthInfo(zkCredentials.getScheme(), zkCredentials.getAuth()));
}
return ret;
}
}
}

View File

@ -26,6 +26,7 @@ import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.security.authentication.server.AuthenticationFilter;
import org.apache.hadoop.security.authentication.server.AuthenticationHandler;
public class KerberosFilter extends AuthenticationFilter {
@ -34,6 +35,19 @@ public class KerberosFilter extends AuthenticationFilter {
super.init(conf);
}
@Override
protected void initializeAuthHandler(String authHandlerClassName,
    FilterConfig filterConfig) throws ServletException {
  // Goal: make the filter's internal authentication handler one that records whether the
  // request should continue, while the requested handler keeps doing the real work.

  // Step 1: initialize with the requested handler class and keep a reference to the result.
  super.initializeAuthHandler(authHandlerClassName, filterConfig);
  final AuthenticationHandler delegate = getAuthenticationHandler();

  // Step 2: initialize again with the recording handler class, then point it at the delegate.
  final String recorderClassName =
      KerberosPlugin.RequestContinuesRecorderAuthenticationHandler.class.getName();
  super.initializeAuthHandler(recorderClassName, filterConfig);
  final KerberosPlugin.RequestContinuesRecorderAuthenticationHandler recorder =
      (KerberosPlugin.RequestContinuesRecorderAuthenticationHandler) getAuthenticationHandler();
  recorder.setAuthHandler(delegate);
}
@Override
protected void doFilter(FilterChain filterChain, HttpServletRequest request,
HttpServletResponse response) throws IOException, ServletException {

View File

@ -16,14 +16,18 @@
*/
package org.apache.solr.security;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.Enumeration;
import java.util.EventListener;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import javax.servlet.Filter;
@ -41,12 +45,22 @@ import javax.servlet.SessionCookieConfig;
import javax.servlet.SessionTrackingMode;
import javax.servlet.FilterRegistration.Dynamic;
import javax.servlet.descriptor.JspConfigDescriptor;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpServletResponseWrapper;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.collections.iterators.IteratorEnumeration;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.security.authentication.server.AuthenticationHandler;
import org.apache.hadoop.security.authentication.server.AuthenticationToken;
import org.apache.solr.client.solrj.impl.HttpClientConfigurer;
import org.apache.solr.client.solrj.impl.Krb5HttpClientConfigurer;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.CoreContainer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -55,7 +69,7 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
HttpClientConfigurer kerberosConfigurer = new Krb5HttpClientConfigurer();
Filter kerberosFilter = new KerberosFilter();
Filter kerberosFilter;
public static final String NAME_RULES_PARAM = "solr.kerberos.name.rules";
public static final String COOKIE_DOMAIN_PARAM = "solr.kerberos.cookie.domain";
@ -64,6 +78,26 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
public static final String KEYTAB_PARAM = "solr.kerberos.keytab";
public static final String TOKEN_VALID_PARAM = "solr.kerberos.token.valid";
public static final String COOKIE_PORT_AWARE_PARAM = "solr.kerberos.cookie.portaware";
public static final String DELEGATION_TOKEN_ENABLED = "solr.kerberos.delegation.token.enabled";
public static final String DELEGATION_TOKEN_KIND = "solr.kerberos.delegation.token.kind";
public static final String DELEGATION_TOKEN_VALIDITY = "solr.kerberos.delegation.token.validity";
public static final String DELEGATION_TOKEN_SECRET_PROVIDER = "solr.kerberos.delegation.token.signer.secret.provider";
public static final String DELEGATION_TOKEN_SECRET_PROVIDER_ZK_PATH =
"solr.kerberos.delegation.token.signer.secret.provider.zookeper.path";
public static final String DELEGATION_TOKEN_SECRET_MANAGER_ZNODE_WORKING_PATH =
"solr.kerberos.delegation.token.secret.manager.znode.working.path";
public static final String DELEGATION_TOKEN_TYPE_DEFAULT = "solr-dt";
// filled in by Plugin/Filter
static final String REQUEST_CONTINUES_ATTR =
"org.apache.solr.security.kerberosplugin.requestcontinues";
static final String DELEGATION_TOKEN_ZK_CLIENT =
"solr.kerberos.delegation.token.zk.client";
// allows test to specify an alternate auth handler
@VisibleForTesting
public static final String AUTH_HANDLER_PARAM = "solr.kerberos.auth.handler";
private final CoreContainer coreContainer;
public KerberosPlugin(CoreContainer coreContainer) {
@ -74,12 +108,48 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
public void init(Map<String, Object> pluginConfig) {
try {
Map<String, String> params = new HashMap();
params.put("type", "kerberos");
putParam(params, "type", AUTH_HANDLER_PARAM, "kerberos");
putParam(params, "kerberos.name.rules", NAME_RULES_PARAM, "DEFAULT");
putParam(params, "token.valid", TOKEN_VALID_PARAM, "30");
putParam(params, "cookie.path", COOKIE_PATH_PARAM, "/");
if ("kerberos".equals(params.get("type"))) {
putParam(params, "kerberos.principal", PRINCIPAL_PARAM, null);
putParam(params, "kerberos.keytab", KEYTAB_PARAM, null);
} else {
// allow tests which specify AUTH_HANDLER_PARAM to avoid specifying kerberos principal/keytab
putParamOptional(params, "kerberos.principal", PRINCIPAL_PARAM);
putParamOptional(params, "kerberos.keytab", KEYTAB_PARAM);
}
String delegationTokenStr = System.getProperty(DELEGATION_TOKEN_ENABLED, null);
boolean delegationTokenEnabled =
(delegationTokenStr == null) ? false : Boolean.parseBoolean(delegationTokenStr);
ZkController controller = coreContainer.getZkController();
if (delegationTokenEnabled) {
putParam(params, "delegation-token.token-kind", DELEGATION_TOKEN_KIND, DELEGATION_TOKEN_TYPE_DEFAULT);
if (coreContainer.isZooKeeperAware()) {
putParam(params, "signer.secret.provider", DELEGATION_TOKEN_SECRET_PROVIDER, "zookeeper");
if ("zookeeper".equals(params.get("signer.secret.provider"))) {
String zkHost = controller.getZkServerAddress();
putParam(params, "token.validity", DELEGATION_TOKEN_VALIDITY, "36000");
params.put("zk-dt-secret-manager.enable", "true");
// Note - Curator complains if the znodeWorkingPath starts with /
String chrootPath = zkHost.substring(zkHost.indexOf("/"));
String relativePath = chrootPath.startsWith("/") ? chrootPath.substring(1) : chrootPath;
putParam(params, "zk-dt-secret-manager.znodeWorkingPath",
DELEGATION_TOKEN_SECRET_MANAGER_ZNODE_WORKING_PATH,
relativePath + SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH + "/zkdtsm");
putParam(params, "signer.secret.provider.zookeeper.path",
DELEGATION_TOKEN_SECRET_PROVIDER_ZK_PATH, "/token");
// need to ensure krb5 is setup properly before running curator;
// the coreContainer should take care of this by calling configure on the
// kerberosConfigurer.
}
} else {
log.info("CoreContainer is not ZooKeeperAware, not setting ZK-related delegation token properties");
}
}
// Special handling for the "cookie.domain" based on whether port should be
// appended to the domain. Useful for situations where multiple solr nodes are
@ -94,16 +164,27 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
if (host==null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required parameter '"+COOKIE_DOMAIN_PARAM+"'.");
}
int port = coreContainer.getZkController().getHostPort();
int port = controller.getHostPort();
params.put("cookie.domain", host + ":" + port);
}
final ServletContext servletContext = new AttributeOnlyServletContext();
if (delegationTokenEnabled) {
kerberosFilter = new DelegationTokenKerberosFilter();
// pass an attribute-enabled context in order to pass the zkClient
// and because the filter may pass a curator instance.
if (controller != null) {
servletContext.setAttribute(DELEGATION_TOKEN_ZK_CLIENT, controller.getZkClient());
}
} else {
kerberosFilter = new KerberosFilter();
}
log.info("Params: "+params);
FilterConfig conf = new FilterConfig() {
@Override
public ServletContext getServletContext() {
return noContext;
return servletContext;
}
@Override
@ -136,11 +217,43 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
params.put(internalParamName, value);
}
/**
 * Copies the system property {@code externalParamName} into {@code params} under
 * {@code internalParamName}; leaves {@code params} untouched when the property is not set.
 */
private void putParamOptional(Map<String, String> params, String internalParamName, String externalParamName) {
  final String configured = System.getProperty(externalParamName);
  if (configured == null) {
    return;
  }
  params.put(internalParamName, configured);
}
@Override
public void doAuthenticate(ServletRequest req, ServletResponse rsp,
public boolean doAuthenticate(ServletRequest req, ServletResponse rsp,
FilterChain chain) throws Exception {
log.debug("Request to authenticate using kerberos: "+req);
kerberosFilter.doFilter(req, rsp, chain);
final HttpServletResponse frsp = (HttpServletResponse)rsp;
// kerberosFilter may close the stream and write to closed streams,
// see HADOOP-13346. To work around, pass a PrintWriter that ignores
// closes
HttpServletResponse rspCloseShield = new HttpServletResponseWrapper(frsp) {
@SuppressForbidden(reason = "Hadoop DelegationTokenAuthenticationFilter uses response writer, this" +
"is providing a CloseShield on top of that")
@Override
public PrintWriter getWriter() throws IOException {
final PrintWriter pw = new PrintWriterWrapper(frsp.getWriter()) {
@Override
public void close() {};
};
return pw;
}
};
kerberosFilter.doFilter(req, rspCloseShield, chain);
String requestContinuesAttr = (String)req.getAttribute(REQUEST_CONTINUES_ATTR);
if (requestContinuesAttr == null) {
log.warn("Could not find " + REQUEST_CONTINUES_ATTR);
return false;
} else {
return Boolean.parseBoolean(requestContinuesAttr);
}
}
@Override
@ -152,7 +265,8 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
kerberosFilter.destroy();
}
protected static ServletContext noContext = new ServletContext() {
protected static class AttributeOnlyServletContext implements ServletContext {
private Map<String, Object> attributes = new HashMap<String, Object>();
@Override
public void setSessionTrackingModes(Set<SessionTrackingMode> sessionTrackingModes) {}
@ -163,10 +277,14 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
}
@Override
public void setAttribute(String name, Object object) {}
public void setAttribute(String name, Object object) {
attributes.put(name, object);
}
@Override
public void removeAttribute(String name) {}
public void removeAttribute(String name) {
attributes.remove(name);
}
@Override
public void log(String message, Throwable throwable) {}
@ -329,12 +447,12 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
@Override
public Enumeration<String> getAttributeNames() {
return null;
return Collections.enumeration(attributes.keySet());
}
@Override
public Object getAttribute(String name) {
return null;
return attributes.get(name);
}
@Override
@ -394,4 +512,44 @@ public class KerberosPlugin extends AuthenticationPlugin implements HttpClientIn
return null;
}
};
/*
* {@link AuthenticationHandler} that delegates to another {@link AuthenticationHandler}
* and records the response of managementOperation (which indicates whether the request
* should continue or not).
*/
public static class RequestContinuesRecorderAuthenticationHandler implements AuthenticationHandler {
private AuthenticationHandler authHandler;
public void setAuthHandler(AuthenticationHandler authHandler) {
this.authHandler = authHandler;
}
public String getType() {
return authHandler.getType();
}
public void init(Properties config) throws ServletException {
// authHandler has already been init'ed, nothing to do here
}
public void destroy() {
authHandler.destroy();
}
public boolean managementOperation(AuthenticationToken token,
HttpServletRequest request,
HttpServletResponse response)
throws IOException, AuthenticationException {
boolean result = authHandler.managementOperation(token, request, response);
request.setAttribute(KerberosPlugin.REQUEST_CONTINUES_ATTR, new Boolean(result).toString());
return result;
}
public AuthenticationToken authenticate(HttpServletRequest request, HttpServletResponse response)
throws IOException, AuthenticationException {
return authHandler.authenticate(request, response);
}
}
}

View File

@ -89,12 +89,12 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
@SuppressForbidden(reason = "Needs currentTimeMillis to compare against time in header")
@Override
public void doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain) throws Exception {
public boolean doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain) throws Exception {
String requestURI = ((HttpServletRequest) request).getRequestURI();
if (requestURI.endsWith(PATH)) {
filterChain.doFilter(request, response);
return;
return true;
}
long receivedTime = System.currentTimeMillis();
String header = ((HttpServletRequest) request).getHeader(HEADER);
@ -102,14 +102,14 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
//this must not happen
log.error("No SolrAuth header present");
filterChain.doFilter(request, response);
return;
return true;
}
List<String> authInfo = StrUtils.splitWS(header, false);
if (authInfo.size() < 2) {
log.error("Invalid SolrAuth Header {}", header);
filterChain.doFilter(request, response);
return;
return true;
}
String nodeName = authInfo.get(0);
@ -119,12 +119,12 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
if (decipher == null) {
log.error("Could not decipher a header {} . No principal set", header);
filterChain.doFilter(request, response);
return;
return true;
}
if ((receivedTime - decipher.timestamp) > MAX_VALIDITY) {
log.error("Invalid key request timestamp: {} , received timestamp: {} , TTL: {}", decipher.timestamp, receivedTime, MAX_VALIDITY);
filterChain.doFilter(request, response);
return;
return true;
}
final Principal principal = "$".equals(decipher.userName) ?
@ -132,6 +132,7 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
new BasicUserPrincipal(decipher.userName);
filterChain.doFilter(getWrapper((HttpServletRequest) request, principal), response);
return true;
}
private static HttpServletRequestWrapper getWrapper(final HttpServletRequest request, final Principal principal) {

View File

@ -0,0 +1,215 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.security;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Locale;
import org.apache.commons.lang.NotImplementedException;
/**
 * Wrapper for PrintWriter that delegates to constructor arg.
 *
 * <p>Fluent methods ({@code append}, {@code format}, {@code printf}) forward to the delegate but
 * return this wrapper, so that chained calls keep going through the wrapper and any overrides a
 * subclass adds (for example a {@code close()} that does nothing).
 *
 * <p>{@code format(String, Object...)} and {@code printf(String, Object...)} are rejected as
 * "Forbidden API"; use the overloads that take an explicit {@link Locale}.
 */
public class PrintWriterWrapper extends PrintWriter {
  private final PrintWriter printWriter;

  /**
   * @param printWriter the writer every call is forwarded to
   */
  public PrintWriterWrapper(PrintWriter printWriter) {
    // The superclass needs some Writer; this StringWriter is never written to because
    // every public output method is overridden below.
    super(new StringWriter());
    this.printWriter = printWriter;
  }

  @Override
  public PrintWriter append(char c) {
    printWriter.append(c);
    return this;
  }

  @Override
  public PrintWriter append(CharSequence csq) {
    printWriter.append(csq);
    return this;
  }

  @Override
  public PrintWriter append(CharSequence csq, int start, int end) {
    printWriter.append(csq, start, end);
    return this;
  }

  @Override
  public boolean checkError() {
    return printWriter.checkError();
  }

  @Override
  protected void clearError() {
    throw new NotImplementedException();
  }

  @Override
  public void close() {
    printWriter.close();
  }

  @Override
  public void flush() {
    printWriter.flush();
  }

  @Override
  public PrintWriter format(Locale l, String format, Object... args) {
    printWriter.format(l, format, args);
    return this;
  }

  @Override
  public PrintWriter format(String format, Object... args) {
    throw new NotImplementedException("Forbidden API");
  }

  @Override
  public void print(boolean b) {
    printWriter.print(b);
  }

  @Override
  public void print(char c) {
    printWriter.print(c);
  }

  @Override
  public void print(char[] s) {
    printWriter.print(s);
  }

  @Override
  public void print(double d) {
    printWriter.print(d);
  }

  @Override
  public void print(float f) {
    printWriter.print(f);
  }

  @Override
  public void print(int i) {
    printWriter.print(i);
  }

  @Override
  public void print(long l) {
    printWriter.print(l);
  }

  @Override
  public void print(Object obj) {
    printWriter.print(obj);
  }

  @Override
  public void print(String s) {
    printWriter.print(s);
  }

  @Override
  public PrintWriter printf(Locale l, String format, Object... args) {
    printWriter.printf(l, format, args);
    return this;
  }

  @Override
  public PrintWriter printf(String format, Object... args) {
    throw new NotImplementedException("Forbidden API");
  }

  @Override
  public void println() {
    printWriter.println();
  }

  @Override
  public void println(boolean x) {
    printWriter.println(x);
  }

  @Override
  public void println(char x) {
    printWriter.println(x);
  }

  @Override
  public void println(char[] x) {
    printWriter.println(x);
  }

  @Override
  public void println(double x) {
    printWriter.println(x);
  }

  @Override
  public void println(float x) {
    printWriter.println(x);
  }

  @Override
  public void println(int x) {
    printWriter.println(x);
  }

  @Override
  public void println(long x) {
    printWriter.println(x);
  }

  @Override
  public void println(Object x) {
    printWriter.println(x);
  }

  @Override
  public void println(String x) {
    printWriter.println(x);
  }

  @Override
  protected void setError() {
    throw new NotImplementedException();
  }

  @Override
  public void write(char[] buf) {
    printWriter.write(buf);
  }

  @Override
  public void write(char[] buf, int off, int len) {
    printWriter.write(buf, off, len);
  }

  @Override
  public void write(int c) {
    printWriter.write(c);
  }

  @Override
  public void write(String s) {
    printWriter.write(s);
  }

  @Override
  public void write(String s, int off, int len) {
    printWriter.write(s, off, len);
  }
}

View File

@ -296,6 +296,7 @@ public class SolrDispatchFilter extends BaseSolrFilter {
}
private boolean authenticateRequest(ServletRequest request, ServletResponse response, final AtomicReference<ServletRequest> wrappedRequest) throws IOException {
boolean requestContinues = false;
final AtomicBoolean isAuthenticated = new AtomicBoolean(false);
AuthenticationPlugin authenticationPlugin = cores.getAuthenticationPlugin();
if (authenticationPlugin == null) {
@ -308,7 +309,7 @@ public class SolrDispatchFilter extends BaseSolrFilter {
try {
log.debug("Request to authenticate: {}, domain: {}, port: {}", request, request.getLocalName(), request.getLocalPort());
// upon successful authentication, this should call the chain's next filter.
authenticationPlugin.doAuthenticate(request, response, new FilterChain() {
requestContinues = authenticationPlugin.doAuthenticate(request, response, new FilterChain() {
public void doFilter(ServletRequest req, ServletResponse rsp) throws IOException, ServletException {
isAuthenticated.set(true);
wrappedRequest.set(req);
@ -319,8 +320,13 @@ public class SolrDispatchFilter extends BaseSolrFilter {
throw new SolrException(ErrorCode.SERVER_ERROR, "Error during request authentication, ", e);
}
}
// failed authentication?
if (!isAuthenticated.get()) {
// requestContinues is an optional short circuit, thus we still need to check isAuthenticated.
// This is because the AuthenticationPlugin doesn't always have enough information to determine if
// it should short circuit, e.g. the Kerberos Authentication Filter will send an error and not
// call later filters in chain, but doesn't throw an exception. We could force each Plugin
// to implement isAuthenticated to simplify the check here, but that just moves the complexity to
// multiple code paths.
if (!requestContinues || !isAuthenticated.get()) {
response.flushBuffer();
return false;
}

View File

@ -26,7 +26,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CodecReader;
@ -76,20 +76,20 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
protected final SolrCoreState solrCoreState;
// stats
AtomicLong addCommands = new AtomicLong();
AtomicLong addCommandsCumulative = new AtomicLong();
AtomicLong deleteByIdCommands= new AtomicLong();
AtomicLong deleteByIdCommandsCumulative= new AtomicLong();
AtomicLong deleteByQueryCommands= new AtomicLong();
AtomicLong deleteByQueryCommandsCumulative= new AtomicLong();
AtomicLong expungeDeleteCommands = new AtomicLong();
AtomicLong mergeIndexesCommands = new AtomicLong();
AtomicLong commitCommands= new AtomicLong();
AtomicLong optimizeCommands= new AtomicLong();
AtomicLong rollbackCommands= new AtomicLong();
AtomicLong numDocsPending= new AtomicLong();
AtomicLong numErrors = new AtomicLong();
AtomicLong numErrorsCumulative = new AtomicLong();
LongAdder addCommands = new LongAdder();
LongAdder addCommandsCumulative = new LongAdder();
LongAdder deleteByIdCommands= new LongAdder();
LongAdder deleteByIdCommandsCumulative= new LongAdder();
LongAdder deleteByQueryCommands= new LongAdder();
LongAdder deleteByQueryCommandsCumulative= new LongAdder();
LongAdder expungeDeleteCommands = new LongAdder();
LongAdder mergeIndexesCommands = new LongAdder();
LongAdder commitCommands= new LongAdder();
LongAdder optimizeCommands= new LongAdder();
LongAdder rollbackCommands= new LongAdder();
LongAdder numDocsPending= new LongAdder();
LongAdder numErrors = new LongAdder();
LongAdder numErrorsCumulative = new LongAdder();
// tracks when auto-commit should occur
protected final CommitTracker commitTracker;
@ -158,7 +158,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
}
protected void rollbackWriter() throws IOException {
numDocsPending.set(0);
numDocsPending.reset();
solrCoreState.rollbackIndexWriter(core);
}
@ -192,8 +192,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
private int addDoc0(AddUpdateCommand cmd) throws IOException {
int rc = -1;
addCommands.incrementAndGet();
addCommandsCumulative.incrementAndGet();
addCommands.increment();
addCommandsCumulative.increment();
// if there is no ID field, don't overwrite
if (idField == null) {
@ -230,10 +230,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
rc = 1;
} finally {
if (rc != 1) {
numErrors.incrementAndGet();
numErrorsCumulative.incrementAndGet();
numErrors.increment();
numErrorsCumulative.increment();
} else {
numDocsPending.incrementAndGet();
numDocsPending.increment();
}
}
@ -368,8 +368,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
// we don't return the number of docs deleted because it's not always possible to quickly know that info.
@Override
public void delete(DeleteUpdateCommand cmd) throws IOException {
deleteByIdCommands.incrementAndGet();
deleteByIdCommandsCumulative.incrementAndGet();
deleteByIdCommands.increment();
deleteByIdCommandsCumulative.increment();
Term deleteTerm = new Term(idField.getName(), cmd.getIndexedId());
// SolrCore.verbose("deleteDocuments",deleteTerm,writer);
@ -426,8 +426,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
// we don't return the number of docs deleted because it's not always possible to quickly know that info.
@Override
public void deleteByQuery(DeleteUpdateCommand cmd) throws IOException {
deleteByQueryCommands.incrementAndGet();
deleteByQueryCommandsCumulative.incrementAndGet();
deleteByQueryCommands.increment();
deleteByQueryCommandsCumulative.increment();
boolean madeIt=false;
try {
Query q = getQuery(cmd);
@ -473,8 +473,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
} finally {
if (!madeIt) {
numErrors.incrementAndGet();
numErrorsCumulative.incrementAndGet();
numErrors.increment();
numErrorsCumulative.increment();
}
}
}
@ -482,7 +482,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
@Override
public int mergeIndexes(MergeIndexesCommand cmd) throws IOException {
mergeIndexesCommands.incrementAndGet();
mergeIndexesCommands.increment();
int rc;
log.info("start " + cmd);
@ -545,7 +545,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
error=false;
}
finally {
if (error) numErrors.incrementAndGet();
if (error) numErrors.increment();
}
}
@ -557,10 +557,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
}
if (cmd.optimize) {
optimizeCommands.incrementAndGet();
optimizeCommands.increment();
} else {
commitCommands.incrementAndGet();
if (cmd.expungeDeletes) expungeDeleteCommands.incrementAndGet();
commitCommands.increment();
if (cmd.expungeDeletes) expungeDeleteCommands.increment();
}
Future[] waitSearcher = null;
@ -622,7 +622,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
}
// SolrCore.verbose("writer.commit() end");
numDocsPending.set(0);
numDocsPending.reset();
callPostCommitCallbacks();
}
} finally {
@ -676,10 +676,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
solrCoreState.getCommitLock().unlock();
}
addCommands.set(0);
deleteByIdCommands.set(0);
deleteByQueryCommands.set(0);
if (error) numErrors.incrementAndGet();
addCommands.reset();
deleteByIdCommands.reset();
deleteByQueryCommands.reset();
if (error) numErrors.increment();
}
// if we are supposed to wait for the searcher to be registered, then we should do it
@ -707,7 +707,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
throw new UnsupportedOperationException("Rollback is currently not supported in SolrCloud mode. (SOLR-4895)");
}
rollbackCommands.incrementAndGet();
rollbackCommands.increment();
boolean error=true;
@ -727,13 +727,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
error=false;
}
finally {
addCommandsCumulative.set(
addCommandsCumulative.get() - addCommands.getAndSet( 0 ) );
deleteByIdCommandsCumulative.set(
deleteByIdCommandsCumulative.get() - deleteByIdCommands.getAndSet( 0 ) );
deleteByQueryCommandsCumulative.set(
deleteByQueryCommandsCumulative.get() - deleteByQueryCommands.getAndSet( 0 ) );
if (error) numErrors.incrementAndGet();
addCommandsCumulative.add(-addCommands.sumThenReset());
deleteByIdCommandsCumulative.add(-deleteByIdCommands.sumThenReset());
deleteByQueryCommandsCumulative.add(-deleteByQueryCommands.sumThenReset());
if (error) numErrors.increment();
}
}
@ -749,7 +746,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
commitTracker.close();
softCommitTracker.close();
numDocsPending.set(0);
numDocsPending.reset();
}
@ -882,7 +879,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
@Override
public NamedList getStatistics() {
NamedList lst = new SimpleOrderedMap();
lst.add("commits", commitCommands.get());
lst.add("commits", commitCommands.longValue());
if (commitTracker.getDocsUpperBound() > 0) {
lst.add("autocommit maxDocs", commitTracker.getDocsUpperBound());
}
@ -897,20 +894,20 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
lst.add("soft autocommit maxTime", "" + softCommitTracker.getTimeUpperBound() + "ms");
}
lst.add("soft autocommits", softCommitTracker.getCommitCount());
lst.add("optimizes", optimizeCommands.get());
lst.add("rollbacks", rollbackCommands.get());
lst.add("expungeDeletes", expungeDeleteCommands.get());
lst.add("docsPending", numDocsPending.get());
lst.add("optimizes", optimizeCommands.longValue());
lst.add("rollbacks", rollbackCommands.longValue());
lst.add("expungeDeletes", expungeDeleteCommands.longValue());
lst.add("docsPending", numDocsPending.longValue());
// pset.size() not synchronized, but it should be fine to access.
// lst.add("deletesPending", pset.size());
lst.add("adds", addCommands.get());
lst.add("deletesById", deleteByIdCommands.get());
lst.add("deletesByQuery", deleteByQueryCommands.get());
lst.add("errors", numErrors.get());
lst.add("cumulative_adds", addCommandsCumulative.get());
lst.add("cumulative_deletesById", deleteByIdCommandsCumulative.get());
lst.add("cumulative_deletesByQuery", deleteByQueryCommandsCumulative.get());
lst.add("cumulative_errors", numErrorsCumulative.get());
lst.add("adds", addCommands.longValue());
lst.add("deletesById", deleteByIdCommands.longValue());
lst.add("deletesByQuery", deleteByQueryCommands.longValue());
lst.add("errors", numErrors.longValue());
lst.add("cumulative_adds", addCommandsCumulative.longValue());
lst.add("cumulative_deletesById", deleteByIdCommandsCumulative.longValue());
lst.add("cumulative_deletesByQuery", deleteByQueryCommandsCumulative.longValue());
lst.add("cumulative_errors", numErrorsCumulative.longValue());
if (this.ulog != null) {
lst.add("transaction_logs_total_size", ulog.getTotalLogsSize());
lst.add("transaction_logs_total_number", ulog.getTotalLogsNumber());

View File

@ -28,6 +28,7 @@ import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.ReentrantLock;
import java.lang.invoke.MethodHandles;
import java.lang.ref.WeakReference;
@ -90,7 +91,7 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
public V get(K key) {
CacheEntry<K,V> e = map.get(key);
if (e == null) {
if (islive) stats.missCounter.incrementAndGet();
if (islive) stats.missCounter.increment();
return null;
}
if (islive) e.lastAccessed = stats.accessCounter.incrementAndGet();
@ -119,9 +120,9 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
currentSize = stats.size.get();
}
if (islive) {
stats.putCounter.incrementAndGet();
stats.putCounter.increment();
} else {
stats.nonLivePutCounter.incrementAndGet();
stats.nonLivePutCounter.increment();
}
// Check if we need to clear out old entries from the cache.
@ -172,7 +173,7 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
isCleaning = true;
this.oldestEntry = oldestEntry; // volatile write to make isCleaning visible
long timeCurrent = stats.accessCounter.get();
long timeCurrent = stats.accessCounter.longValue();
int sz = stats.size.get();
int numRemoved = 0;
@ -532,23 +533,23 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
public static class Stats {
private final AtomicLong accessCounter = new AtomicLong(0),
putCounter = new AtomicLong(0),
nonLivePutCounter = new AtomicLong(0),
missCounter = new AtomicLong();
private final AtomicLong accessCounter = new AtomicLong(0);
private final LongAdder putCounter = new LongAdder();
private final LongAdder nonLivePutCounter = new LongAdder();
private final LongAdder missCounter = new LongAdder();
private final AtomicInteger size = new AtomicInteger();
private AtomicLong evictionCounter = new AtomicLong();
public long getCumulativeLookups() {
return (accessCounter.get() - putCounter.get() - nonLivePutCounter.get()) + missCounter.get();
return (accessCounter.longValue() - putCounter.longValue() - nonLivePutCounter.longValue()) + missCounter.longValue();
}
public long getCumulativeHits() {
return accessCounter.get() - putCounter.get() - nonLivePutCounter.get();
return accessCounter.longValue() - putCounter.longValue() - nonLivePutCounter.longValue();
}
public long getCumulativePuts() {
return putCounter.get();
return putCounter.longValue();
}
public long getCumulativeEvictions() {
@ -560,18 +561,18 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V> {
}
public long getCumulativeNonLivePuts() {
return nonLivePutCounter.get();
return nonLivePutCounter.longValue();
}
public long getCumulativeMisses() {
return missCounter.get();
return missCounter.longValue();
}
public void add(Stats other) {
accessCounter.addAndGet(other.accessCounter.get());
putCounter.addAndGet(other.putCounter.get());
nonLivePutCounter.addAndGet(other.nonLivePutCounter.get());
missCounter.addAndGet(other.missCounter.get());
putCounter.add(other.putCounter.longValue());
nonLivePutCounter.add(other.nonLivePutCounter.longValue());
missCounter.add(other.missCounter.longValue());
evictionCounter.addAndGet(other.evictionCounter.get());
size.set(Math.max(size.get(), other.size.get()));
}

View File

@ -16,6 +16,12 @@
*/
package org.apache.solr.cloud;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.common.SolrInputDocument;
@ -36,12 +42,6 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Test split phase that occurs when a Collection API split call is made.
*/
@ -254,6 +254,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
address.replaceAll("/", "_"));
overseerElector.setup(ec);
overseerElector.joinElection(ec, false);
reader.close();
return zkClient;
}

View File

@ -16,6 +16,8 @@
*/
package org.apache.solr.cloud;
import javax.security.auth.login.AppConfigurationEntry;
import javax.security.auth.login.Configuration;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
@ -24,18 +26,57 @@ import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import javax.security.auth.login.AppConfigurationEntry;
import javax.security.auth.login.Configuration;
import com.google.common.base.Preconditions;
import org.apache.hadoop.minikdc.MiniKdc;
import org.apache.solr.client.solrj.impl.Krb5HttpClientConfigurer;
public class KerberosTestUtil {
public class KerberosTestServices {
private MiniKdc kdc;
private JaasConfiguration jaasConfiguration;
private Configuration savedConfig;
private Locale savedLocale;
/**
 * Stores the collaborators used by {@link #start()} and {@link #stop()}.
 * The constructor is private; instances are created by {@code Builder.build()}.
 *
 * @param kdc               the MiniKdc to start/stop; may be {@code null}, in which case
 *                          start/stop skip the KDC
 * @param jaasConfiguration the JAAS configuration that {@link #start()} installs
 * @param savedConfig       the JAAS configuration that {@link #stop()} puts back
 * @param savedLocale       the default locale that {@link #stop()} puts back
 */
private KerberosTestServices(MiniKdc kdc,
JaasConfiguration jaasConfiguration,
Configuration savedConfig,
Locale savedLocale) {
this.kdc = kdc;
this.jaasConfiguration = jaasConfiguration;
this.savedConfig = savedConfig;
this.savedLocale = savedLocale;
}
/**
 * Returns the MiniKdc this instance was constructed with.
 *
 * @return the KDC, or {@code null} when none was configured
 */
public MiniKdc getKdc() {
return kdc;
}
/**
 * Brings up the Kerberos test environment.
 *
 * <p>Steps, in order: switch the JVM default locale to {@link Locale#US} when the
 * current language is listed in {@code brokenLanguagesWithMiniKdc}; start the KDC
 * if one was configured; install the JAAS configuration held by this instance; and
 * ask {@code Krb5HttpClientConfigurer} to regenerate its JAAS configuration.
 *
 * @throws Exception if starting the KDC fails
 */
public void start() throws Exception {
  final String currentLanguage = Locale.getDefault().getLanguage();
  if (brokenLanguagesWithMiniKdc.contains(currentLanguage)) {
    Locale.setDefault(Locale.US);
  }

  if (kdc != null) {
    kdc.start();
  }

  Configuration.setConfiguration(jaasConfiguration);
  Krb5HttpClientConfigurer.regenerateJaasConfiguration();
}
/**
 * Tears down the Kerberos test environment and restores JVM-global state.
 *
 * <p>The KDC (if any) is stopped first. The saved JAAS configuration and the saved
 * default locale are restored in {@code finally} blocks, so a failure while stopping
 * the KDC or while resetting the JAAS configuration cannot leak a modified
 * configuration or locale into code that runs afterwards.
 */
public void stop() {
  try {
    if (kdc != null) {
      kdc.stop();
    }
  } finally {
    try {
      Configuration.setConfiguration(savedConfig);
      Krb5HttpClientConfigurer.regenerateJaasConfiguration();
    } finally {
      // The default locale is process-wide; always put back the value captured by the builder.
      Locale.setDefault(savedLocale);
    }
  }
}
/**
 * Creates a new, empty {@link Builder}.
 *
 * @return a fresh builder instance
 */
public static Builder builder() {
return new Builder();
}
/**
* Returns a MiniKdc that can be used for creating kerberos principals
* and keytabs. Caller is responsible for starting/stopping the kdc.
*/
public static MiniKdc getKdc(File workDir) throws Exception {
private static MiniKdc getKdc(File workDir) throws Exception {
Properties conf = MiniKdc.createConf();
return new MiniKdc(conf, workDir);
}
@ -44,7 +85,7 @@ public class KerberosTestUtil {
* Programmatic version of a jaas.conf file suitable for connecting
* to a SASL-configured zookeeper.
*/
public static class JaasConfiguration extends Configuration {
private static class JaasConfiguration extends Configuration {
private static AppConfigurationEntry[] clientEntry;
private static AppConfigurationEntry[] serverEntry;
@ -131,17 +172,58 @@ public class KerberosTestUtil {
new Locale("ja").getLanguage(),
new Locale("hi").getLanguage()
);
/**
* Returns the currently set locale, and overrides it with {@link Locale#US} if it's
* currently something MiniKdc can not handle
*
* @see Locale#setDefault
*/
public static final Locale overrideLocaleIfNotSpportedByMiniKdc() {
Locale old = Locale.getDefault();
if (brokenLanguagesWithMiniKdc.contains(Locale.getDefault().getLanguage())) {
Locale.setDefault(Locale.US);
public static class Builder {
private File kdcWorkDir;
private String clientPrincipal;
private File clientKeytab;
private String serverPrincipal;
private File serverKeytab;
private String appName;
private Locale savedLocale;
/**
 * Creates a builder and captures the JVM default locale at this moment;
 * {@code build()} hands that value to the {@code KerberosTestServices} it creates.
 */
public Builder() {
savedLocale = Locale.getDefault();
}
/**
 * Requests a MiniKdc for the built services.
 *
 * @param kdcWorkDir working directory for the KDC; {@code build()} only creates a
 *                   KDC when this is non-null
 * @return this builder, for chaining
 */
public Builder withKdc(File kdcWorkDir) {
this.kdcWorkDir = kdcWorkDir;
return this;
}
/**
 * Configures a JAAS configuration built from separate client and server credentials.
 * Clears any previously set application name, so {@code build()} uses the
 * four-argument {@code JaasConfiguration} constructor.
 *
 * @param clientPrincipal client principal; must not be {@code null}
 * @param clientKeytab    client keytab file; must not be {@code null}
 * @param serverPrincipal server principal (not null-checked here)
 * @param serverKeytab    server keytab file (not null-checked here)
 * @return this builder, for chaining
 */
public Builder withJaasConfiguration(String clientPrincipal, File clientKeytab,
String serverPrincipal, File serverKeytab) {
// Both required arguments are validated before any field is modified.
Preconditions.checkNotNull(clientPrincipal);
Preconditions.checkNotNull(clientKeytab);
this.clientPrincipal = clientPrincipal;
this.clientKeytab = clientKeytab;
this.serverPrincipal = serverPrincipal;
this.serverKeytab = serverKeytab;
this.appName = null;
return this;
}
/**
 * Configures a JAAS configuration built from a single principal/keytab pair and an
 * application name. Clears any previously set server credentials. {@code build()}
 * uses the three-argument {@code JaasConfiguration} constructor when
 * {@code appName} is non-null.
 *
 * @param principal principal to use; must not be {@code null}
 * @param keytab    keytab file for the principal; must not be {@code null}
 * @param appName   application name passed to {@code JaasConfiguration}; not null-checked
 *                  here, and a {@code null} value makes {@code build()} fall back to the
 *                  four-argument constructor
 * @return this builder, for chaining
 */
public Builder withJaasConfiguration(String principal, File keytab, String appName) {
// Both required arguments are validated before any field is modified.
Preconditions.checkNotNull(principal);
Preconditions.checkNotNull(keytab);
this.clientPrincipal = principal;
this.clientKeytab = keytab;
this.serverPrincipal = null;
this.serverKeytab = null;
this.appName = appName;
return this;
}
/**
 * Assembles a {@code KerberosTestServices} from the options collected so far.
 *
 * <p>A MiniKdc is created only when a KDC working directory was supplied. When a
 * client principal was supplied, the JAAS configuration that is current at this
 * point is captured and a new {@code JaasConfiguration} is created: the
 * four-argument form when no application name is set, otherwise the
 * three-argument form. Without a client principal both are left {@code null}.
 *
 * @return the configured, not yet started, services object
 * @throws Exception if creating the MiniKdc fails
 */
public KerberosTestServices build() throws Exception {
  MiniKdc miniKdc = null;
  if (kdcWorkDir != null) {
    miniKdc = getKdc(kdcWorkDir);
  }

  Configuration previousConfig = null;
  JaasConfiguration jaas = null;
  if (clientPrincipal != null) {
    previousConfig = Configuration.getConfiguration();
    if (appName == null) {
      jaas = new JaasConfiguration(clientPrincipal, clientKeytab, serverPrincipal, serverKeytab);
    } else {
      jaas = new JaasConfiguration(clientPrincipal, clientKeytab, appName);
    }
  }

  return new KerberosTestServices(miniKdc, jaas, previousConfig, savedLocale);
}
return old;
}
}

View File

@ -118,6 +118,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
if (!zkClient.isClosed()) {
zkClient.close();
}
zkStateReader.close();
}
}

View File

@ -23,6 +23,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.ZooDefs;
@ -77,6 +78,7 @@ public class OutOfBoxZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
zkClient.makePath("/protectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.create("/unprotectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.makePath("/unprotectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.create(SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH, "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.close();
log.info("####SETUP_END " + getTestName());
@ -93,7 +95,9 @@ public class OutOfBoxZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
public void testOutOfBoxSolrZkClient() throws Exception {
SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
true, true, true, true, true,
true, true, true, true, true);
} finally {
zkClient.close();
}
@ -110,6 +114,7 @@ public class OutOfBoxZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
assertTrue(verifiedList.contains("/solr/unprotectedMakePathNode"));
assertTrue(verifiedList.contains("/solr/protectedMakePathNode"));
assertTrue(verifiedList.contains("/solr/protectedCreateNode"));
assertTrue(verifiedList.contains("/solr" + SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH));
} finally {
zkClient.close();
}

View File

@ -18,18 +18,15 @@ package org.apache.solr.cloud;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.StringUtils;
import org.apache.solr.common.cloud.DefaultZkACLProvider;
import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.VMParamsAllAndReadonlyDigestZkACLProvider;
import org.apache.solr.common.cloud.VMParamsSingleSetCredentialsDigestZkCredentialsProvider;
import org.apache.solr.common.cloud.ZkACLProvider;
import org.apache.solr.common.cloud.ZkCredentialsProvider;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Id;
import org.apache.zookeeper.server.auth.DigestAuthenticationProvider;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@ -40,7 +37,6 @@ import java.io.File;
import java.io.UnsupportedEncodingException;
import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -88,6 +84,7 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
"readonlyACLUsername", "readonlyACLPassword").getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
zkClient.create("/protectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.makePath("/protectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.create(SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH, "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.close();
zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders(null, null,
@ -114,7 +111,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders(null, null,
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
false, false, false, false, false,
false, false, false, false, false);
} finally {
zkClient.close();
}
@ -125,7 +124,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders("connectAndAllACLUsername", "connectAndAllACLPasswordWrong",
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
false, false, false, false, false,
false, false, false, false, false);
} finally {
zkClient.close();
}
@ -136,7 +137,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders("connectAndAllACLUsername", "connectAndAllACLPassword",
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
true, true, true, true, true,
true, true, true, true, true);
} finally {
zkClient.close();
}
@ -147,7 +150,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders("readonlyACLUsername", "readonlyACLPassword",
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, false, false, false);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
true, true, false, false, false,
false, false, false, false, false);
} finally {
zkClient.close();
}
@ -159,7 +164,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
false, false, false, false, false,
false, false, false, false, false);
} finally {
zkClient.close();
}
@ -171,7 +178,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, false, false, false, false, false);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
false, false, false, false, false,
false, false, false, false, false);
} finally {
zkClient.close();
}
@ -183,7 +192,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
true, true, true, true, true,
true, true, true, true, true);
} finally {
zkClient.close();
}
@ -195,7 +206,9 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
SolrZkClient zkClient = new SolrZkClientUsingVMParamsProvidersButWithDifferentVMParamsNames(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, false, false, false);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
true, true, false, false, false,
false, false, false, false, false);
} finally {
zkClient.close();
}
@ -240,28 +253,18 @@ public class OverriddenZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
@Override
public ZkACLProvider createZkACLProvider() {
return new DefaultZkACLProvider() {
return new VMParamsAllAndReadonlyDigestZkACLProvider() {
@Override
protected List<ACL> createGlobalACLsToAdd() {
try {
List<ACL> result = new ArrayList<ACL>();
if (!StringUtils.isEmpty(digestUsername) && !StringUtils.isEmpty(digestPassword)) {
result.add(new ACL(ZooDefs.Perms.ALL, new Id("digest", DigestAuthenticationProvider.generateDigest(digestUsername + ":" + digestPassword))));
protected List<ACL> createNonSecurityACLsToAdd() {
return createACLsToAdd(true, digestUsername, digestPassword, digestReadonlyUsername, digestReadonlyPassword);
}
if (!StringUtils.isEmpty(digestReadonlyUsername) && !StringUtils.isEmpty(digestReadonlyPassword)) {
result.add(new ACL(ZooDefs.Perms.READ, new Id("digest", DigestAuthenticationProvider.generateDigest(digestReadonlyUsername + ":" + digestReadonlyPassword))));
}
if (result.isEmpty()) {
result = ZooDefs.Ids.OPEN_ACL_UNSAFE;
}
return result;
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
/**
 * Builds the ACL list from the four digest credential values via
 * {@code createACLsToAdd}, passing {@code false} as the first argument.
 * NOTE(review): the meaning of that boolean flag is not visible here - confirm
 * against {@code createACLsToAdd}.
 *
 * @return the set of ACLs to apply to security-related znodes
 */
@Override
protected List<ACL> createSecurityACLsToAdd() {
return createACLsToAdd(false, digestUsername, digestPassword, digestReadonlyUsername, digestReadonlyPassword);
}
};
}

View File

@ -130,6 +130,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
}
}
deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
zkStateReader.close();
zkClient.close();
}

View File

@ -20,15 +20,12 @@ import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.util.Locale;
import javax.security.auth.login.Configuration;
import org.apache.hadoop.minikdc.MiniKdc;
import org.apache.lucene.util.Constants;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.cloud.DefaultZkACLProvider;
import org.apache.solr.common.cloud.SaslZkACLProvider;
import org.apache.solr.common.cloud.SecurityAwareZkACLProvider;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkACLProvider;
import org.apache.solr.util.BadZookeeperThreadsFilter;
@ -50,8 +47,6 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
private static final Charset DATA_ENCODING = Charset.forName("UTF-8");
protected Locale savedLocale = null;
protected ZkTestServer zkServer;
@BeforeClass
@ -71,7 +66,6 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
@Override
public void setUp() throws Exception {
super.setUp();
savedLocale = KerberosTestUtil.overrideLocaleIfNotSpportedByMiniKdc();
log.info("####SETUP_START " + getTestName());
createTempDir();
@ -99,6 +93,7 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
try {
zkClient.create("/protectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.makePath("/protectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.create(SecurityAwareZkACLProvider.SECURITY_ZNODE_PATH, "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
} finally {
zkClient.close();
}
@ -115,7 +110,6 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
@Override
public void tearDown() throws Exception {
zkServer.shutdown();
Locale.setDefault(savedLocale);
super.tearDown();
}
@ -124,7 +118,9 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
// Test with Sasl enabled
SolrZkClient zkClient = new SolrZkClientWithACLs(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, true, true, true);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
true, true, true, true, true,
true, true, true, true, true);
} finally {
zkClient.close();
}
@ -134,7 +130,9 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
System.setProperty("zookeeper.sasl.client", "false");
zkClient = new SolrZkClientNoACLs(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
try {
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient, true, true, false, false, false);
VMParamsZkACLAndCredentialsProvidersTest.doTest(zkClient,
true, true, false, false, false,
false, false, false, false, false);
} finally {
zkClient.close();
System.clearProperty("zookeeper.sasl.client");
@ -176,8 +174,7 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
*/
public static class SaslZkTestServer extends ZkTestServer {
private String kdcDir;
private MiniKdc kdc;
private Configuration conf;
private KerberosTestServices kerberosTestServices;
public SaslZkTestServer(String zkDir, String kdcDir) {
super(zkDir);
@ -187,13 +184,11 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
public SaslZkTestServer(String zkDir, int port, String kdcDir) {
super(zkDir, port);
this.kdcDir = kdcDir;
conf = Configuration.getConfiguration();
}
@Override
public void run() throws InterruptedException {
try {
kdc = KerberosTestUtil.getKdc(new File(kdcDir));
// Don't require that credentials match the entire principal string, e.g.
// can match "solr" rather than "solr/host@DOMAIN"
System.setProperty("zookeeper.kerberos.removeRealmFromPrincipal", "true");
@ -202,12 +197,13 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
String zkClientPrincipal = "solr";
String zkServerPrincipal = "zookeeper/127.0.0.1";
kdc.start();
// Create ZK client and server principals and load them into the Configuration
kdc.createPrincipal(keytabFile, zkClientPrincipal, zkServerPrincipal);
KerberosTestUtil.JaasConfiguration jaas = new KerberosTestUtil.JaasConfiguration(
zkClientPrincipal, keytabFile, zkServerPrincipal, keytabFile);
Configuration.setConfiguration(jaas);
kerberosTestServices = KerberosTestServices.builder()
.withKdc(new File(kdcDir))
.withJaasConfiguration(zkClientPrincipal, keytabFile, zkServerPrincipal, keytabFile)
.build();
kerberosTestServices.start();
kerberosTestServices.getKdc().createPrincipal(keytabFile, zkClientPrincipal, zkServerPrincipal);
} catch (Exception ex) {
throw new RuntimeException(ex);
}
@ -220,8 +216,7 @@ public class SaslZkACLProviderTest extends SolrTestCaseJ4 {
System.clearProperty("zookeeper.authProvider.1");
System.clearProperty("zookeeper.kerberos.removeRealmFromPrincipal");
System.clearProperty("zookeeper.kerberos.removeHostFromPrincipal");
Configuration.setConfiguration(conf);
kdc.stop();
kerberosTestServices.stop();
}
}
}

View File

@ -236,21 +236,23 @@ public class TestAuthenticationFramework extends LuceneTestCase {
public void init(Map<String,Object> pluginConfig) {}
@Override
public void doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain)
public boolean doAuthenticate(ServletRequest request, ServletResponse response, FilterChain filterChain)
throws Exception {
if (expectedUsername == null) {
filterChain.doFilter(request, response);
return;
return true;
}
HttpServletRequest httpRequest = (HttpServletRequest)request;
String username = httpRequest.getHeader("username");
String password = httpRequest.getHeader("password");
log.info("Username: "+username+", password: "+password);
if(MockAuthenticationPlugin.expectedUsername.equals(username) && MockAuthenticationPlugin.expectedPassword.equals(password))
if(MockAuthenticationPlugin.expectedUsername.equals(username) && MockAuthenticationPlugin.expectedPassword.equals(password)) {
filterChain.doFilter(request, response);
else {
return true;
} else {
((HttpServletResponse)response).sendError(401, "Unauthorized request");
return false;
}
}

View File

@ -51,6 +51,7 @@ import org.apache.lucene.util.TestUtil;
import org.apache.commons.lang.StringUtils;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
/**
@ -104,7 +105,13 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "46", "val_i", "3", "ssto", "X", "subject", "ggg")).getStatus());
assertEquals(0, CLOUD_CLIENT.commit().getStatus());;
// uncommitted doc in transaction log
}
@Before
private void addUncommittedDoc99() throws Exception {
  // Before each test method, (re-)add doc 99 without committing so that it sits
  // in the transaction log. If a real-time get in one test makes the ulog re-open
  // its realtime searcher, the next test method still gets its own fresh copy of
  // doc 99 in the ulog.
  final int addStatus = CLOUD_CLIENT.add(
      sdoc("id", "99",
           "val_i", "1",
           "ssto", "X",
           "subject", "uncommitted")).getStatus();
  assertEquals(0, addStatus);
}
@ -170,13 +177,12 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
assertEquals(""+doc, 10L, doc.getFieldValue("val2_ss"));
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
public void testMultiValuedRTG() throws Exception {
SolrDocument doc = null;
// check same results as testMultiValued via RTG (committed doc)
doc = getRandClient(random()).getById("42", params("fl","val_ss:val_i, val2_ss:10, subject"));
assertEquals(""+doc, 2, doc.size());
assertEquals(""+doc, 3, doc.size());
assertEquals(""+doc, 1, doc.getFieldValue("val_ss"));
assertEquals(""+doc, 10L, doc.getFieldValue("val2_ss"));
assertEquals(""+doc, "aaa", doc.getFieldValue("subject"));
@ -219,6 +225,21 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
}
}
/**
 * Real-time get for ids 42 and 99 with an <code>fq</code> on <code>subject</code>
 * (value supplied through the <code>my_var</code> param) and an <code>fl</code> of two
 * real fields. Exactly one document, id 99, is expected back, carrying only
 * <code>id</code> and <code>val_i</code>.
 */
public void testFilterAndOneRealFieldRTG() throws Exception {
  final List<String> requestedIds = Arrays.asList("42","99");
  final SolrParams rtgParams = params(
      "fl","id,val_i",
      "fq","{!field f='subject' v=$my_var}",
      "my_var","uncommitted");

  final SolrDocumentList found = getRandClient(random()).getById(requestedIds, rtgParams);
  final String failMsg = rtgParams + " => " + found;

  // only one of the two requested ids may survive the filter
  assertEquals(failMsg, 1, found.size());
  assertEquals(failMsg, 1, found.getNumFound());

  // ... and it must be doc 99, reduced to the two requested fields
  final SolrDocument survivor = found.get(0);
  assertEquals(failMsg, 2, survivor.size());
  assertEquals(failMsg, "99", survivor.getFieldValue("id"));
  assertEquals(failMsg, 1, survivor.getFieldValue("val_i"));
}
public void testScoreAndAllRealFields() throws Exception {
for (String fl : TestPseudoReturnFields.SCORE_AND_REAL_FIELDS) {
SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl",fl));
@ -304,7 +325,6 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
}
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
public void testFunctionsRTG() throws Exception {
// if we use RTG (committed or otherwise) functions should behave the same
for (String id : Arrays.asList("42","99")) {
@ -334,7 +354,6 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
}
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
public void testFunctionsAndExplicitRTG() throws Exception {
// shouldn't matter if we use RTG (committed or otherwise)
for (String id : Arrays.asList("42","99")) {
@ -382,7 +401,6 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
}
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
public void testFunctionsAndScoreRTG() throws Exception {
// if we use RTG (committed or otherwise) score should be ignored
@ -578,40 +596,35 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
}
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9289")
public void testDocIdAugmenterRTG() throws Exception {
// Real-time get asking only for the [docid] augmenter, once for id 42 and once for id 99:
// each returned document must contain exactly one field, [docid], holding an Integer >= -1.
// NOTE(review): the NOTE/TODO remarks below say [docid] is silently ignored, which the
// assertions in the loop would not allow -- confirm whether those remarks are stale.
// NOTE: once this test is fixed to pass, testAugmentersRTG should also be updated to test [docid]
// TODO: in single node, [docid] is silently ignored for uncommited docs (see SOLR-9288) ...
// here we see even more confusing: [docid] is silently ignored for both committed & uncommited docs
// behavior shouldn't matter if we are committed or uncommitted
// for an uncommitted doc, we should get -1
for (String id : Arrays.asList("42","99")) {
SolrDocument doc = getRandClient(random()).getById(id, params("fl","[docid]"));
// failure message carries the id and the document that actually came back
String msg = id + ": fl=[docid] => " + doc;
assertEquals(msg, 1, doc.size());
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
// lower bound only: -1 is accepted as well as any non-negative value
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
}
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
public void testAugmentersRTG() throws Exception {
// behavior shouldn't matter if we are committed or uncommitted
for (String id : Arrays.asList("42","99")) {
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
for (SolrParams p : Arrays.asList(params("fl","[shard],[explain],x_alias:[value v=10 t=int]"),
params("fl","[shard]","fl","[explain],x_alias:[value v=10 t=int]"),
params("fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
for (SolrParams p : Arrays.asList
(params("fl","[docid],[shard],[explain],x_alias:[value v=10 t=int]"),
params("fl","[docid],[shard]","fl","[explain],x_alias:[value v=10 t=int]"),
params("fl","[docid]","fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
SolrDocument doc = getRandClient(random()).getById(id, p);
String msg = id + ": " + p + " => " + doc;
assertEquals(msg, 2, doc.size());
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
assertEquals(msg, 3, doc.size());
assertTrue(msg, doc.getFieldValue("[shard]") instanceof String);
// RTG: [explain] should be ignored
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
}
}
}
@ -635,23 +648,22 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
}
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
public void testAugmentersAndExplicitRTG() throws Exception {
// behavior shouldn't matter if we are committed or uncommitted
for (String id : Arrays.asList("42","99")) {
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
for (SolrParams p : Arrays.asList(params("fl","id,[explain],x_alias:[value v=10 t=int]"),
params("fl","id","fl","[explain],x_alias:[value v=10 t=int]"),
params("fl","id","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
for (SolrParams p : Arrays.asList(params("fl","id,[docid],[explain],x_alias:[value v=10 t=int]"),
params("fl","id,[docid]","fl","[explain],x_alias:[value v=10 t=int]"),
params("fl","id","fl","[docid]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) {
SolrDocument doc = getRandClient(random()).getById(id, p);
String msg = id + ": " + p + " => " + doc;
assertEquals(msg, 2, doc.size());
assertEquals(msg, 3, doc.size());
assertTrue(msg, doc.getFieldValue("id") instanceof String);
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
// RTG: [explain] should be missing (ignored)
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
}
}
}
@ -688,32 +700,29 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
}
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286")
public void testAugmentersAndScoreRTG() throws Exception {
// if we use RTG (committed or otherwise) score should be ignored
for (String id : Arrays.asList("42","99")) {
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
SolrDocument doc = getRandClient(random()).getById(id, params("fl","x_alias:[value v=10 t=int],score"));
String msg = id + " => " + doc;
assertEquals(msg, 1, doc.size());
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
for (SolrParams p : Arrays.asList(params("fl","x_alias:[value v=10 t=int],[explain],score"),
params("fl","x_alias:[value v=10 t=int],[explain]","fl","score"),
params("fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) {
for (SolrParams p : Arrays.asList(params("fl","d_alias:[docid],x_alias:[value v=10 t=int],[explain],score"),
params("fl","d_alias:[docid],x_alias:[value v=10 t=int],[explain]","fl","score"),
params("fl","d_alias:[docid]","fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) {
doc = getRandClient(random()).getById(id, p);
msg = id + ": " + p + " => " + doc;
assertEquals(msg, 1, doc.size());
assertTrue(msg, doc.getFieldValue("id") instanceof String);
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
assertEquals(msg, 2, doc.size());
assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer);
assertEquals(msg, 10, doc.getFieldValue("x_alias"));
// RTG: [explain] and score should be missing (ignored)
assertTrue(msg, doc.getFieldValue("d_alias") instanceof Integer);
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("d_alias")).intValue());
}
}
}
@ -758,8 +767,7 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
// NOTE: 'ssto' is the missing one
final List<String> fl = Arrays.asList
// NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well.
("id","[explain]","score","val_*","subj*");
("id","[docid]","[explain]","score","val_*","subj*");
final int iters = atLeast(random, 10);
for (int i = 0; i< iters; i++) {
@ -778,12 +786,13 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
SolrDocument doc = getRandClient(random()).getById(id, params);
String msg = id + ": " + params + " => " + doc;
assertEquals(msg, 3, doc.size());
assertEquals(msg, 4, doc.size());
assertTrue(msg, doc.getFieldValue("id") instanceof String);
// assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO
assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer);
assertEquals(msg, 1, doc.getFieldValue("val_i"));
assertTrue(msg, doc.getFieldValue("subject") instanceof String);
assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer);
assertTrue(msg, -1 <= ((Integer)doc.getFieldValue("[docid]")).intValue());
// RTG: [explain] and score should be missing (ignored)
}
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.cloud;
import org.apache.lucene.util.Constants;
import org.junit.BeforeClass;
/**
@ -27,6 +28,7 @@ public class TestLocalFSCloudBackupRestore extends AbstractCloudBackupRestoreTes
@BeforeClass
public static void setupClass() throws Exception {
assumeFalse("Backup/Restore is currently buggy on Windows. Tracking the fix on SOLR-9242", Constants.WINDOWS);
configureCluster(NUM_SHARDS)// nodes
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.configure();

Some files were not shown because too many files have changed in this diff Show More