Merge remote-tracking branch 'origin/branch_6x' into branch_6x

Noble Paul 2016-05-31 23:15:07 +05:30
commit 6662cdc07f
89 changed files with 10832 additions and 571 deletions

View File

@ -1192,7 +1192,7 @@ revision_re = re.compile(r'rev([a-f\d]+)')
def parse_config():
epilogue = textwrap.dedent('''
Example usage:
python3.2 -u dev-tools/scripts/smokeTestRelease.py http://people.apache.org/~whoever/staging_area/lucene-solr-4.3.0-RC1-rev1469340
python3 -u dev-tools/scripts/smokeTestRelease.py https://dist.apache.org/repos/dist/dev/lucene/lucene-solr-6.0.1-RC2-revc7510a0...
''')
description = 'Utility to test a release.'
parser = argparse.ArgumentParser(description=description, epilog=epilogue,
@ -1350,8 +1350,25 @@ def confirmAllReleasesAreTestedForBackCompat(smokeVersion, unpackPath):
else:
print(' success!')
def getScriptVersion():
topLevelDir = '../..' # Assumption: this script is in dev-tools/scripts/ of a checkout
m = re.compile(r'(.*)/').match(sys.argv[0]) # Get this script's directory
if m is not None and m.group(1) != '.':
origCwd = os.getcwd()
os.chdir(m.group(1))
os.chdir('../..')
topLevelDir = os.getcwd()
os.chdir(origCwd)
reBaseVersion = re.compile(r'version\.base\s*=\s*(\d+\.\d+)')
return reBaseVersion.search(open('%s/lucene/version.properties' % topLevelDir).read()).group(1)
def main():
c = parse_config()
scriptVersion = getScriptVersion()
if not c.version.startswith(scriptVersion + '.'):
raise RuntimeError('smokeTestRelease.py for %s.X is incompatible with a %s release.' % (scriptVersion, c.version))
print('NOTE: output encoding is %s' % sys.stdout.encoding)
smokeTest(c.java, c.url, c.revision, c.version, c.tmp_dir, c.is_signed, ' '.join(c.test_args))

View File

@ -23,6 +23,10 @@ New Features
e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to
DateTimeFormatter.ISO_INSTANT. (David Smiley)
* LUCENE-7300: The misc module now has a directory wrapper that uses hard-links if
applicable and supported when copying files from another FSDirectory in
Directory#copyFrom. (Simon Willnauer)
API Changes
* LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo
@ -82,7 +86,10 @@ Optimizations
(which is used by TermsQuery, multi-term queries and several point queries).
(Adrien Grand, Jeff Wartes, David Smiley)
* LUCENE-7299: Speed up BytesRefHash.sort(). (Adrien Grand)
* LUCENE-7299: Speed up BytesRefHash.sort() using radix sort. (Adrien Grand)
* LUCENE-7306: Speed up points indexing and merging using radix sort.
(Adrien Grand)
Bug Fixes

View File

@ -239,7 +239,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"5.5.1-cfs",
"5.5.1-nocfs",
"6.0.0-cfs",
"6.0.0-nocfs"
"6.0.0-nocfs",
"6.0.1-cfs",
"6.0.1-nocfs"
};
final String[] unsupportedNames = {

View File

@ -145,18 +145,19 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
List<ClassificationResult<BytesRef>> assignedClasses = new ArrayList<>();
Terms classes = MultiFields.getTerms(leafReader, classFieldName);
TermsEnum classesEnum = classes.iterator();
BytesRef next;
String[] tokenizedText = tokenize(inputDocument);
int docsWithClassSize = countDocsWithClass();
while ((next = classesEnum.next()) != null) {
if (next.length > 0) {
Term term = new Term(this.classFieldName, next);
double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize);
assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal));
if (classes != null) {
TermsEnum classesEnum = classes.iterator();
BytesRef next;
String[] tokenizedText = tokenize(inputDocument);
int docsWithClassSize = countDocsWithClass();
while ((next = classesEnum.next()) != null) {
if (next.length > 0) {
Term term = new Term(this.classFieldName, next);
double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize);
assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal));
}
}
}
// normalization; the values are transformed to a 0-1 range
return normClassificationResults(assignedClasses);
}
@ -168,8 +169,9 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
* @throws IOException if accessing term vectors or search fails
*/
protected int countDocsWithClass() throws IOException {
int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount();
if (docCount == -1) { // in case codec doesn't support getDocCount
Terms terms = MultiFields.getTerms(this.leafReader, this.classFieldName);
int docCount;
if (terms == null || terms.getDocCount() == -1) { // in case codec doesn't support getDocCount
TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
BooleanQuery.Builder q = new BooleanQuery.Builder();
q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
@ -179,6 +181,8 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
indexSearcher.search(q.build(),
classQueryCountCollector);
docCount = classQueryCountCollector.getTotalHits();
} else {
docCount = terms.getDocCount();
}
return docCount;
}
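
For orientation, a minimal usage sketch of the classifier this hunk hardens; the constructor and result accessors follow the 6.x classification API, while the leafReader variable and the "category"/"body" field names are hypothetical:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.classification.ClassificationResult;
import org.apache.lucene.classification.SimpleNaiveBayesClassifier;
import org.apache.lucene.util.BytesRef;

// leafReader is an already-open LeafReader over an index with a "category"
// (class) field and a "body" text field; no filtering query is applied
SimpleNaiveBayesClassifier classifier = new SimpleNaiveBayesClassifier(
    leafReader, new StandardAnalyzer(), null, "category", "body");
ClassificationResult<BytesRef> result = classifier.assignClass("text to classify");
BytesRef assignedClass = result.getAssignedClass();
double score = result.getScore();

With the null guard added above, an index that has no terms for the class field now yields an empty result list instead of a NullPointerException.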

View File

@ -168,28 +168,6 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi
}
}
/**
* Counts the number of documents in the index having at least a value for the 'class' field
*
* @return the no. of documents having a value for the 'class' field
* @throws java.io.IOException If accessing to term vectors or search fails
*/
protected int countDocsWithClass() throws IOException {
int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount();
if (docCount == -1) { // in case codec doesn't support getDocCount
TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
BooleanQuery.Builder q = new BooleanQuery.Builder();
q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
if (query != null) {
q.add(query, BooleanClause.Occur.MUST);
}
indexSearcher.search(q.build(),
classQueryCountCollector);
docCount = classQueryCountCollector.getTotalHits();
}
return docCount;
}
/**
* Returns a token array from the {@link org.apache.lucene.analysis.TokenStream} in input
*

View File

@ -175,7 +175,7 @@ public class ConfusionMatrixGenerator {
public double getPrecision(String klass) {
Map<String, Long> classifications = linearizedMatrix.get(klass);
double tp = 0;
double fp = 0;
double den = 0; // tp + fp
if (classifications != null) {
for (Map.Entry<String, Long> entry : classifications.entrySet()) {
if (klass.equals(entry.getKey())) {
@ -184,11 +184,11 @@ public class ConfusionMatrixGenerator {
}
for (Map<String, Long> values : linearizedMatrix.values()) {
if (values.containsKey(klass)) {
fp += values.get(klass);
den += values.get(klass);
}
}
}
return tp > 0 ? tp / (tp + fp) : 0;
return tp > 0 ? tp / den : 0;
}
/**
@ -246,7 +246,7 @@ public class ConfusionMatrixGenerator {
if (this.accuracy == -1) {
double tp = 0d;
double tn = 0d;
double fp = 0d;
double tfp = 0d; // tp + fp
double fn = 0d;
for (Map.Entry<String, Map<String, Long>> classification : linearizedMatrix.entrySet()) {
String klass = classification.getKey();
@ -259,63 +259,46 @@ public class ConfusionMatrixGenerator {
}
for (Map<String, Long> values : linearizedMatrix.values()) {
if (values.containsKey(klass)) {
fp += values.get(klass);
tfp += values.get(klass);
} else {
tn++;
}
}
}
this.accuracy = (tp + tn) / (fp + fn + tp + tn);
this.accuracy = (tp + tn) / (tfp + fn + tn);
}
return this.accuracy;
}
/**
* get the precision (see {@link #getPrecision(String)}) over all the classes.
* get the macro averaged precision (see {@link #getPrecision(String)}) over all the classes.
*
* @return the precision as computed from the whole confusion matrix
* @return the macro averaged precision as computed from the confusion matrix
*/
public double getPrecision() {
double tp = 0;
double fp = 0;
double p = 0;
for (Map.Entry<String, Map<String, Long>> classification : linearizedMatrix.entrySet()) {
String klass = classification.getKey();
for (Map.Entry<String, Long> entry : classification.getValue().entrySet()) {
if (klass.equals(entry.getKey())) {
tp += entry.getValue();
}
}
for (Map<String, Long> values : linearizedMatrix.values()) {
if (values.containsKey(klass)) {
fp += values.get(klass);
}
}
p += getPrecision(klass);
}
return tp > 0 ? tp / (tp + fp) : 0;
return p / linearizedMatrix.size();
}
/**
* get the recall (see {@link #getRecall(String)}) over all the classes
* get the macro averaged recall (see {@link #getRecall(String)}) over all the classes
*
* @return the recall as computed from the whole confusion matrix
* @return the recall as computed from the confusion matrix
*/
public double getRecall() {
double tp = 0;
double fn = 0;
double r = 0;
for (Map.Entry<String, Map<String, Long>> classification : linearizedMatrix.entrySet()) {
String klass = classification.getKey();
for (Map.Entry<String, Long> entry : classification.getValue().entrySet()) {
if (klass.equals(entry.getKey())) {
tp += entry.getValue();
} else {
fn += entry.getValue();
}
}
r += getRecall(klass);
}
return tp + fn > 0 ? tp / (tp + fn) : 0;
return r / linearizedMatrix.size();
}
@Override
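
Both getters above now macro-average: each class contributes its per-class precision or recall with equal weight, and the sum is divided by the number of classes. A standalone sketch of the same computation for precision, assuming the matrix maps actual class to (predicted class to count) as the hunks above suggest; the macroPrecision helper name and shape are mine:

import java.util.Map;

static double macroPrecision(Map<String, Map<String, Long>> linearizedMatrix) {
  double sum = 0;
  for (String klass : linearizedMatrix.keySet()) {
    long tp = 0;        // diagonal cell: predicted == actual == klass
    long predicted = 0; // column sum: everything predicted as klass (tp + fp)
    Long diag = linearizedMatrix.get(klass).get(klass);
    if (diag != null) {
      tp = diag;
    }
    for (Map<String, Long> row : linearizedMatrix.values()) {
      Long count = row.get(klass);
      if (count != null) {
        predicted += count;
      }
    }
    sum += tp > 0 ? (double) tp / predicted : 0;
  }
  return sum / linearizedMatrix.size();
}

Recall macro-averages the same way, but divides each diagonal cell by its row sum (tp + fn) instead of the column sum.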

View File

@ -661,7 +661,7 @@ public class MultiDocValues {
public final OrdinalMap mapping;
/** Creates a new MultiSortedDocValues over <code>values</code> */
MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
public MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
assert docStarts.length == values.length + 1;
this.values = values;
this.docStarts = docStarts;

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Comparator;
/** Specialized {@link BytesRef} comparator that
* {@link FixedLengthBytesRefArray#iterator(Comparator)} has optimizations
* for.
* @lucene.internal */
public abstract class BytesRefComparator implements Comparator<BytesRef> {
final int comparedBytesCount;
/** Sole constructor.
* @param comparedBytesCount the maximum number of bytes to compare. */
protected BytesRefComparator(int comparedBytesCount) {
this.comparedBytesCount = comparedBytesCount;
}
/** Return the unsigned byte to use for comparison at index {@code i}, or
* {@code -1} if all bytes that are useful for comparisons are exhausted.
* This may only be called with a value of {@code i} between {@code 0}
* included and {@code comparedBytesCount} excluded. */
protected abstract int byteAt(BytesRef ref, int i);
@Override
public int compare(BytesRef o1, BytesRef o2) {
for (int i = 0; i < comparedBytesCount; ++i) {
final int b1 = byteAt(o1, i);
final int b2 = byteAt(o2, i);
if (b1 != b2) {
return b1 - b2;
} else if (b1 == -1) {
break;
}
}
return 0;
}
}
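
As an illustration of the contract above, a sketch of a comparator for fixed-width four-byte keys; with a fixed width every byte position is meaningful, so byteAt never needs to return -1:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefComparator;

// compares exactly 4 bytes per value, unsigned, most significant byte first
BytesRefComparator fourByteComparator = new BytesRefComparator(4) {
  @Override
  protected int byteAt(BytesRef ref, int i) {
    return ref.bytes[ref.offset + i] & 0xff;
  }
};

As the next hunk shows, FixedLengthBytesRefArray#iterator(Comparator) checks for instanceof BytesRefComparator and hands such comparators to the MSB radix sorter instead of the comparison-based introsort path.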

View File

@ -105,6 +105,35 @@ final class FixedLengthBytesRefArray implements SortableBytesRefArray {
orderedEntries[i] = i;
}
if (comp instanceof BytesRefComparator) {
BytesRefComparator bComp = (BytesRefComparator) comp;
new MSBRadixSorter(bComp.comparedBytesCount) {
BytesRef scratch;
{
scratch = new BytesRef();
scratch.length = valueLength;
}
@Override
protected void swap(int i, int j) {
int o = orderedEntries[i];
orderedEntries[i] = orderedEntries[j];
orderedEntries[j] = o;
}
@Override
protected int byteAt(int i, int k) {
int index1 = orderedEntries[i];
scratch.bytes = blocks[index1 / valuesPerBlock];
scratch.offset = (index1 % valuesPerBlock) * valueLength;
return bComp.byteAt(scratch, k);
}
}.sort(0, size());
return orderedEntries;
}
final BytesRef pivot = new BytesRef();
final BytesRef scratch1 = new BytesRef();
final BytesRef scratch2 = new BytesRef();

View File

@ -33,7 +33,7 @@ public abstract class InPlaceMergeSorter extends Sorter {
}
void mergeSort(int from, int to) {
if (to - from < THRESHOLD) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
} else {
final int mid = (from + to) >>> 1;

View File

@ -38,7 +38,7 @@ public abstract class IntroSorter extends Sorter {
}
void quicksort(int from, int to, int maxDepth) {
if (to - from < THRESHOLD) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
return;
} else if (--maxDepth < 0) {

View File

@ -0,0 +1,219 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
/** Radix sorter for variable-length strings. This class sorts based on the most
* significant byte first and falls back to {@link IntroSorter} when the size
* of the buckets to sort becomes small. It is <b>NOT</b> stable.
* Worst-case memory usage is about {@code 2.3 KB}.
* @lucene.internal */
public abstract class MSBRadixSorter extends Sorter {
// after that many levels of recursion we fall back to introsort anyway
// this is used as a protection against the fact that radix sort performs
// worse when there are long common prefixes (probably because of cache
// locality)
private static final int LEVEL_THRESHOLD = 8;
// size of histograms: 256 + 1 to indicate that the string is finished
private static final int HISTOGRAM_SIZE = 257;
// buckets below this size will be sorted with introsort
private static final int LENGTH_THRESHOLD = 100;
// we store one histogram per recursion level
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
private final int maxLength;
/**
* Sole constructor.
* @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown.
*/
protected MSBRadixSorter(int maxLength) {
this.maxLength = maxLength;
}
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
* its length is less than or equal to {@code k}. This may only be called
with a value of {@code k} between {@code 0} included and
{@code maxLength} excluded. */
protected abstract int byteAt(int i, int k);
/** Get a fall-back sorter which may assume that the first k bytes of all compared strings are equal. */
protected Sorter getFallbackSorter(int k) {
return new IntroSorter() {
@Override
protected void swap(int i, int j) {
MSBRadixSorter.this.swap(i, j);
}
@Override
protected int compare(int i, int j) {
for (int o = k; o < maxLength; ++o) {
final int b1 = byteAt(i, o);
final int b2 = byteAt(j, o);
if (b1 != b2) {
return b1 - b2;
} else if (b1 == -1) {
break;
}
}
return 0;
}
@Override
protected void setPivot(int i) {
pivot.setLength(0);
for (int o = k; o < maxLength; ++o) {
final int b = byteAt(i, o);
if (b == -1) {
break;
}
pivot.append((byte) b);
}
}
@Override
protected int comparePivot(int j) {
for (int o = 0; o < pivot.length(); ++o) {
final int b1 = pivot.byteAt(o) & 0xff;
final int b2 = byteAt(j, k + o);
if (b1 != b2) {
return b1 - b2;
}
}
if (k + pivot.length() == maxLength) {
return 0;
}
return -1 - byteAt(j, k + pivot.length());
}
private final BytesRefBuilder pivot = new BytesRefBuilder();
};
}
@Override
protected final int compare(int i, int j) {
throw new UnsupportedOperationException("unused: not a comparison-based sort");
}
@Override
public void sort(int from, int to) {
checkRange(from, to);
sort(from, to, 0);
}
private void sort(int from, int to, int k) {
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
introSort(from, to, k);
} else {
radixSort(from, to, k);
}
}
private void introSort(int from, int to, int k) {
getFallbackSorter(k).sort(from, to);
}
private void radixSort(int from, int to, int k) {
int[] histogram = histograms[k];
if (histogram == null) {
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
} else {
Arrays.fill(histogram, 0);
}
buildHistogram(from, to, k, histogram);
// short-circuit: if all keys have the same byte at offset k, then recurse directly
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
if (histogram[i] == to - from) {
// everything is in the same bucket, recurse
if (i > 0) {
sort(from, to, k + 1);
}
return;
} else if (histogram[i] != 0) {
break;
}
}
int[] startOffsets = histogram;
int[] endOffsets = this.endOffsets;
sumHistogram(histogram, endOffsets);
reorder(from, to, startOffsets, endOffsets, k);
endOffsets = startOffsets;
if (k + 1 < maxLength) {
// recurse on all but the first bucket since all keys are equal in this
// bucket (we already compared all bytes)
for (int prev = endOffsets[0], i = 1; i < HISTOGRAM_SIZE; ++i) {
int h = endOffsets[i];
final int bucketLen = h - prev;
if (bucketLen > 1) {
sort(from + prev, from + h, k + 1);
}
prev = h;
}
}
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int i, int k) {
return byteAt(i, k) + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
return histogram;
}
/** Accumulate values of the histogram so that it does not store counts but
* start offsets. {@code endOffsets} will store the end offsets. */
private static void sumHistogram(int[] histogram, int[] endOffsets) {
int accum = 0;
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
final int count = histogram[i];
histogram[i] = accum;
accum += count;
endOffsets[i] = accum;
}
}
/**
* Reorder based on start/end offsets for each bucket. When this method
* returns, startOffsets and endOffsets are equal.
* @param startOffsets start offsets per bucket
* @param endOffsets end offsets per bucket
*/
private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
// reorder in place, like the dutch flag problem
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
final int limit = endOffsets[i];
for (int h1 = startOffsets[i]; h1 < limit; h1 = startOffsets[i]) {
final int b = getBucket(from + h1, k);
final int h2 = startOffsets[b]++;
swap(from + h1, from + h2);
}
}
}
}
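
A concrete trace of the histogram bookkeeping may help: sumHistogram rewrites the per-bucket counts into start offsets in place and records the running totals in endOffsets, which is exactly what reorder needs to know the target slice of each bucket. A standalone sketch with toy counts (hypothetical values):

// toy per-bucket counts at some recursion level
int[] histogram = {3, 2, 0, 5};
int[] endOffsets = new int[histogram.length];
int accum = 0;
for (int i = 0; i < histogram.length; ++i) {
  final int count = histogram[i];
  histogram[i] = accum;   // bucket i now holds its start offset
  accum += count;
  endOffsets[i] = accum;  // exclusive end offset of bucket i
}
// histogram  == {0, 3, 5, 5}
// endOffsets == {3, 5, 5, 10}
// reorder(...) then swaps every value into the [start, end) slice of its
// bucket, Dutch-flag style, and recursion continues per bucket at k + 1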

View File

@ -23,7 +23,7 @@ import java.util.Comparator;
* @lucene.internal */
public abstract class Sorter {
static final int THRESHOLD = 20;
static final int INSERTION_SORT_THRESHOLD = 20;
/** Sole constructor, used for inheritance. */
protected Sorter() {}

View File

@ -16,58 +16,36 @@
*/
package org.apache.lucene.util;
import java.util.Arrays;
abstract class StringMSBRadixSorter extends MSBRadixSorter {
/** Radix sorter for variable-length strings. This class sorts based on the most
* significant byte first and falls back to {@link IntroSorter} when the size
* of the buckets to sort becomes small. It is <b>NOT</b> stable.
* Worst-case memory usage is about {@code 2.3 KB} */
abstract class StringMSBRadixSorter extends Sorter {
// after that many levels of recursion we fall back to introsort anyway
private static final int LEVEL_THRESHOLD = 8;
// size of histograms: 256 + 1 to indicate that the string is finished
private static final int HISTOGRAM_SIZE = 257;
// buckets below this size will be sorted with introsort
private static final int LENGTH_THRESHOLD = 100;
// we store one histogram per recursion level
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
StringMSBRadixSorter() {
super(Integer.MAX_VALUE);
}
/** Get a {@link BytesRef} for the given index. */
protected abstract BytesRef get(int i);
/** Store bytes for the given index into {@code dest}, without the first k bytes. */
private void get(int i, int k, BytesRef dest) {
@Override
protected int byteAt(int i, int k) {
BytesRef ref = get(i);
assert ref.length >= k;
dest.bytes = ref.bytes;
dest.offset = ref.offset + k;
dest.length = ref.length - k;
}
@Override
protected final int compare(int i, int j) {
throw new UnsupportedOperationException("unused: not a comparison-based sort");
}
@Override
public void sort(int from, int to) {
checkRange(from, to);
sort(from, to, 0);
}
private void sort(int from, int to, int k) {
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
introSort(from, to, k);
} else {
radixSort(from, to, k);
if (ref.length <= k) {
return -1;
}
return ref.bytes[ref.offset + k] & 0xff;
}
private void introSort(int from, int to, int k) {
new IntroSorter() {
@Override
protected Sorter getFallbackSorter(int k) {
return new IntroSorter() {
private void get(int i, int k, BytesRef scratch) {
BytesRef ref = StringMSBRadixSorter.this.get(i);
assert ref.length >= k;
scratch.bytes = ref.bytes;
scratch.offset = ref.offset + k;
scratch.length = ref.length - k;
}
@Override
protected void swap(int i, int j) {
StringMSBRadixSorter.this.swap(i, j);
@ -92,96 +70,7 @@ abstract class StringMSBRadixSorter extends Sorter {
}
private final BytesRef pivot = new BytesRef(),
scratch1 = new BytesRef(), scratch2 = new BytesRef();
}.sort(from, to);
}
private void radixSort(int from, int to, int k) {
int[] histogram = histograms[k];
if (histogram == null) {
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
} else {
Arrays.fill(histogram, 0);
}
buildHistogram(from, to, k, histogram);
// short-circuit: if all keys have the same byte at offset k, then recurse directly
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
if (histogram[i] == to - from) {
// everything is in the same bucket, recurse
if (i > 0) {
sort(from, to, k + 1);
}
return;
} else if (histogram[i] != 0) {
break;
}
}
int[] startOffsets = histogram;
int[] endOffsets = this.endOffsets;
sumHistogram(histogram, endOffsets);
reorder(from, to, startOffsets, endOffsets, k);
endOffsets = startOffsets;
// recurse on all but the first bucket since all keys are equals in this
// bucket (we already compared all bytes)
for (int prev = endOffsets[0], i = 1; i < HISTOGRAM_SIZE; ++i) {
int h = endOffsets[i];
final int bucketLen = h - prev;
if (bucketLen > 1) {
sort(from + prev, from + h, k + 1);
}
prev = h;
}
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int id, int k) {
BytesRef ref = get(id);
if (ref.length <= k) {
return 0;
}
final int b = ref.bytes[ref.offset + k] & 0xff;
return b + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
return histogram;
}
/** Accumulate values of the histogram so that it does not store counts but
* start offsets. {@code endOffsets} will store the end offsets. */
private static void sumHistogram(int[] histogram, int[] endOffsets) {
int accum = 0;
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
final int count = histogram[i];
histogram[i] = accum;
accum += count;
endOffsets[i] = accum;
}
}
/**
* Reorder based on start/end offsets for each bucket. When this method
* returns, startOffsets and endOffsets are equal.
* @param startOffsets start offsets per bucket
* @param endOffsets end offsets per bucket
*/
private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
// reorder in place, like the dutch flag problem
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
final int limit = endOffsets[i];
for (int h1 = startOffsets[i]; h1 < limit; h1 = startOffsets[i]) {
final int b = getBucket(from + h1, k);
final int h2 = startOffsets[b]++;
swap(from + h1, from + h2);
}
}
scratch1 = new BytesRef(), scratch2 = new BytesRef();
};
}
}

View File

@ -25,7 +25,6 @@ import java.util.List;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@ -33,10 +32,11 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefComparator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.MSBRadixSorter;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.PriorityQueue;
@ -604,39 +604,26 @@ public class BKDWriter implements Closeable {
/** Sort the heap writer by the specified dim */
private void sortHeapPointWriter(final HeapPointWriter writer, int dim) {
final int pointCount = Math.toIntExact(this.pointCount);
// Tie-break by docID:
assert pointCount < Integer.MAX_VALUE;
//int[] swapCount = new int[1];
//int[] cmpCount = new int[1];
// System.out.println("SORT length=" + length);
// All buffered points are still in heap; just do in-place sort:
new IntroSorter() {
private final byte[] pivotPackedValue = new byte[bytesPerDim];
private int pivotDocID;
// No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
// can't matter at search time since we don't write ords into the index:
new MSBRadixSorter(bytesPerDim + Integer.BYTES) {
@Override
protected void setPivot(int i) {
pivotDocID = writer.docIDs[i];
int block = i / writer.valuesPerBlock;
int index = i % writer.valuesPerBlock;
System.arraycopy(writer.blocks.get(block), index*packedBytesLength+dim*bytesPerDim, pivotPackedValue, 0, bytesPerDim);
}
@Override
protected int comparePivot(int j) {
//cmpCount[0]++;
int block = j / writer.valuesPerBlock;
int index = j % writer.valuesPerBlock;
assert index >= 0: "index=" + index + " j=" + j;
int cmp = StringHelper.compare(bytesPerDim, pivotPackedValue, 0, writer.blocks.get(block), bytesPerDim*(index*numDims+dim));
if (cmp != 0) {
return cmp;
protected int byteAt(int i, int k) {
assert k >= 0;
if (k < bytesPerDim) {
// dim bytes
int block = i / writer.valuesPerBlock;
int index = i % writer.valuesPerBlock;
return writer.blocks.get(block)[index * packedBytesLength + dim * bytesPerDim + k] & 0xff;
} else {
// doc id
int s = 3 - (k - bytesPerDim);
return (writer.docIDs[i] >>> (s * 8)) & 0xff;
}
// Tie-break
return Integer.compare(pivotDocID, writer.docIDs[j]);
}
@Override
@ -670,26 +657,7 @@ public class BKDWriter implements Closeable {
System.arraycopy(scratch1, 0, blockJ, indexJ, packedBytesLength);
}
@Override
protected int compare(int i, int j) {
//cmpCount[0]++;
int blockI = i / writer.valuesPerBlock;
int dimI = i % writer.valuesPerBlock;
int blockJ = j / writer.valuesPerBlock;
int dimJ = j % writer.valuesPerBlock;
int cmp = StringHelper.compare(bytesPerDim, writer.blocks.get(blockI), bytesPerDim*(dimI*numDims+dim), writer.blocks.get(blockJ), bytesPerDim*(dimJ*numDims+dim));
if (cmp != 0) {
return cmp;
}
// Tie-break by docID:
// No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
// can't matter at search time since we don't write ords into the index:
return Integer.compare(writer.docIDs[i], writer.docIDs[j]);
}
}.sort(0, Math.toIntExact(pointCount));
//System.out.println("LEN=" + length + " SWAP=" + swapCount[0] + " CMP=" + cmpCount[0]);
}.sort(0, pointCount);
}
private PointWriter sort(int dim) throws IOException {
@ -724,28 +692,28 @@ public class BKDWriter implements Closeable {
final int offset = bytesPerDim * dim;
Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
final ByteArrayDataInput reader = new ByteArrayDataInput();
@Override
public int compare(BytesRef a, BytesRef b) {
// First compare by the requested dimension we are sorting by:
int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + offset, b.bytes, b.offset + offset);
if (cmp != 0) {
return cmp;
Comparator<BytesRef> cmp;
if (dim == numDims - 1) {
// in that case the bytes for the dimension and for the doc id are contiguous,
// so we don't need a branch
cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
@Override
protected int byteAt(BytesRef ref, int i) {
return ref.bytes[ref.offset + offset + i] & 0xff;
}
// Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
// the same value in a given dimension indexed more than once: it can't matter at search
// time since we don't write ords into the index:
return StringHelper.compare(Integer.BYTES,
a.bytes, a.offset + packedBytesLength,
b.bytes, b.offset + packedBytesLength);
}
};
};
} else {
cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
@Override
protected int byteAt(BytesRef ref, int i) {
if (i < bytesPerDim) {
return ref.bytes[ref.offset + offset + i] & 0xff;
} else {
return ref.bytes[ref.offset + packedBytesLength + i - bytesPerDim] & 0xff;
}
}
};
}
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) {
@ -1272,4 +1240,5 @@ public class BKDWriter implements Closeable {
return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, count, singleValuePerDoc);
}
}
}
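
The replacement byteAt above folds the docID tie-break into the radix key: the first bytesPerDim positions come from the value of the sort dimension and the next four come from the docID, most significant byte first. A standalone sketch of just that byte mapping, with a hypothetical helper name:

// k in [0, bytesPerDim)               -> value byte of the sort dimension
// k in [bytesPerDim, bytesPerDim + 4) -> docID byte, big-endian
static int radixByteAt(byte[] dimValue, int docID, int bytesPerDim, int k) {
  if (k < bytesPerDim) {
    return dimValue[k] & 0xff;
  } else {
    int shift = (3 - (k - bytesPerDim)) * 8;  // 24, 16, 8, 0
    return (docID >>> shift) & 0xff;
  }
}

Because the docID bytes are part of the key, no separate comparator is needed; equal dimension values end up ordered by docID automatically.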

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
public class TestMSBRadixSorter extends LuceneTestCase {
private void test(BytesRef[] refs, int len) {
BytesRef[] expected = Arrays.copyOf(refs, len);
Arrays.sort(expected);
int maxLength = 0;
for (int i = 0; i < len; ++i) {
BytesRef ref = refs[i];
maxLength = Math.max(maxLength, ref.length);
}
switch (random().nextInt(3)) {
case 0:
maxLength += TestUtil.nextInt(random(), 1, 5);
break;
case 1:
maxLength = Integer.MAX_VALUE;
break;
default:
// leave unchanged
break;
}
new MSBRadixSorter(maxLength) {
protected int byteAt(int i, int k) {
BytesRef ref = refs[i];
if (ref.length <= k) {
return -1;
}
return ref.bytes[ref.offset + k] & 0xff;
}
@Override
protected void swap(int i, int j) {
BytesRef tmp = refs[i];
refs[i] = refs[j];
refs[j] = tmp;
}
}.sort(0, len);
BytesRef[] actual = Arrays.copyOf(refs, len);
assertArrayEquals(expected, actual);
}
public void testEmpty() {
test(new BytesRef[random().nextInt(5)], 0);
}
public void testOneValue() {
BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random()));
test(new BytesRef[] { bytes }, 1);
}
public void testTwoValues() {
BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random()));
BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random()));
test(new BytesRef[] { bytes1, bytes2 }, 2);
}
private void testRandom(int commonPrefixLen, int maxLen) {
byte[] commonPrefix = new byte[commonPrefixLen];
random().nextBytes(commonPrefix);
final int len = random().nextInt(100000);
BytesRef[] bytes = new BytesRef[len + random().nextInt(50)];
for (int i = 0; i < len; ++i) {
byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)];
random().nextBytes(b);
System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen);
bytes[i] = new BytesRef(b);
}
test(bytes, len);
}
public void testRandom() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(0, 10);
}
}
public void testRandomWithLotsOfDuplicates() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(0, 2);
}
}
public void testRandomWithSharedPrefix() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(TestUtil.nextInt(random(), 1, 30), 10);
}
}
public void testRandomWithSharedPrefixAndLotsOfDuplicates() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(TestUtil.nextInt(random(), 1, 30), 2);
}
}
}

View File

@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.util.SuppressForbidden;
import java.io.IOException;
@ -45,7 +46,8 @@ public class IndexMergeTool {
Directory[] indexes = new Directory[args.length - 1];
for (int i = 1; i < args.length; i++) {
indexes[i - 1] = FSDirectory.open(Paths.get(args[i]));
// try to use hardlinks if possible
indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i])));
}
System.out.println("Merging...");

View File

@ -0,0 +1,95 @@
package org.apache.lucene.store;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.security.AccessController;
import java.security.PrivilegedAction;
/**
* This directory wrapper overrides {@link Directory#copyFrom(Directory, String, String, IOContext)} in order
* to optionally use a hard-link instead of a full byte by byte file copy if applicable. Hard-links are only used if the
* underlying filesystem supports it and if the {@link java.nio.file.LinkPermission} "hard" is granted.
*
* <p><b>NOTE:</b> Using hard-links changes the copy semantics of
* {@link Directory#copyFrom(Directory, String, String, IOContext)}. When hard-links are used changes to the source file
* will be reflected in the target file and vice-versa. Within Lucene, files are write once and should not be modified
* after they have been written. This directory should not be used in situations where files change after they have
* been written.
* </p>
*/
public final class HardlinkCopyDirectoryWrapper extends FilterDirectory {
/**
* Creates a new HardlinkCopyDirectoryWrapper delegating to the given directory
*/
public HardlinkCopyDirectoryWrapper(Directory in) {
super(in);
}
@Override
public void copyFrom(Directory from, String srcFile, String destFile, IOContext context) throws IOException {
final Directory fromUnwrapped = FilterDirectory.unwrap(from);
final Directory toUnwrapped = FilterDirectory.unwrap(this);
// try to unwrap to FSDirectory - we might be able to just create hard-links of these files and save copying
// the entire file.
Exception suppressedException = null;
boolean tryCopy = true;
if (fromUnwrapped instanceof FSDirectory
&& toUnwrapped instanceof FSDirectory) {
final Path fromPath = ((FSDirectory) fromUnwrapped).getDirectory();
final Path toPath = ((FSDirectory) toUnwrapped).getDirectory();
if (Files.isReadable(fromPath.resolve(srcFile)) && Files.isWritable(toPath)) {
// only try hardlinks if we have permission to access the files
// if not super.copyFrom() will give us the right exceptions
suppressedException = AccessController.doPrivileged((PrivilegedAction<Exception>) () -> {
try {
Files.createLink(toPath.resolve(destFile), fromPath.resolve(srcFile));
} catch (FileNotFoundException | NoSuchFileException | FileAlreadyExistsException ex) {
return ex; // in these cases we bubble up since it's a true error condition.
} catch (IOException
| UnsupportedOperationException // if the FS doesn't support hard-links
| SecurityException ex // we don't have permission to use hard-links; just fall back to byte copy
) {
// hard-links are not supported or the files are on different filesystems
// we could go deeper and check if their file stores are the same and opt
// out earlier but for now we just fall back to normal file-copy
return ex;
}
return null;
});
tryCopy = suppressedException != null;
}
}
if (tryCopy) {
try {
super.copyFrom(from, srcFile, destFile, context);
} catch (Exception ex) {
if (suppressedException != null) {
ex.addSuppressed(suppressedException);
}
throw ex;
}
}
}
}
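
A minimal usage sketch, mirroring the IndexMergeTool change later in this commit; the paths and the "_0.cfs" file name are hypothetical, and the wrapper silently falls back to a regular byte copy when hard-links cannot be created:

import java.nio.file.Paths;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.store.IOContext;

Directory source = FSDirectory.open(Paths.get("/path/to/source-index"));
Directory target = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get("/path/to/target-index")));
// tries Files.createLink first; on UnsupportedOperationException, SecurityException
// or a plain IOException it falls back to super.copyFrom (full byte copy)
target.copyFrom(source, "_0.cfs", "_0.cfs", IOContext.DEFAULT);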

View File

@ -0,0 +1,84 @@
package org.apache.lucene.store;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Collections;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.util.IOUtils;
public class TestHardLinkCopyDirectoryWrapper extends BaseDirectoryTestCase {
@Override
protected Directory getDirectory(Path file) throws IOException {
Directory open = random().nextBoolean() ? newFSDirectory(file) : newDirectory();
return new HardlinkCopyDirectoryWrapper(open);
}
/**
* Tests that we use hardlinks if possible on Directory#copyFrom
*/
public void testCopyHardLinks() throws IOException {
Path tempDir = createTempDir();
Path dir_1 = tempDir.resolve("dir_1");
Path dir_2 = tempDir.resolve("dir_2");
Files.createDirectories(dir_1);
Files.createDirectories(dir_2);
Directory luceneDir_1 = newFSDirectory(dir_1);
Directory luceneDir_2 = newFSDirectory(dir_2);
try {
try (IndexOutput output = luceneDir_1.createOutput("foo.bar", IOContext.DEFAULT)) {
CodecUtil.writeHeader(output, "foo", 0);
output.writeString("hey man, nice shot!");
CodecUtil.writeFooter(output);
}
// In case luceneDir_1 has an NRTCachingDirectory
luceneDir_1.sync(Collections.singleton("foo.bar"));
try {
Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar"));
BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class);
BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class);
assumeTrue("hardlinks are not supported", destAttr.fileKey() != null
&& destAttr.fileKey().equals(sourceAttr.fileKey()));
} catch (UnsupportedOperationException ex) {
assumeFalse("hardlinks are not supported", true);
}
HardlinkCopyDirectoryWrapper wrapper = new HardlinkCopyDirectoryWrapper(luceneDir_2);
wrapper.copyFrom(luceneDir_1, "foo.bar", "bar.foo", IOContext.DEFAULT);
assertTrue(Files.exists(dir_2.resolve("bar.foo")));
BasicFileAttributes destAttr = Files.readAttributes(dir_2.resolve("bar.foo"), BasicFileAttributes.class);
BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class);
assertEquals(destAttr.fileKey(), sourceAttr.fileKey());
try (ChecksumIndexInput indexInput = wrapper.openChecksumInput("bar.foo", IOContext.DEFAULT)) {
CodecUtil.checkHeader(indexInput, "foo", 0, 0);
assertEquals("hey man, nice shot!", indexInput.readString());
CodecUtil.checkFooter(indexInput);
}
} finally {
// close them in a finally block; we might run into an assume here
IOUtils.close(luceneDir_1, luceneDir_2);
}
}
}

View File

@ -105,7 +105,7 @@ public class TestHalfFloatPoint extends LuceneTestCase {
assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(rounded), 0f);
} else if (Float.isFinite(rounded) == false) {
assertFalse(Float.isNaN(rounded));
assertTrue(Math.abs(f) > 65520);
assertTrue(Math.abs(f) >= 65520);
} else {
int index = Arrays.binarySearch(values, f);
float closest;

View File

@ -32,16 +32,17 @@ import org.locationtech.spatial4j.shape.SpatialRelation;
public class DateRangePrefixTreeTest extends LuceneTestCase {
@ParametersFactory
@ParametersFactory(argumentFormatting = "calendar=%s")
public static Iterable<Object[]> parameters() {
return Arrays.asList(new Object[][]{
{DateRangePrefixTree.DEFAULT_CAL}, {DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL}
{"default", DateRangePrefixTree.DEFAULT_CAL},
{"compat", DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL}
});
}
private final DateRangePrefixTree tree;
public DateRangePrefixTreeTest(Calendar templateCal) {
public DateRangePrefixTreeTest(String suiteName, Calendar templateCal) {
tree = new DateRangePrefixTree(templateCal);
}

View File

@ -38,7 +38,9 @@ grant {
permission java.io.FilePermission "${junit4.childvm.cwd}${/}jacoco.db", "write";
permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,write,delete";
permission java.io.FilePermission "${clover.db.dir}${/}-", "read,write,delete";
// misc HardlinkCopyDirectoryWrapper needs this to test if hardlinks can be created
permission java.nio.file.LinkPermission "hard";
// needed by SSD detection tests in TestIOUtils (creates symlinks)
permission java.nio.file.LinkPermission "symbolic";

View File

@ -116,6 +116,9 @@ New Features
* SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode.
(Keith Laban, Dennis Gove)
* SOLR-8583: Apply highlighting to hl.alternateField by default for Default and FastVectorHighlighter.
Turn off with hl.highlightAlternate=false (janhoy, David Smiley)
Bug Fixes
----------------------
@ -205,6 +208,12 @@ Bug Fixes
* SOLR-9151: Fix SolrCLI so that bin/solr -e cloud example can be run from any CWD (janhoy)
* SOLR-9141: Fix ClassCastException when using the /sql handler count() function with
single-shard collections (Minoru Osuka via James Dyer)
* SOLR-9165: Spellcheck does not return collations if "maxCollationTries" is used with "cursorMark".
(James Dyer)
Optimizations
----------------------
* SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.
@ -295,6 +304,14 @@ Other Changes
* SOLR-9119: several static methods in ValueSourceParser have been made private (hossman)
* SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail Khludnev)
* SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
(yonik)
* SOLR-9136: Separate out the error statistics into server-side error vs client-side error
(Jessica Cheng Mallet via Erick Erickson)
================== 6.0.1 ==================
(No Changes)

View File

@ -36,12 +36,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;

View File

@ -56,7 +56,8 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
// Statistics
private final AtomicLong numRequests = new AtomicLong();
private final AtomicLong numErrors = new AtomicLong();
private final AtomicLong numServerErrors = new AtomicLong();
private final AtomicLong numClientErrors = new AtomicLong();
private final AtomicLong numTimeouts = new AtomicLong();
private final Timer requestTimes = new Timer();
@ -164,23 +165,33 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
}
}
} catch (Exception e) {
boolean incrementErrors = true;
boolean isServerError = true;
if (e instanceof SolrException) {
SolrException se = (SolrException)e;
if (se.code() == SolrException.ErrorCode.CONFLICT.code) {
// TODO: should we allow this to be counted as an error (numErrors++)?
} else {
SolrException.log(log, e);
incrementErrors = false;
} else if (se.code() >= 400 && se.code() < 500) {
isServerError = false;
}
} else {
SolrException.log(log, e);
if (e instanceof SyntaxError) {
isServerError = false;
e = new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
}
rsp.setException(e);
numErrors.incrementAndGet();
if (incrementErrors) {
SolrException.log(log, e);
if (isServerError) {
numServerErrors.incrementAndGet();
} else {
numClientErrors.incrementAndGet();
}
}
}
finally {
timer.stop();
@ -263,7 +274,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
Snapshot snapshot = requestTimes.getSnapshot();
lst.add("handlerStart",handlerStart);
lst.add("requests", numRequests.longValue());
lst.add("errors", numErrors.longValue());
lst.add("errors", numServerErrors.longValue() + numClientErrors.longValue());
lst.add("serverErrors", numServerErrors.longValue());
lst.add("clientErrors", numClientErrors.longValue());
lst.add("timeouts", numTimeouts.longValue());
lst.add("totalTime", requestTimes.getSum());
lst.add("avgRequestsPerSecond", requestTimes.getMeanRate());

View File

@ -26,16 +26,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -53,7 +43,6 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Query;
import org.apache.solr.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
@ -61,7 +50,6 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -87,12 +75,24 @@ import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryWrapperFilter;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
/**
* The ExpandComponent is designed to work with the CollapsingPostFilter.
* The CollapsingPostFilter collapses a result set on a field.

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
@ -61,6 +60,7 @@ import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
@ -389,8 +389,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
preFetchFieldNames.add(keyField.getName());
}
FastVectorHighlighter fvh = null; // lazy
FieldQuery fvhFieldQuery = null; // lazy
FvhContainer fvhContainer = new FvhContainer(); // Lazy container for fvh and fieldQuery
IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getLeafReader()); // SOLR-5855
@ -408,30 +407,10 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
SchemaField schemaField = schema.getFieldOrNull(fieldName);
Object fieldHighlights; // object type allows flexibility for subclassers
if (schemaField == null) {
fieldHighlights = null;
} else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
// TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)!
fieldHighlights = null;
} else if (useFastVectorHighlighter(params, schemaField)) {
if (fvhFieldQuery == null) {
fvh = new FastVectorHighlighter(
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
// FVH cannot process hl.requireFieldMatch parameter per-field basis
params.getBool(HighlightParams.FIELD_MATCH, false));
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
fvhFieldQuery = fvh.getFieldQuery(query, reader);
}
fieldHighlights =
doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvh, fvhFieldQuery, reader, req);
} else { // standard/default highlighter
fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
}
fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params);
if (fieldHighlights == null) {
// no summaries made; copy text from alternate field
fieldHighlights = alternateField(doc, fieldName, req);
fieldHighlights = alternateField(doc, docId, fieldName, fvhContainer, query, reader, req);
}
if (fieldHighlights != null) {
@ -443,6 +422,34 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return fragments;
}
private Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField,
FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
SolrParams params) throws IOException {
Object fieldHighlights;
if (schemaField == null) {
fieldHighlights = null;
} else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
// TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)!
fieldHighlights = null;
} else if (useFastVectorHighlighter(params, schemaField)) {
if (fvhContainer.fieldQuery == null) {
FastVectorHighlighter fvh = new FastVectorHighlighter(
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
// FVH cannot process hl.requireFieldMatch parameter per-field basis
params.getBool(HighlightParams.FIELD_MATCH, false));
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
fvhContainer.fvh = fvh;
fvhContainer.fieldQuery = fvh.getFieldQuery(query, reader);
}
fieldHighlights =
doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req);
} else { // standard/default highlighter
fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
}
return fieldHighlights;
}
/** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}.
* Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid
* the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */
@ -469,14 +476,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId,
SchemaField schemaField, FastVectorHighlighter highlighter,
FieldQuery fieldQuery,
SchemaField schemaField, FvhContainer fvhContainer,
IndexReader reader, SolrQueryRequest req) throws IOException {
SolrParams params = req.getParams();
String fieldName = schemaField.getName();
SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params);
String[] snippets = highlighter.getBestFragments( fieldQuery, reader, docId, fieldName,
String[] snippets = fvhContainer.fvh.getBestFragments( fvhContainer.fieldQuery, reader, docId, fieldName,
params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ),
getFragListBuilder( fieldName, params ),
@ -497,12 +503,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
final String fieldName = schemaField.getName();
final int mvToExamine =
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
(schemaField.multiValued()) ? Integer.MAX_VALUE : 1);
// Technically this is the max *fragments* (snippets), not max values:
int mvToMatch =
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
if (mvToExamine <= 0 || mvToMatch <= 0) {
return null;
}
@ -557,7 +563,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
}
Highlighter highlighter;
if (req.getParams().getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
if (params.getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
// We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream
// needs to implement reset() efficiently.
@ -662,12 +668,38 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object alternateField(Document doc, String fieldName, SolrQueryRequest req) {
protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
IndexReader reader, SolrQueryRequest req) throws IOException {
IndexSchema schema = req.getSearcher().getSchema();
SolrParams params = req.getParams();
String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
if (alternateField == null || alternateField.length() == 0) {
return null;
}
if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true) && !alternateField.equals(fieldName)) {
// Try to highlight alternate field
Object fieldHighlights = null;
SchemaField schemaField = schema.getFieldOrNull(alternateField);
if (schemaField != null) {
HashMap<String, String> invariants = new HashMap<>();
invariants.put("f." + alternateField + "." + HighlightParams.SNIPPETS, "1");
// Enforce maxAlternateFieldLength by FRAGSIZE. Minimum 18 due to FVH limitations
invariants.put("f." + alternateField + "." + HighlightParams.FRAGSIZE,
alternateFieldLen > 0 ? String.valueOf(Math.max(18, alternateFieldLen)) : String.valueOf(Integer.MAX_VALUE));
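// Temporarily overlay these params (restored below) so the recursive call highlights the
// alternate field with a single snippet of the requested length.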
SolrParams origParams = req.getParams();
req.setParams(SolrParams.wrapDefaults(new MapSolrParams(invariants), origParams));
fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params);
req.setParams(origParams);
if (fieldHighlights != null) {
return fieldHighlights;
}
}
}
// Fallback to static non-highlighted
IndexableField[] docFields = doc.getFields(alternateField);
if (docFields.length == 0) {
// The alternate field did not exist, treat the original field as fallback instead
@ -685,7 +717,6 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
String[] altTexts = listFields.toArray(new String[listFields.size()]);
Encoder encoder = getEncoder(fieldName, params);
int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
List<String> altList = new ArrayList<>();
int len = 0;
for( String altText: altTexts ){
@ -707,6 +738,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
final TokenStream tStream = schemaField.getType().getIndexAnalyzer().tokenStream(schemaField.getName(), docText);
return new TokenOrderingFilter(tStream, 10);
}
// Wraps FVH to allow pass-by-reference
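// (both the highlighter and its FieldQuery are created lazily on the first FVH-highlighted
// field and then reused for the remaining fields of the request)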
private class FvhContainer {
private FastVectorHighlighter fvh;
private FieldQuery fieldQuery;
}
}
/** Orders Tokens in a window first by their startOffset ascending.

View File

@ -0,0 +1,296 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader.CoreClosedListener;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
* This class forces a composite reader (eg a {@link
* MultiReader} or {@link DirectoryReader}) to emulate a
* {@link LeafReader}. This requires implementing the postings
* APIs on-the-fly, using the static methods in {@link
* MultiFields}, {@link MultiDocValues}, by stepping through
* the sub-readers to merge fields/terms, appending docs, etc.
*
* <p><b>NOTE</b>: this class almost always results in a
* performance hit. If this is important to your use case,
* you'll get better performance by gathering the sub readers using
* {@link IndexReader#getContext()} to get the
* leaves and then operate per-LeafReader,
* instead of using this class.
*/
public final class SlowCompositeReaderWrapper extends LeafReader {
private final CompositeReader in;
private final Fields fields;
private final boolean merging;
/** This method is sugar for getting an {@link LeafReader} from
* an {@link IndexReader} of any kind. If the reader is already atomic,
* it is returned unchanged, otherwise wrapped by this class.
*/
public static LeafReader wrap(IndexReader reader) throws IOException {
if (reader instanceof CompositeReader) {
return new SlowCompositeReaderWrapper((CompositeReader) reader, false);
} else {
assert reader instanceof LeafReader;
return (LeafReader) reader;
}
}
SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
super();
in = reader;
if (getFieldInfos().hasPointValues()) {
throw new IllegalArgumentException("cannot wrap points");
}
fields = MultiFields.getFields(in);
in.registerParentReader(this);
this.merging = merging;
}
@Override
public String toString() {
return "SlowCompositeReaderWrapper(" + in + ")";
}
@Override
public void addCoreClosedListener(CoreClosedListener listener) {
addCoreClosedListenerAsReaderClosedListener(in, listener);
}
@Override
public void removeCoreClosedListener(CoreClosedListener listener) {
removeCoreClosedListenerAsReaderClosedListener(in, listener);
}
@Override
public Fields fields() {
ensureOpen();
return fields;
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getNumericValues(in, field);
}
@Override
public Bits getDocsWithField(String field) throws IOException {
ensureOpen();
return MultiDocValues.getDocsWithField(in, field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getBinaryValues(in, field);
}
@Override
public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getSortedNumericValues(in, field);
}
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
ensureOpen();
OrdinalMap map = null;
synchronized (cachedOrdMaps) {
map = cachedOrdMaps.get(field);
if (map == null) {
// uncached, or not a multi dv
SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
if (dv instanceof MultiSortedDocValues) {
map = ((MultiSortedDocValues)dv).mapping;
if (map.owner == getCoreCacheKey() && merging == false) {
cachedOrdMaps.put(field, map);
}
}
return dv;
}
}
int size = in.leaves().size();
final SortedDocValues[] values = new SortedDocValues[size];
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = in.leaves().get(i);
final LeafReader reader = context.reader();
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
return null;
}
SortedDocValues v = reader.getSortedDocValues(field);
if (v == null) {
v = DocValues.emptySorted();
}
values[i] = v;
starts[i] = context.docBase;
}
starts[size] = maxDoc();
return new MultiSortedDocValues(values, starts, map);
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
ensureOpen();
OrdinalMap map = null;
synchronized (cachedOrdMaps) {
map = cachedOrdMaps.get(field);
if (map == null) {
// uncached, or not a multi dv
SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
if (dv instanceof MultiSortedSetDocValues) {
map = ((MultiSortedSetDocValues)dv).mapping;
if (map.owner == getCoreCacheKey() && merging == false) {
cachedOrdMaps.put(field, map);
}
}
return dv;
}
}
assert map != null;
int size = in.leaves().size();
final SortedSetDocValues[] values = new SortedSetDocValues[size];
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = in.leaves().get(i);
final LeafReader reader = context.reader();
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if(fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET){
return null;
}
SortedSetDocValues v = reader.getSortedSetDocValues(field);
if (v == null) {
v = DocValues.emptySortedSet();
}
values[i] = v;
starts[i] = context.docBase;
}
starts[size] = maxDoc();
return new MultiSortedSetDocValues(values, starts, map);
}
// TODO: this could really be a weak map somewhere else on the coreCacheKey,
// but do we really need to optimize slow-wrapper any more?
private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>();
@Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getNormValues(in, field);
}
@Override
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
return in.getTermVectors(docID);
}
@Override
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
return in.numDocs();
}
@Override
public int maxDoc() {
// Don't call ensureOpen() here (it could affect performance)
return in.maxDoc();
}
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
ensureOpen();
in.document(docID, visitor);
}
@Override
public Bits getLiveDocs() {
ensureOpen();
return MultiFields.getLiveDocs(in);
}
@Override
public PointValues getPointValues() {
ensureOpen();
return null;
}
@Override
public FieldInfos getFieldInfos() {
ensureOpen();
return MultiFields.getMergedFieldInfos(in);
}
@Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
@Override
public Object getCombinedCoreAndDeletesKey() {
return in.getCombinedCoreAndDeletesKey();
}
@Override
protected void doClose() throws IOException {
// TODO: as this is a wrapper, should we really close the delegate?
in.close();
}
@Override
public void checkIntegrity() throws IOException {
ensureOpen();
for (LeafReaderContext ctx : in.leaves()) {
ctx.reader().checkIntegrity();
}
}
}
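As the class javadoc notes, wrapping should be a last resort; iterating the leaves directly is
normally faster. A minimal sketch of both approaches (the reader and field name are illustrative
assumptions, not part of this patch):

  // Preferred: operate per leaf and account for each leaf's docBase.
  for (LeafReaderContext ctx : indexReader.leaves()) {
    SortedDocValues perLeaf = ctx.reader().getSortedDocValues("category"); // hypothetical field
    // consume perLeaf for local docs [0, ctx.reader().maxDoc()); global id = ctx.docBase + localDoc
  }

  // Fallback: emulate a single LeafReader over the whole composite reader (slower).
  LeafReader slow = SlowCompositeReaderWrapper.wrap(indexReader);
  SortedDocValues merged = slow.getSortedDocValues("category");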

View File

@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergePolicyWrapper;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.apache.solr.index.SlowCompositeReaderWrapper;
// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specification?
public final class SortingMergePolicy extends MergePolicyWrapper {
private final Sort sort;
/** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
public SortingMergePolicy(MergePolicy in, Sort sort) {
super(in);
this.sort = sort;
}
/** Return the {@link Sort} order that is used to sort segments when merging. */
public Sort getSort() {
return sort;
}
@Override
public String toString() {
return "SortingMergePolicy(" + in + ", sort=" + sort + ")";
}
}
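A minimal configuration sketch, assuming an existing Directory and Analyzer (the sort field name
is illustrative, not part of this patch):

  Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG)); // hypothetical field
  IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
  iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), indexSort));
  IndexWriter writer = new IndexWriter(directory, iwc);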

View File

@ -28,6 +28,7 @@ import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexableField;
@ -41,22 +42,16 @@ import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SpatialOptions;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.DistanceUnits;
import org.apache.solr.util.MapListener;
import org.apache.solr.util.SpatialUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Throwables;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.context.SpatialContextFactory;
import org.locationtech.spatial4j.distance.DistanceUtils;
@ -66,6 +61,12 @@ import org.locationtech.spatial4j.io.SupportedFormats;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Throwables;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
/**
* Abstract base class for Solr FieldTypes based on a Lucene 4 {@link SpatialStrategy}.

View File

@ -23,10 +23,10 @@ import java.nio.ByteBuffer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.Base64;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -22,15 +22,14 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
@ -40,6 +39,7 @@ import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.function.OrdFieldSource;
import org.apache.solr.uninverting.UninvertingReader.Type;
/**
*
*/

View File

@ -40,12 +40,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
/**
* Field for collated sort keys.

View File

@ -44,7 +44,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldValueQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.response.TextResponseWriter;

View File

@ -16,12 +16,6 @@
*/
package org.apache.solr.schema;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
@ -31,6 +25,12 @@ import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyIntField;
@ -45,7 +45,6 @@ import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef;
@ -55,6 +54,7 @@ import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

View File

@ -16,17 +16,17 @@
*/
package org.apache.solr.schema;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.function.FileFloatSource;
import java.io.IOException;
import java.util.Map;
import org.apache.solr.uninverting.UninvertingReader.Type;
/** Get values from an external file instead of the index.
*

View File

@ -49,7 +49,6 @@ import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef;
@ -66,6 +65,7 @@ import org.apache.solr.query.SolrRangeQuery;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.Sorting;
import org.apache.solr.uninverting.UninvertingReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -18,23 +18,23 @@ package org.apache.solr.schema;
import java.io.IOException;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.io.GeohashUtils;
import org.locationtech.spatial4j.shape.Point;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SpatialOptions;
import org.apache.solr.search.function.ValueSourceRangeFilter;
import org.apache.solr.search.function.distance.GeohashHaversineFunction;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.SpatialUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.io.GeohashUtils;
import org.locationtech.spatial4j.shape.Point;
/**
* This is a class that represents a <a

View File

@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;

View File

@ -22,8 +22,8 @@ import java.util.Map;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
@ -37,7 +37,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Weight;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.DelegatingCollector;
@ -45,8 +44,8 @@ import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.PostFilter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SpatialOptions;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.SpatialUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.shape.Point;

View File

@ -21,7 +21,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
@ -30,13 +29,14 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SpatialOptions;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.locationtech.spatial4j.distance.DistanceUtils;
/**
* A point type that indexes a point in an n-dimensional space as separate fields and supports range queries.

View File

@ -33,14 +33,14 @@ import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.lucene.util.AttributeSource;
import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.Sorting;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -20,16 +20,16 @@ import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.*;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
/**
* Utility Field used for random sorting. It should not be passed a value.

View File

@ -27,10 +27,10 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
public class StrField extends PrimitiveFieldType {

View File

@ -16,14 +16,16 @@
*/
package org.apache.solr.schema;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.*;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.QueryBuilder;
import org.apache.solr.common.SolrException;
@ -31,9 +33,7 @@ import org.apache.solr.query.SolrRangeQuery;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.Sorting;
import java.util.Map;
import java.io.IOException;
import org.apache.solr.uninverting.UninvertingReader.Type;
/** <code>TextField</code> is the basic type for configurable text analysis.
* Analyzers for field types using this implementation should be defined in the schema.

View File

@ -26,8 +26,8 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FieldType.LegacyNumericType;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
@ -47,7 +47,6 @@ import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef;
@ -61,6 +60,7 @@ import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.FunctionRangeQuery;
import org.apache.solr.search.QParser;
import org.apache.solr.search.function.ValueSourceRangeFilter;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.DateMathParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -25,15 +25,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -49,13 +41,12 @@ import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
@ -65,8 +56,8 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.QueryElevationComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
@ -75,6 +66,14 @@ import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TrieFloatField;
import org.apache.solr.schema.TrieIntField;
import org.apache.solr.schema.TrieLongField;
import org.apache.solr.uninverting.UninvertingReader;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
/**

View File

@ -20,16 +20,16 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader;
/**
* Lucene 5.0 removes "accidental" insanity, so you must explicitly

View File

@ -18,13 +18,12 @@ package org.apache.solr.search;
import java.net.URL;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
import org.apache.solr.uninverting.UninvertingReader;
/**
* A SolrInfoMBean that provides introspection of the Solr FieldCache

View File

@ -53,7 +53,6 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiPostingsEnum;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
@ -94,7 +93,6 @@ import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@ -109,6 +107,7 @@ import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
@ -122,6 +121,7 @@ import org.apache.solr.schema.TrieFloatField;
import org.apache.solr.schema.TrieIntField;
import org.apache.solr.search.facet.UnInvertedField;
import org.apache.solr.search.stats.StatsSource;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.update.IndexFingerprint;
import org.apache.solr.update.SolrIndexConfig;
import org.slf4j.Logger;

View File

@ -27,17 +27,16 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.uninverting.DocTermOrds;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.BitDocSet;
@ -45,6 +44,7 @@ import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.uninverting.DocTermOrds;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -20,13 +20,12 @@ import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
@ -34,6 +33,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueInt;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.Insanity;
import org.apache.solr.search.SolrIndexSearcher;

View File

@ -20,18 +20,18 @@ import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.Insanity;
import org.apache.solr.search.SolrIndexSearcher;

View File

@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Aliases;
@ -45,6 +44,7 @@ import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.RefCounted;
/**

View File

@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.ModifiableSolrParams;
@ -118,6 +119,8 @@ public class SpellCheckCollator {
params.set(CommonParams.FL, "id");
// we'll sort by doc id to ensure no scoring is done.
params.set(CommonParams.SORT, "_docid_ asc");
// CursorMark does not like _docid_ sorting, and we don't need it.
params.remove(CursorMarkParams.CURSOR_MARK_PARAM);
// If a dismax query, don't add unnecessary clauses for scoring
params.remove(DisMaxParams.TIE);
params.remove(DisMaxParams.PF);

View File

@ -0,0 +1,887 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.PostingsFormat; // javadocs
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.StringHelper;
/**
* This class enables fast access to multiple term ords for
* a specified field across all docIDs.
*
* Like FieldCache, it uninverts the index and holds a
* packed data structure in RAM to enable fast access.
* Unlike FieldCache, it can handle multi-valued fields,
* and, it does not hold the term bytes in RAM. Rather, you
* must obtain a TermsEnum from the {@link #getOrdTermsEnum}
* method, and then seek-by-ord to get the term's bytes.
*
* While normally term ords are type long, in this API they are
* int as the internal representation here cannot address
* more than MAX_INT unique terms. Also, typically this
* class is used on fields with relatively few unique terms
* vs the number of documents. In addition, there is an
* internal limit (16 MB) on how many bytes each chunk of
* documents may consume. If you trip this limit you'll hit
* an IllegalStateException.
*
* Deleted documents are skipped during uninversion, and if
* you look them up you'll get 0 ords.
*
* The returned per-document ords do not retain their
* original order in the document. Instead they are returned
* in sorted (by ord, ie term's BytesRef comparator) order. They
* are also de-dup'd (ie if doc has same term more than once
* in this field, you'll only get that ord back once).
*
* This class
* will create its own term index internally, allowing it to
* create a wrapped TermsEnum that can handle ord. The
* {@link #getOrdTermsEnum} method then provides this
* wrapped enum.
*
* The RAM consumption of this class can be high!
*
* @lucene.experimental
*/
/*
* Final form of the un-inverted field:
* Each document points to a list of term numbers that are contained in that document.
*
* Term numbers are in sorted order, and are encoded as variable-length deltas from the
* previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A
* term number of 0 signals the end of the termNumber list.
*
* There is a single int[maxDoc()] which either contains a pointer into a byte[] for
* the termNumber lists, or directly contains the termNumber list if it fits in the 4
* bytes of an integer. If the first byte in the integer is 1, the next 3 bytes
* are a pointer into a byte[] where the termNumber list starts.
*
* There are actually 256 byte arrays, to compensate for the fact that the pointers
* into the byte arrays are only 3 bytes long. The correct byte array for a document
* is a function of its id.
*
* To save space and speed up faceting, any term that matches enough documents will
* not be un-inverted... it will be skipped while building the un-inverted field structure,
* and will use a set intersection method during faceting.
*
* To further save memory, the terms (the actual string values) are not all stored in
* memory, but a TermIndex is used to convert term numbers to term values only
* for the terms needed after faceting has completed. Only every 128th term value
* is stored, along with its corresponding term number, and this is used as an
* index to find the closest term and iterate until the desired number is hit (very
* much like Lucene's own internal term index).
*
*/
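/*
 * A worked example of the final form described above (values are illustrative):
 * suppose a document contains term numbers 3 and 7 for this field. With TNUM_OFFSET=2 the
 * stored deltas are (3-0)+2=5 and (7-3)+2=6, each a single vInt byte, followed by a 0
 * terminator. The whole list fits in four bytes, so index[doc] holds it inline as 0x00000605
 * (low byte 0x05 != 1, so it is read as a direct list). A longer list is spilled to a byte
 * array instead: index[doc] then becomes (pos<<8)|1, where pos is a 24-bit offset into
 * tnums[(doc>>16) & 0xff].
 */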
public class DocTermOrds implements Accountable {
// Term ords are shifted by this, internally, to reserve
// values 0 (end term) and 1 (index is a pointer into byte array)
private final static int TNUM_OFFSET = 2;
/** Every 128th term is indexed, by default. */
public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing
private int indexIntervalBits;
private int indexIntervalMask;
private int indexInterval;
/** Don't uninvert terms that exceed this count. */
protected final int maxTermDocFreq;
/** Field we are uninverting. */
protected final String field;
/** Number of terms in the field. */
protected int numTermsInField;
/** Total number of references to term numbers. */
protected long termInstances;
private long memsz;
/** Total time to uninvert the field. */
protected int total_time;
/** Time for phase1 of the uninvert process. */
protected int phase1_time;
/** Holds the per-document ords or a pointer to the ords. */
protected int[] index;
/** Holds term ords for documents. */
protected byte[][] tnums = new byte[256][];
/** Total bytes (sum of term lengths) for all indexed terms.*/
protected long sizeOfIndexedStrings;
/** Holds the indexed (by default every 128th) terms. */
protected BytesRef[] indexedTermsArray = new BytesRef[0];
/** If non-null, only terms matching this prefix were
* indexed. */
protected BytesRef prefix;
/** Ordinal of the first term in the field, or 0 if the
* {@link PostingsFormat} does not implement {@link
* TermsEnum#ord}. */
protected int ordBase;
/** Used while uninverting. */
protected PostingsEnum postingsEnum;
/** If true, check and throw an exception if the field has docValues enabled.
* Normally, docValues should be used in preference to DocTermOrds. */
protected boolean checkForDocValues = true;
/** Returns total bytes used. */
public long ramBytesUsed() {
// can cache the mem size since it shouldn't change
if (memsz!=0) return memsz;
long sz = 8*8 + 32; // local fields
if (index != null) sz += index.length * 4;
if (tnums!=null) {
for (byte[] arr : tnums)
if (arr != null) sz += arr.length;
}
memsz = sz;
return sz;
}
/** Inverts all terms */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException {
this(reader, liveDocs, field, null, Integer.MAX_VALUE);
}
// TODO: instead of all these ctors and options, take termsenum!
/** Inverts only terms starting w/ prefix */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException {
this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE);
}
/** Inverts only terms starting w/ prefix, and only terms
* whose docFreq (not taking deletions into account) is
* &lt;= maxTermDocFreq */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException {
this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS);
}
/** Inverts only terms starting w/ prefix, and only terms
* whose docFreq (not taking deletions into account) is
* &lt;= maxTermDocFreq, with a custom indexing interval
* (default is every 128th term). */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException {
this(field, maxTermDocFreq, indexIntervalBits);
uninvert(reader, liveDocs, termPrefix);
}
/** Subclass inits w/ this, but be sure you then call
* uninvert, only once */
protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) {
//System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq);
this.field = field;
this.maxTermDocFreq = maxTermDocFreq;
this.indexIntervalBits = indexIntervalBits;
indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits);
indexInterval = 1 << indexIntervalBits;
}
/**
* Returns a TermsEnum that implements ord, or null if no terms in field.
* <p>
* we build a "private" terms
* index internally (WARNING: consumes RAM) and use that
* index to implement ord. This also enables ord on top
* of a composite reader. The returned TermsEnum is
* unpositioned. This returns null if there are no terms.
* </p>
* <p><b>NOTE</b>: you must pass the same reader that was
* used when creating this class
*/
public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
// NOTE: see LUCENE-6529 before attempting to optimize this method to
// return a TermsEnum directly from the reader if it already supports ord().
assert null != indexedTermsArray;
if (0 == indexedTermsArray.length) {
return null;
} else {
return new OrdWrappedTermsEnum(reader);
}
}
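// Usage sketch (illustrative; the reader and field name are assumed, not part of this file):
//   DocTermOrds dto = new DocTermOrds(leafReader, leafReader.getLiveDocs(), "category");
//   TermsEnum te = dto.getOrdTermsEnum(leafReader);  // null if the field has no terms
//   if (te != null) {
//     te.seekExact(ord);        // seek by ord, then read the term bytes
//     BytesRef term = te.term();
//   }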
/**
* Returns the number of terms in this field
*/
public int numTerms() {
return numTermsInField;
}
/**
* Returns {@code true} if no terms were indexed.
*/
public boolean isEmpty() {
return index == null;
}
/** Subclass can override this */
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
}
/** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)}
* to record the document frequency for each uninverted
* term. */
protected void setActualDocFreq(int termNum, int df) throws IOException {
}
/** Call this only once (if you subclass!) */
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
}
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.nanoTime();
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
final int maxDoc = reader.maxDoc();
final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
final Terms terms = reader.terms(field);
if (terms == null) {
// No terms
return;
}
final TermsEnum te = terms.iterator();
final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
//System.out.println("seekStart=" + seekStart.utf8ToString());
if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
// No terms match
return;
}
// For our "term index wrapper"
final List<BytesRef> indexedTerms = new ArrayList<>();
final PagedBytes indexedTermsBytes = new PagedBytes(15);
// we need a minimum of 9 bytes, but round up to 12 since the space would
// be wasted with most allocators anyway.
byte[] tempArr = new byte[12];
//
// enumerate all terms, and build an intermediate form of the un-inverted field.
//
// During this intermediate form, every document has a (potential) byte[]
// and the int[maxDoc()] array either contains the termNumber list directly
// or the *end* offset of the termNumber list in its byte array (for faster
// appending and faster creation of the final form).
//
// idea... if things are too large while building, we could do a range of docs
// at a time (but it would be a fair amount slower to build)
// could also do ranges in parallel to take advantage of multiple CPUs
// OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
// values. This requires going over the field first to find the most
// frequent terms ahead of time.
int termNum = 0;
postingsEnum = null;
// Loop begins with te positioned to first term (we call
// seek above):
for (;;) {
final BytesRef t = te.term();
if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
break;
}
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
visitTerm(te, termNum);
if ((termNum & indexIntervalMask) == 0) {
// Index this term
sizeOfIndexedStrings += t.length;
BytesRef indexedTerm = new BytesRef();
indexedTermsBytes.copy(t, indexedTerm);
// TODO: really should 1) strip off useless suffix,
// and 2) use FST not array/PagedBytes
indexedTerms.add(indexedTerm);
}
final int df = te.docFreq();
if (df <= maxTermDocFreq) {
postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);
// dF, but takes deletions into account
int actualDF = 0;
for (;;) {
int doc = postingsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
//System.out.println(" chunk=" + chunk + " docs");
actualDF ++;
termInstances++;
//System.out.println(" docID=" + doc);
// add TNUM_OFFSET to the term number to make room for special reserved values:
// 0 (end term) and 1 (index into byte array follows)
int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
lastTerm[doc] = termNum;
int val = index[doc];
if ((val & 0xff)==1) {
// index into byte array (actually the end of
// the doc-specific byte[] when building)
int pos = val >>> 8;
int ilen = vIntSize(delta);
byte[] arr = bytes[doc];
int newend = pos+ilen;
if (newend > arr.length) {
// We avoid a doubling strategy to lower memory usage.
// this faceting method isn't for docs with many terms.
// In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
// TODO: figure out what array lengths we can round up to w/o actually using more memory
// (how much space does a byte[] take up? Is data preceded by a 32 bit length only?)
// It should be safe to round up to the nearest 32 bits in any case.
int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
byte[] newarr = new byte[newLen];
System.arraycopy(arr, 0, newarr, 0, pos);
arr = newarr;
bytes[doc] = newarr;
}
pos = writeInt(delta, arr, pos);
index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
} else {
// OK, this int has data in it... find the end (a zero starting byte - not
// part of another number, hence not following a byte with the high bit set).
int ipos;
if (val==0) {
ipos=0;
} else if ((val & 0x0000ff80)==0) {
ipos=1;
} else if ((val & 0x00ff8000)==0) {
ipos=2;
} else if ((val & 0xff800000)==0) {
ipos=3;
} else {
ipos=4;
}
//System.out.println(" ipos=" + ipos);
int endPos = writeInt(delta, tempArr, ipos);
//System.out.println(" endpos=" + endPos);
if (endPos <= 4) {
//System.out.println(" fits!");
// value will fit in the integer... move bytes back
for (int j=ipos; j<endPos; j++) {
val |= (tempArr[j] & 0xff) << (j<<3);
}
index[doc] = val;
} else {
// value won't fit... move integer into byte[]
for (int j=0; j<ipos; j++) {
tempArr[j] = (byte)val;
val >>>=8;
}
// point at the end index in the byte[]
index[doc] = (endPos<<8) | 1;
bytes[doc] = tempArr;
tempArr = new byte[12];
}
}
}
setActualDocFreq(termNum, actualDF);
}
termNum++;
if (te.next() == null) {
break;
}
}
numTermsInField = termNum;
long midPoint = System.nanoTime();
if (termInstances == 0) {
// we didn't invert anything
// lower memory consumption.
tnums = null;
} else {
this.index = index;
//
// transform intermediate form into the final form, building a single byte[]
// at a time, and releasing the intermediate byte[]s as we go to avoid
// increasing the memory footprint.
//
for (int pass = 0; pass<256; pass++) {
byte[] target = tnums[pass];
int pos=0; // end in target;
if (target != null) {
pos = target.length;
} else {
target = new byte[4096];
}
// loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
// where pp is the pass (which array we are building), and xx is all values.
// each pass shares the same byte[] for termNumber lists.
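// e.g. for pass == 3, docs 0x0003xxxx, 0x0103xxxx, 0x0203xxxx, ... all append their
// term-number lists into the single shared byte[] tnums[3].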
for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
int lim = Math.min(docbase + (1<<16), maxDoc);
for (int doc=docbase; doc<lim; doc++) {
//System.out.println(" pass=" + pass + " process docID=" + doc);
int val = index[doc];
if ((val&0xff) == 1) {
int len = val >>> 8;
//System.out.println(" ptr pos=" + pos);
index[doc] = (pos<<8)|1; // change index to point to start of array
if ((pos & 0xff000000) != 0) {
// we only have 24 bits for the array index
throw new IllegalStateException("Too many values for UnInvertedField faceting on field "+field);
}
byte[] arr = bytes[doc];
/*
for(byte b : arr) {
//System.out.println(" b=" + Integer.toHexString((int) b));
}
*/
bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
if (target.length <= pos + len) {
int newlen = target.length;
/*** we don't have to worry about the array getting too large
* since the "pos" param will overflow first (only 24 bits available)
if ((newlen<<1) <= 0) {
// overflow...
newlen = Integer.MAX_VALUE;
if (newlen <= pos + len) {
throw new SolrException(400,"Too many terms to uninvert field!");
}
} else {
while (newlen <= pos + len) newlen<<=1; // doubling strategy
}
****/
while (newlen <= pos + len) newlen<<=1; // doubling strategy
byte[] newtarget = new byte[newlen];
System.arraycopy(target, 0, newtarget, 0, pos);
target = newtarget;
}
System.arraycopy(arr, 0, target, pos, len);
pos += len + 1; // skip single byte at end and leave it 0 for terminator
}
}
}
// shrink array
if (pos < target.length) {
byte[] newtarget = new byte[pos];
System.arraycopy(target, 0, newtarget, 0, pos);
target = newtarget;
}
tnums[pass] = target;
if ((pass << 16) > maxDoc)
break;
}
}
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
long endTime = System.nanoTime();
total_time = (int) TimeUnit.MILLISECONDS.convert(endTime-startTime, TimeUnit.NANOSECONDS);
phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint-startTime, TimeUnit.NANOSECONDS);
}
/** Number of bytes to represent an unsigned int as a vint. */
private static int vIntSize(int x) {
if ((x & (0xffffffff << (7*1))) == 0 ) {
return 1;
}
if ((x & (0xffffffff << (7*2))) == 0 ) {
return 2;
}
if ((x & (0xffffffff << (7*3))) == 0 ) {
return 3;
}
if ((x & (0xffffffff << (7*4))) == 0 ) {
return 4;
}
return 5;
}
// todo: if we know the size of the vInt already, we could do
// a single switch on the size
private static int writeInt(int x, byte[] arr, int pos) {
int a;
a = (x >>> (7*4));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
a = (x >>> (7*3));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
a = (x >>> (7*2));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
a = (x >>> (7*1));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
arr[pos++] = (byte)(x & 0x7f);
return pos;
}
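  // Worked sketch of the vInt encoding above: vIntSize(200) == 2 and writeInt(200, arr, 0)
  // emits {0x81, 0x48} -- high-order 7-bit groups are written first with the continuation
  // bit (0x80) set, and the final 7 bits are written with the high bit clear.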
/**
* "wrap" our own terms index around the original IndexReader.
   * Only valid if there are terms for this field from the original reader.
*/
private final class OrdWrappedTermsEnum extends TermsEnum {
private final TermsEnum termsEnum;
private BytesRef term;
private long ord = -indexInterval-1; // force "real" seek
public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
assert indexedTermsArray != null;
assert 0 != indexedTermsArray.length;
termsEnum = reader.fields().terms(field).iterator();
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return termsEnum.postings(reuse, flags);
}
@Override
public BytesRef term() {
return term;
}
@Override
public BytesRef next() throws IOException {
if (++ord < 0) {
ord = 0;
}
if (termsEnum.next() == null) {
term = null;
return null;
}
return setTerm(); // this is extra work if we know we are in bounds...
}
@Override
public int docFreq() throws IOException {
return termsEnum.docFreq();
}
@Override
public long totalTermFreq() throws IOException {
return termsEnum.totalTermFreq();
}
@Override
public long ord() {
return ordBase + ord;
}
@Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
// already here
if (term != null && term.equals(target)) {
return SeekStatus.FOUND;
}
int startIdx = Arrays.binarySearch(indexedTermsArray, target);
if (startIdx >= 0) {
// we hit the term exactly... lucky us!
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
assert term != null;
return SeekStatus.FOUND;
}
// we didn't hit the term exactly
startIdx = -startIdx-1;
if (startIdx == 0) {
// our target occurs *before* the first term
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
ord = 0;
setTerm();
assert term != null;
return SeekStatus.NOT_FOUND;
}
// back up to the start of the block
startIdx--;
if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
// we are already in the right block and the current term is before the term we want,
// so we don't need to seek.
} else {
// seek to the right block
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
assert term != null; // should be non-null since it's in the index
}
while (term != null && term.compareTo(target) < 0) {
next();
}
if (term == null) {
return SeekStatus.END;
} else if (term.compareTo(target) == 0) {
return SeekStatus.FOUND;
} else {
return SeekStatus.NOT_FOUND;
}
}
@Override
public void seekExact(long targetOrd) throws IOException {
int delta = (int) (targetOrd - ordBase - ord);
//System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
if (delta < 0 || delta > indexInterval) {
final int idx = (int) (targetOrd >>> indexIntervalBits);
final BytesRef base = indexedTermsArray[idx];
//System.out.println(" do seek term=" + base.utf8ToString());
ord = idx << indexIntervalBits;
delta = (int) (targetOrd - ord);
final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
} else {
//System.out.println("seek w/in block");
}
while (--delta >= 0) {
BytesRef br = termsEnum.next();
if (br == null) {
assert false;
return;
}
ord++;
}
setTerm();
assert term != null;
}
private BytesRef setTerm() throws IOException {
term = termsEnum.term();
//System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
if (prefix != null && !StringHelper.startsWith(term, prefix)) {
term = null;
}
return term;
}
}
/** Returns the term ({@link BytesRef}) corresponding to
* the provided ordinal. */
public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
termsEnum.seekExact(ord);
return termsEnum.term();
}
/** Returns a SortedSetDocValues view of this instance */
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
if (isEmpty()) {
return DocValues.emptySortedSet();
} else {
return new Iterator(reader);
}
}
private class Iterator extends SortedSetDocValues {
final LeafReader reader;
final TermsEnum te; // used internally for lookupOrd() and lookupTerm()
// currently we read 5 at a time (using the logic of the old iterator)
final int buffer[] = new int[5];
int bufferUpto;
int bufferLength;
private int tnum;
private int upto;
private byte[] arr;
Iterator(LeafReader reader) throws IOException {
this.reader = reader;
this.te = termsEnum();
}
@Override
public long nextOrd() {
while (bufferUpto == bufferLength) {
if (bufferLength < buffer.length) {
return NO_MORE_ORDS;
} else {
bufferLength = read(buffer);
bufferUpto = 0;
}
}
return buffer[bufferUpto++];
}
/** Buffer must be at least 5 ints long. Returns number
* of term ords placed into buffer; if this count is
* less than buffer.length then that is the end. */
int read(int[] buffer) {
int bufferUpto = 0;
if (arr == null) {
// code is inlined into upto
//System.out.println("inlined");
int code = upto;
int delta = 0;
for (;;) {
delta = (delta << 7) | (code & 0x7f);
if ((code & 0x80)==0) {
if (delta==0) break;
tnum += delta - TNUM_OFFSET;
buffer[bufferUpto++] = ordBase+tnum;
//System.out.println(" tnum=" + tnum);
delta = 0;
}
code >>>= 8;
}
} else {
// code is a pointer
for(;;) {
int delta = 0;
for(;;) {
byte b = arr[upto++];
delta = (delta << 7) | (b & 0x7f);
//System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b);
if ((b & 0x80) == 0) break;
}
//System.out.println(" delta=" + delta);
if (delta == 0) break;
tnum += delta - TNUM_OFFSET;
//System.out.println(" tnum=" + tnum);
buffer[bufferUpto++] = ordBase+tnum;
if (bufferUpto == buffer.length) {
break;
}
}
}
return bufferUpto;
}
@Override
public void setDocument(int docID) {
tnum = 0;
final int code = index[docID];
if ((code & 0xff)==1) {
// a pointer
upto = code>>>8;
//System.out.println(" pointer! upto=" + upto);
int whichArray = (docID >>> 16) & 0xff;
arr = tnums[whichArray];
} else {
//System.out.println(" inline!");
arr = null;
upto = code;
}
bufferUpto = 0;
bufferLength = read(buffer);
}
@Override
public BytesRef lookupOrd(long ord) {
try {
return DocTermOrds.this.lookupTerm(te, (int) ord);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public long getValueCount() {
return numTerms();
}
@Override
public long lookupTerm(BytesRef key) {
try {
switch (te.seekCeil(key)) {
case FOUND:
assert te.ord() >= 0;
return te.ord();
case NOT_FOUND:
assert te.ord() >= 0;
return -te.ord()-1;
default: /* END */
return -numTerms()-1;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public TermsEnum termsEnum() {
try {
return getOrdTermsEnum(reader);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
}

View File

@ -0,0 +1,466 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Expert: Maintains caches of term values.
*
* <p>Created: May 19, 2004 11:13:14 AM
*
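 * <p>A minimal usage sketch (hypothetical field name "price", assumed to have been indexed
 * with {@link org.apache.lucene.document.IntPoint}):
 * <pre class="prettyprint">
 *   NumericDocValues prices = FieldCache.DEFAULT.getNumerics(
 *       leafReader, "price", FieldCache.INT_POINT_PARSER, false);
 *   long price = prices.get(docID);
 * </pre>
 *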
* @since lucene 1.4
* @see FieldCacheSanityChecker
*
* @lucene.internal
*/
interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.
*/
public static final class CreationPlaceholder implements Accountable {
Accountable value;
@Override
public long ramBytesUsed() {
// don't call on the in-progress value, might make things angry.
return RamUsageEstimator.NUM_BYTES_OBJECT_REF;
}
}
/**
* interface to all parsers. It is used to parse different numeric types.
*/
public interface Parser {
/**
* Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
* to filter the actual TermsEnum before the field cache is filled.
*
* @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
* @return a possibly filtered {@link TermsEnum} instance, this method must not return <code>null</code>.
* @throws IOException if an {@link IOException} occurs
* @deprecated index with Points instead
*/
@Deprecated
public TermsEnum termsEnum(Terms terms) throws IOException;
    /** Parses this field's value */
public long parseValue(BytesRef term);
}
/**
* Base class for points parsers. These parsers do not use the inverted index, but instead
* uninvert point data.
*
* This abstraction can be cleaned up when Parser.termsEnum is removed.
*/
public abstract class PointParser implements Parser {
public final TermsEnum termsEnum(Terms terms) throws IOException {
throw new UnsupportedOperationException("makes no sense for parsing points");
}
}
/** Expert: The cache used internally by sorting and range query classes. */
public static FieldCache DEFAULT = new FieldCacheImpl();
/**
* A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.IntPoint}.
*/
public static final Parser INT_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableBytesToInt(point.bytes, point.offset);
}
@Override
public String toString() {
return FieldCache.class.getName()+".INT_POINT_PARSER";
}
};
/**
* A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LongPoint}.
*/
public static final Parser LONG_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableBytesToLong(point.bytes, point.offset);
}
@Override
public String toString() {
return FieldCache.class.getName()+".LONG_POINT_PARSER";
}
};
/**
* A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.FloatPoint}.
*/
public static final Parser FLOAT_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableFloatBits(NumericUtils.sortableBytesToInt(point.bytes, point.offset));
}
@Override
public String toString() {
return FieldCache.class.getName()+".FLOAT_POINT_PARSER";
}
};
/**
* A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.DoublePoint}.
*/
public static final Parser DOUBLE_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableDoubleBits(NumericUtils.sortableBytesToLong(point.bytes, point.offset));
}
@Override
public String toString() {
return FieldCache.class.getName()+".DOUBLE_POINT_PARSER";
}
};
/**
* A parser instance for int values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyIntField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_INT_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
return LegacyNumericUtils.prefixCodedToInt(term);
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_INT_PARSER";
}
};
/**
* A parser instance for float values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyFloatField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_FLOAT_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
int val = LegacyNumericUtils.prefixCodedToInt(term);
if (val<0) val ^= 0x7fffffff;
return val;
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_FLOAT_PARSER";
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
}
};
/**
* A parser instance for long values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyLongField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_LONG_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
return LegacyNumericUtils.prefixCodedToLong(term);
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_LONG_PARSER";
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
}
};
/**
* A parser instance for double values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyDoubleField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_DOUBLE_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
long val = LegacyNumericUtils.prefixCodedToLong(term);
if (val<0) val ^= 0x7fffffffffffffffL;
return val;
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_DOUBLE_PARSER";
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
}
};
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms/points in <code>field</code> and returns a bit set at the size of
* <code>reader.maxDoc()</code>, with turned on bits for each docid that
* does have a value for this field.
* @param parser May be {@code null} if coming from the inverted index, otherwise
* can be a {@link PointParser} to compute from point values.
*/
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException;
/**
* Returns a {@link NumericDocValues} over the values found in documents in the given
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
* uses {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values.
* Otherwise, it checks the internal cache for an appropriate entry, and if
* none is found, reads the terms/points in <code>field</code> as longs and returns
* an array of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
*
* @param reader
* Used to get field values.
* @param field
* Which field contains the longs.
* @param parser
* Computes long for string values. May be {@code null} if the
* requested field was indexed as {@link NumericDocValuesField} or
* {@link org.apache.lucene.document.LegacyLongField}.
* @param setDocsWithField
* If true then {@link #getDocsWithField} will also be computed and
* stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException
* If any error occurs.
*/
public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none
* is found, reads the term values in <code>field</code>
* and returns a {@link BinaryDocValues} instance, providing a
* method to retrieve the term (as a BytesRef) per document.
* @param reader Used to get field values.
* @param field Which field contains the strings.
* @param setDocsWithField If true then {@link #getDocsWithField} will
* also be computed and stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException;
/** Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String,boolean)},
* but you can specify whether more RAM should be consumed in exchange for
* faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none
* is found, reads the term values in <code>field</code>
* and returns a {@link SortedDocValues} instance,
* providing methods to retrieve sort ordinals and terms
* (as a ByteRef) per document.
* @param reader Used to get field values.
* @param field Which field contains the strings.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException;
/** Expert: just like {@link
* #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you can specify
* whether more RAM should be consumed in exchange for
* faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException;
/** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */
public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_INT });
/** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */
public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_LONG });
/**
* Checks the internal cache for an appropriate entry, and if none is found, reads the term values
* in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
* the terms (as ords) per document.
*
* @param reader Used to build a {@link DocTermOrds} instance
* @param field Which field contains the strings.
* @param prefix prefix for a subset of the terms which should be uninverted. Can be null or
* {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX}
*
* @return a {@link DocTermOrds} instance
* @throws IOException If any error occurs.
*/
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException;
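  // Usage sketch (hypothetical multi-valued string field "tags" indexed with StringField):
  //   SortedSetDocValues tagOrds = FieldCache.DEFAULT.getDocTermOrds(leafReader, "tags", null);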
/**
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
* Can be useful for logging/debugging.
* @lucene.experimental
*/
public final class CacheEntry {
private final Object readerKey;
private final String fieldName;
private final Class<?> cacheType;
private final Object custom;
private final Accountable value;
public CacheEntry(Object readerKey, String fieldName,
Class<?> cacheType,
Object custom,
Accountable value) {
this.readerKey = readerKey;
this.fieldName = fieldName;
this.cacheType = cacheType;
this.custom = custom;
this.value = value;
}
public Object getReaderKey() {
return readerKey;
}
public String getFieldName() {
return fieldName;
}
public Class<?> getCacheType() {
return cacheType;
}
public Object getCustom() {
return custom;
}
public Object getValue() {
return value;
}
/**
     * The estimated size of the cached value, in human-readable units
     * (computed from the value's ramBytesUsed()).
*/
public String getEstimatedSize() {
long bytesUsed = value == null ? 0L : value.ramBytesUsed();
return RamUsageEstimator.humanReadableUnits(bytesUsed);
}
@Override
public String toString() {
StringBuilder b = new StringBuilder(250);
b.append("'").append(getReaderKey()).append("'=>");
b.append("'").append(getFieldName()).append("',");
b.append(getCacheType()).append(",").append(getCustom());
b.append("=>").append(getValue().getClass().getName()).append("#");
b.append(System.identityHashCode(getValue()));
String s = getEstimatedSize();
b.append(" (size =~ ").append(s).append(')');
return b.toString();
}
}
/**
* EXPERT: Generates an array of CacheEntry objects representing all items
* currently in the FieldCache.
* <p>
* NOTE: These CacheEntry objects maintain a strong reference to the
   * Cached Values. Maintaining references to a CacheEntry after the IndexReader
   * associated with it has been garbage collected will prevent the Value itself
* from being garbage collected when the Cache drops the WeakReference.
* </p>
* @lucene.experimental
*/
public CacheEntry[] getCacheEntries();
/**
* <p>
* EXPERT: Instructs the FieldCache to forcibly expunge all entries
* from the underlying caches. This is intended only to be used for
* test methods as a way to ensure a known base state of the Cache
* (with out needing to rely on GC to free WeakReferences).
* It should not be relied on for "Cache maintenance" in general
* application code.
* </p>
* @lucene.experimental
*/
public void purgeAllCaches();
/**
* Expert: drops all cache entries associated with this
* reader {@link IndexReader#getCoreCacheKey}. NOTE: this cache key must
* precisely match the reader that the cache entry is
* keyed on. If you pass a top-level reader, it usually
* will have no effect as Lucene now caches at the segment
* reader level.
*/
public void purgeByCacheKey(Object coreCacheKey);
/**
* If non-null, FieldCacheImpl will warn whenever
* entries are created that are not sane according to
* {@link FieldCacheSanityChecker}.
*/
public void setInfoStream(PrintStream stream);
/** counterpart of {@link #setInfoStream(PrintStream)} */
public PrintStream getInfoStream();
}

File diff suppressed because it is too large

View File

@ -0,0 +1,425 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.MapOfSets;
import org.apache.solr.uninverting.FieldCache.CacheEntry;
/**
 * Provides methods for sanity checking that entries in the FieldCache
 * are not wasteful or inconsistent.
* <p>
* Lucene 2.9 Introduced numerous enhancements into how the FieldCache
* is used by the low levels of Lucene searching (for Sorting and
* ValueSourceQueries) to improve both the speed for Sorting, as well
* as reopening of IndexReaders. But these changes have shifted the
* usage of FieldCache from "top level" IndexReaders (frequently a
* MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
* As a result, existing applications that directly access the FieldCache
* may find RAM usage increase significantly when upgrading to 2.9 or
 * later. This class provides an API for these applications (or their
* Unit tests) to check at run time if the FieldCache contains "insane"
* usages of the FieldCache.
* </p>
* @lucene.experimental
* @see FieldCache
* @see FieldCacheSanityChecker.Insanity
* @see FieldCacheSanityChecker.InsanityType
*/
final class FieldCacheSanityChecker {
public FieldCacheSanityChecker() {
/* NOOP */
}
/**
* Quick and dirty convenience method
* @see #check
*/
public static Insanity[] checkSanity(FieldCache cache) {
return checkSanity(cache.getCacheEntries());
}
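  // Typical use from a test (sketch):
  //   Insanity[] problems = FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT);
  //   assert problems.length == 0;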
/**
* Quick and dirty convenience method that instantiates an instance with
* "good defaults" and uses it to test the CacheEntrys
* @see #check
*/
public static Insanity[] checkSanity(CacheEntry... cacheEntries) {
FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
return sanityChecker.check(cacheEntries);
}
/**
* Tests a CacheEntry[] for indication of "insane" cache usage.
* <p>
   * <b>NOTE:</b> FieldCache CreationPlaceholder objects are ignored.
* (:TODO: is this a bad idea? are we masking a real problem?)
* </p>
*/
public Insanity[] check(CacheEntry... cacheEntries) {
if (null == cacheEntries || 0 == cacheEntries.length)
return new Insanity[0];
// the indirect mapping lets MapOfSet dedup identical valIds for us
//
// maps the (valId) identityhashCode of cache values to
// sets of CacheEntry instances
final MapOfSets<Integer, CacheEntry> valIdToItems = new MapOfSets<>(new HashMap<Integer, Set<CacheEntry>>(17));
// maps ReaderField keys to Sets of ValueIds
final MapOfSets<ReaderField, Integer> readerFieldToValIds = new MapOfSets<>(new HashMap<ReaderField, Set<Integer>>(17));
//
// any keys that we know result in more then one valId
final Set<ReaderField> valMismatchKeys = new HashSet<>();
// iterate over all the cacheEntries to get the mappings we'll need
for (int i = 0; i < cacheEntries.length; i++) {
final CacheEntry item = cacheEntries[i];
final Object val = item.getValue();
// It's OK to have dup entries, where one is eg
// float[] and the other is the Bits (from
// getDocWithField())
if (val instanceof FieldCacheImpl.BitsEntry) {
continue;
}
if (val instanceof FieldCache.CreationPlaceholder)
continue;
final ReaderField rf = new ReaderField(item.getReaderKey(),
item.getFieldName());
final Integer valId = Integer.valueOf(System.identityHashCode(val));
// indirect mapping, so the MapOfSet will dedup identical valIds for us
valIdToItems.put(valId, item);
if (1 < readerFieldToValIds.put(rf, valId)) {
valMismatchKeys.add(rf);
}
}
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
insanity.addAll(checkValueMismatch(valIdToItems,
readerFieldToValIds,
valMismatchKeys));
insanity.addAll(checkSubreaders(valIdToItems,
readerFieldToValIds));
return insanity.toArray(new Insanity[insanity.size()]);
}
/**
* Internal helper method used by check that iterates over
* valMismatchKeys and generates a Collection of Insanity
* instances accordingly. The MapOfSets are used to populate
* the Insanity objects.
* @see InsanityType#VALUEMISMATCH
*/
private Collection<Insanity> checkValueMismatch(MapOfSets<Integer, CacheEntry> valIdToItems,
MapOfSets<ReaderField, Integer> readerFieldToValIds,
Set<ReaderField> valMismatchKeys) {
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
if (! valMismatchKeys.isEmpty() ) {
// we have multiple values for some ReaderFields
final Map<ReaderField, Set<Integer>> rfMap = readerFieldToValIds.getMap();
final Map<Integer, Set<CacheEntry>> valMap = valIdToItems.getMap();
for (final ReaderField rf : valMismatchKeys) {
final List<CacheEntry> badEntries = new ArrayList<>(valMismatchKeys.size() * 2);
for(final Integer value: rfMap.get(rf)) {
for (final CacheEntry cacheEntry : valMap.get(value)) {
badEntries.add(cacheEntry);
}
}
CacheEntry[] badness = new CacheEntry[badEntries.size()];
badness = badEntries.toArray(badness);
insanity.add(new Insanity(InsanityType.VALUEMISMATCH,
"Multiple distinct value objects for " +
rf.toString(), badness));
}
}
return insanity;
}
/**
* Internal helper method used by check that iterates over
* the keys of readerFieldToValIds and generates a Collection
* of Insanity instances whenever two (or more) ReaderField instances are
* found that have an ancestry relationships.
*
* @see InsanityType#SUBREADER
*/
private Collection<Insanity> checkSubreaders( MapOfSets<Integer, CacheEntry> valIdToItems,
MapOfSets<ReaderField, Integer> readerFieldToValIds) {
final List<Insanity> insanity = new ArrayList<>(23);
Map<ReaderField, Set<ReaderField>> badChildren = new HashMap<>(17);
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<>(badChildren); // wrapper
Map<Integer, Set<CacheEntry>> viToItemSets = valIdToItems.getMap();
Map<ReaderField, Set<Integer>> rfToValIdSets = readerFieldToValIds.getMap();
Set<ReaderField> seen = new HashSet<>(17);
Set<ReaderField> readerFields = rfToValIdSets.keySet();
for (final ReaderField rf : readerFields) {
if (seen.contains(rf)) continue;
List<Object> kids = getAllDescendantReaderKeys(rf.readerKey);
for (Object kidKey : kids) {
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
if (badChildren.containsKey(kid)) {
          // we've already processed this kid as an RF and found other problems
// track those problems as our own
badKids.put(rf, kid);
badKids.putAll(rf, badChildren.get(kid));
badChildren.remove(kid);
} else if (rfToValIdSets.containsKey(kid)) {
// we have cache entries for the kid
badKids.put(rf, kid);
}
seen.add(kid);
}
seen.add(rf);
}
// every mapping in badKids represents an Insanity
for (final ReaderField parent : badChildren.keySet()) {
Set<ReaderField> kids = badChildren.get(parent);
List<CacheEntry> badEntries = new ArrayList<>(kids.size() * 2);
// put parent entr(ies) in first
{
for (final Integer value : rfToValIdSets.get(parent)) {
badEntries.addAll(viToItemSets.get(value));
}
}
// now the entries for the descendants
for (final ReaderField kid : kids) {
for (final Integer value : rfToValIdSets.get(kid)) {
badEntries.addAll(viToItemSets.get(value));
}
}
CacheEntry[] badness = new CacheEntry[badEntries.size()];
badness = badEntries.toArray(badness);
insanity.add(new Insanity(InsanityType.SUBREADER,
"Found caches for descendants of " +
parent.toString(),
badness));
}
return insanity;
}
/**
* Checks if the seed is an IndexReader, and if so will walk
* the hierarchy of subReaders building up a list of the objects
* returned by {@code seed.getCoreCacheKey()}
*/
private List<Object> getAllDescendantReaderKeys(Object seed) {
List<Object> all = new ArrayList<>(17); // will grow as we iter
all.add(seed);
for (int i = 0; i < all.size(); i++) {
final Object obj = all.get(i);
// TODO: We don't check closed readers here (as getTopReaderContext
// throws AlreadyClosedException), what should we do? Reflection?
if (obj instanceof IndexReader) {
try {
final List<IndexReaderContext> childs =
((IndexReader) obj).getContext().children();
if (childs != null) { // it is composite reader
for (final IndexReaderContext ctx : childs) {
all.add(ctx.reader().getCoreCacheKey());
}
}
} catch (AlreadyClosedException ace) {
// ignore this reader
}
}
}
// need to skip the first, because it was the seed
return all.subList(1, all.size());
}
/**
   * Simple pair object for using "readerKey + fieldName" as a Map key
*/
private final static class ReaderField {
public final Object readerKey;
public final String fieldName;
public ReaderField(Object readerKey, String fieldName) {
this.readerKey = readerKey;
this.fieldName = fieldName;
}
@Override
public int hashCode() {
return System.identityHashCode(readerKey) * fieldName.hashCode();
}
@Override
public boolean equals(Object that) {
if (! (that instanceof ReaderField)) return false;
ReaderField other = (ReaderField) that;
return (this.readerKey == other.readerKey &&
this.fieldName.equals(other.fieldName));
}
@Override
public String toString() {
return readerKey.toString() + "+" + fieldName;
}
}
/**
* Simple container for a collection of related CacheEntry objects that
* in conjunction with each other represent some "insane" usage of the
* FieldCache.
*/
public final static class Insanity {
private final InsanityType type;
private final String msg;
private final CacheEntry[] entries;
public Insanity(InsanityType type, String msg, CacheEntry... entries) {
if (null == type) {
throw new IllegalArgumentException
("Insanity requires non-null InsanityType");
}
if (null == entries || 0 == entries.length) {
throw new IllegalArgumentException
("Insanity requires non-null/non-empty CacheEntry[]");
}
this.type = type;
this.msg = msg;
this.entries = entries;
}
/**
* Type of insane behavior this object represents
*/
public InsanityType getType() { return type; }
/**
     * Description of the insane behavior
*/
public String getMsg() { return msg; }
/**
* CacheEntry objects which suggest a problem
*/
public CacheEntry[] getCacheEntries() { return entries; }
/**
* Multi-Line representation of this Insanity object, starting with
* the Type and Msg, followed by each CacheEntry.toString() on its
* own line prefaced by a tab character
*/
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append(getType()).append(": ");
String m = getMsg();
if (null != m) buf.append(m);
buf.append('\n');
CacheEntry[] ce = getCacheEntries();
for (int i = 0; i < ce.length; i++) {
buf.append('\t').append(ce[i].toString()).append('\n');
}
return buf.toString();
}
}
/**
* An Enumeration of the different types of "insane" behavior that
* may be detected in a FieldCache.
*
* @see InsanityType#SUBREADER
* @see InsanityType#VALUEMISMATCH
* @see InsanityType#EXPECTED
*/
public final static class InsanityType {
private final String label;
private InsanityType(final String label) {
this.label = label;
}
@Override
public String toString() { return label; }
/**
* Indicates an overlap in cache usage on a given field
* in sub/super readers.
*/
public final static InsanityType SUBREADER
= new InsanityType("SUBREADER");
/**
* <p>
* Indicates entries have the same reader+fieldname but
* different cached values. This can happen if different datatypes,
* or parsers are used -- and while it's not necessarily a bug
* it's typically an indication of a possible problem.
* </p>
* <p>
* <b>NOTE:</b> Only the reader, fieldname, and cached value are actually
* tested -- if two cache entries have different parsers or datatypes but
* the cached values are the same Object (== not just equal()) this method
* does not consider that a red flag. This allows for subtle variations
* in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
* </p>
*/
public final static InsanityType VALUEMISMATCH
= new InsanityType("VALUEMISMATCH");
/**
* Indicates an expected bit of "insanity". This may be useful for
* clients that wish to preserve/log information about insane usage
* but indicate that it was expected.
*/
public final static InsanityType EXPECTED
= new InsanityType("EXPECTED");
}
}

View File

@ -0,0 +1,391 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
import org.apache.lucene.document.NumericDocValuesField; // javadocs
import org.apache.lucene.document.SortedDocValuesField; // javadocs
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.document.StringField; // javadocs
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Bits;
import org.apache.solr.uninverting.FieldCache.CacheEntry;
/**
* A FilterReader that exposes <i>indexed</i> values as if they also had
* docvalues.
* <p>
* This is accomplished by "inverting the inverted index" or "uninversion".
* <p>
* The uninversion process happens lazily: upon the first request for the
* field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}
* or similar), it will create the docvalues on-the-fly if needed and cache it,
* based on the core cache key of the wrapped LeafReader.
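 * <p>
 * A minimal wrapping sketch (hypothetical field names; "popularity" is assumed to be indexed
 * with {@link org.apache.lucene.document.IntPoint} and "category" with {@link StringField}):
 * <pre class="prettyprint">
 *   Map&lt;String,Type&gt; mapping = new HashMap&lt;&gt;();
 *   mapping.put("popularity", Type.INTEGER_POINT);
 *   mapping.put("category", Type.SORTED);
 *   DirectoryReader wrapped = UninvertingReader.wrap(directoryReader, mapping);
 * </pre>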
*/
public class UninvertingReader extends FilterLeafReader {
/**
* Specifies the type of uninversion to apply for the field.
*/
public static enum Type {
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
INTEGER_POINT,
/**
     * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
LONG_POINT,
/**
     * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
FLOAT_POINT,
/**
     * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
DOUBLE_POINT,
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #INTEGER_POINT} instead.
*/
@Deprecated
LEGACY_INTEGER,
/**
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #LONG_POINT} instead.
*/
@Deprecated
LEGACY_LONG,
/**
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #FLOAT_POINT} instead.
*/
@Deprecated
LEGACY_FLOAT,
/**
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
*/
@Deprecated
LEGACY_DOUBLE,
/**
* Single-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link BinaryDocValuesField}.
*/
BINARY,
/**
* Single-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedDocValuesField}.
*/
SORTED,
/**
* Multi-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_BINARY,
/**
* Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_INTEGER,
/**
* Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_FLOAT,
/**
* Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_LONG,
/**
* Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_DOUBLE
}
/**
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
* and so on.
*/
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
return new UninvertingDirectoryReader(in, mapping);
}
static class UninvertingDirectoryReader extends FilterDirectoryReader {
final Map<String,Type> mapping;
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return new UninvertingReader(reader, mapping);
}
});
this.mapping = mapping;
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
return new UninvertingDirectoryReader(in, mapping);
}
}
final Map<String,Type> mapping;
final FieldInfos fieldInfos;
/**
* Create a new UninvertingReader with the specified mapping
* <p>
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
* instead.
*
* @lucene.internal
*/
public UninvertingReader(LeafReader in, Map<String,Type> mapping) {
super(in);
this.mapping = mapping;
ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
for (FieldInfo fi : in.getFieldInfos()) {
DocValuesType type = fi.getDocValuesType();
if (type == DocValuesType.NONE) {
Type t = mapping.get(fi.name);
if (t != null) {
if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) {
// type uses points
if (fi.getPointDimensionCount() == 0) {
continue;
}
} else {
// type uses inverted index
if (fi.getIndexOptions() == IndexOptions.NONE) {
continue;
}
}
switch(t) {
case INTEGER_POINT:
case LONG_POINT:
case FLOAT_POINT:
case DOUBLE_POINT:
case LEGACY_INTEGER:
case LEGACY_LONG:
case LEGACY_FLOAT:
case LEGACY_DOUBLE:
type = DocValuesType.NUMERIC;
break;
case BINARY:
type = DocValuesType.BINARY;
break;
case SORTED:
type = DocValuesType.SORTED;
break;
case SORTED_SET_BINARY:
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
case SORTED_SET_LONG:
case SORTED_SET_DOUBLE:
type = DocValuesType.SORTED_SET;
break;
default:
throw new AssertionError();
}
}
}
filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
fi.getPointDimensionCount(), fi.getPointNumBytes()));
}
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
}
@Override
public FieldInfos getFieldInfos() {
return fieldInfos;
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
Type v = getType(field);
if (v != null) {
switch (v) {
case INTEGER_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER, true);
case FLOAT_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER, true);
case LONG_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER, true);
case DOUBLE_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER, true);
case LEGACY_INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER, true);
case LEGACY_FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER, true);
case LEGACY_LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER, true);
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER, true);
}
}
return super.getNumericDocValues(field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
Type v = getType(field);
if (v == Type.BINARY) {
return FieldCache.DEFAULT.getTerms(in, field, true);
} else {
return in.getBinaryDocValues(field);
}
}
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
Type v = getType(field);
if (v == Type.SORTED) {
return FieldCache.DEFAULT.getTermsIndex(in, field);
} else {
return in.getSortedDocValues(field);
}
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
Type v = getType(field);
if (v != null) {
switch (v) {
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
case SORTED_SET_LONG:
case SORTED_SET_DOUBLE:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
case SORTED_SET_BINARY:
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
}
}
return in.getSortedSetDocValues(field);
}
@Override
public Bits getDocsWithField(String field) throws IOException {
Type v = getType(field);
if (v != null) {
switch (v) {
case INTEGER_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.INT_POINT_PARSER);
case FLOAT_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.FLOAT_POINT_PARSER);
case LONG_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LONG_POINT_PARSER);
case DOUBLE_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.DOUBLE_POINT_PARSER);
case LEGACY_INTEGER: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_INT_PARSER);
case LEGACY_FLOAT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_FLOAT_PARSER);
case LEGACY_LONG: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_LONG_PARSER);
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_DOUBLE_PARSER);
default:
return FieldCache.DEFAULT.getDocsWithField(in, field, null);
}
} else {
return in.getDocsWithField(field);
}
}
/**
* Returns the field's uninversion type, or null
* if the field doesn't exist or doesn't have a mapping.
*/
private Type getType(String field) {
FieldInfo info = fieldInfos.fieldInfo(field);
if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
return null;
}
return mapping.get(field);
}
@Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
@Override
public Object getCombinedCoreAndDeletesKey() {
return in.getCombinedCoreAndDeletesKey();
}
@Override
public String toString() {
return "Uninverting(" + in.toString() + ")";
}
/**
* Return information about the backing cache
* @lucene.internal
*/
public static String[] getUninvertedStats() {
CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
String[] info = new String[entries.length];
for (int i = 0; i < entries.length; i++) {
info[i] = entries[i].toString();
}
return info;
}
}

View File

@ -0,0 +1,21 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Support for creating docvalues on-the-fly from the inverted index at runtime.
*/
package org.apache.solr.uninverting;

View File

@ -29,8 +29,9 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.uninverting.UninvertingReader;
/**
* Allows access to uninverted docvalues by delete-by-queries.

View File

@ -24,7 +24,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Terms;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
@ -34,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;

View File

@ -16,103 +16,124 @@
*/
package org.apache.solr.cloud;
import static org.hamcrest.CoreMatchers.not;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.junit.After;
import org.junit.Before;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.cloud.ZkStateReader;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.hamcrest.CoreMatchers.*;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.HashSet;
import java.util.Set;
/**
* Tests using fromIndex that points to a collection in SolrCloud mode.
*/
public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase {
public class DistribJoinFromCollectionTest extends SolrCloudTestCase{
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
final private static String[] scoreModes = {"avg","max","min","total"};
public DistribJoinFromCollectionTest() {
super();
}
@Before
@Override
public void setUp() throws Exception {
super.setUp();
System.setProperty("numShards", Integer.toString(sliceCount));
}
@Override
@After
public void tearDown() throws Exception {
try {
super.tearDown();
} catch (Exception exc) {}
resetExceptionIgnores();
}
// resetExceptionIgnores();
private static String toColl = "to_2x2";
private static String fromColl = "from_1x4";
@Test
public void test() throws Exception {
private static Integer toDocId;
@BeforeClass
public static void setupCluster() throws Exception {
final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
String configName = "solrCloudCollectionConfig";
int nodeCount = 5;
configureCluster(nodeCount)
.addConfig(configName, configDir)
.configure();
Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put("config", "solrconfig-tlog.xml" );
collectionProperties.put("schema", "schema.xml");
// create a collection holding data for the "to" side of the JOIN
String toColl = "to_2x2";
createCollection(toColl, 2, 2, 2);
ensureAllReplicasAreActive(toColl, "shard1", 2, 2, 30);
ensureAllReplicasAreActive(toColl, "shard2", 2, 2, 30);
int shards = 2;
int replicas = 2;
assertNotNull(cluster.createCollection(toColl, shards, replicas,
configName,
collectionProperties));
// get the set of nodes where replicas for the "to" collection exist
Set<String> nodeSet = new HashSet<>();
ClusterState cs = cloudClient.getZkStateReader().getClusterState();
for (Slice slice : cs.getActiveSlices(toColl))
ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
ClusterState cs = zkStateReader.getClusterState();
for (Slice slice : cs.getCollection(toColl).getActiveSlices())
for (Replica replica : slice.getReplicas())
nodeSet.add(replica.getNodeName());
assertTrue(nodeSet.size() > 0);
// deploy the "from" collection to all nodes where the "to" collection exists
String fromColl = "from_1x2";
createCollection(null, fromColl, 1, nodeSet.size(), 1, null, StringUtils.join(nodeSet,","));
ensureAllReplicasAreActive(fromColl, "shard1", 1, nodeSet.size(), 30);
// both to and from collections are up and active, index some docs ...
Integer toDocId = indexDoc(toColl, 1001, "a", null, "b");
assertNotNull(cluster.createCollection(fromColl, 1, 4,
configName, StringUtils.join(nodeSet,","), null,
collectionProperties));
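// wait for all replicas of both collections to become active before indexing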
AbstractDistribZkTestBase.waitForRecoveriesToFinish(toColl, zkStateReader, false, true, 30);
AbstractDistribZkTestBase.waitForRecoveriesToFinish(fromColl, zkStateReader, false, true, 30);
toDocId = indexDoc(toColl, 1001, "a", null, "b");
indexDoc(fromColl, 2001, "a", "c", null);
Thread.sleep(1000); // so the commits fire
}
@Test
public void testScore() throws Exception {
//without score
testJoins(toColl, fromColl, toDocId, false);
}
@Test
public void testNoScore() throws Exception {
//with score
testJoins(toColl, fromColl, toDocId, true);
}
@AfterClass
public static void shutdown() {
log.info("DistribJoinFromCollectionTest logic complete ... deleting the " + toColl + " and " + fromColl + " collections");
// try to clean up
for (String c : new String[]{ toColl, fromColl }) {
try {
CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete()
.setCollectionName(c);
req.process(cloudClient);
CollectionAdminRequest.Delete req = CollectionAdminRequest.deleteCollection(c);
req.process(cluster.getSolrClient());
} catch (Exception e) {
// don't fail the test
log.warn("Could not delete collection {} after test completed due to: " + e, c);
@ -126,12 +147,13 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
throws SolrServerException, IOException {
// verify the join with fromIndex works
final String fromQ = "match_s:c match_s:not_1_0_score_after_weight_normalization";
CloudSolrClient client = cluster.getSolrClient();
{
final String joinQ = "{!join " + anyScoreMode(isScoresTest)
+ "from=join_s fromIndex=" + fromColl +
" to=join_s}" + fromQ;
QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient);
QueryResponse rsp = new QueryResponse(client.request(qr), client);
SolrDocumentList hits = rsp.getResults();
assertTrue("Expected 1 doc, got "+hits, hits.getNumFound() == 1);
SolrDocument doc = hits.get(0);
@ -145,16 +167,14 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
// create an alias for the fromIndex and then query through the alias
String alias = fromColl+"Alias";
CollectionAdminRequest.CreateAlias request = new CollectionAdminRequest.CreateAlias();
request.setAliasName(alias);
request.setAliasedCollections(fromColl);
request.process(cloudClient);
CollectionAdminRequest.CreateAlias request = CollectionAdminRequest.createAlias(alias,fromColl);
request.process(client);
{
final String joinQ = "{!join " + anyScoreMode(isScoresTest)
+ "from=join_s fromIndex=" + alias + " to=join_s}"+fromQ;
final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
final QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient);
final QueryResponse rsp = new QueryResponse(client.request(qr), client);
final SolrDocumentList hits = rsp.getResults();
assertTrue("Expected 1 doc", hits.getNumFound() == 1);
SolrDocument doc = hits.get(0);
@ -171,7 +191,7 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
final String joinQ = "{!join " + (anyScoreMode(isScoresTest))
+ "from=join_s fromIndex=" + fromColl + " to=join_s}match_s:d";
final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
final QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient);
final QueryResponse rsp = new QueryResponse(client.request(qr), client);
final SolrDocumentList hits = rsp.getResults();
assertTrue("Expected no hits", hits.getNumFound() == 0);
}
@ -195,14 +215,14 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
+ "from=join_s fromIndex=" + wrongName + " to=join_s}match_s:c";
final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
try {
cloudClient.request(qr);
cluster.getSolrClient().request(qr);
} catch (HttpSolrClient.RemoteSolrException ex) {
assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, ex.code());
assertTrue(ex.getMessage().contains(wrongName));
}
}
protected Integer indexDoc(String collection, int id, String joinField, String matchField, String getField) throws Exception {
protected static Integer indexDoc(String collection, int id, String joinField, String matchField, String getField) throws Exception {
UpdateRequest up = new UpdateRequest();
up.setCommitWithin(50);
up.setParam("collection", collection);
@ -215,7 +235,7 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
if (getField != null)
doc.addField("get_s", getField);
up.add(doc);
cloudClient.request(up);
cluster.getSolrClient().request(up);
return docId;
}
}

View File

@ -703,7 +703,87 @@ public class HighlighterTest extends SolrTestCaseJ4 {
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='a piece of text']"
);
}
@Test
public void testAlternateSummaryWithHighlighting() {
//long document
assertU(adoc("tv_text", "keyword is only here, tv_text alternate field",
"t_text", "a piece of text to be substituted",
"other_t", "keyword",
"id", "1",
"foo_t","hi"));
assertU(commit());
assertU(optimize());
// Prove that hl.highlightAlternate is default true and respects maxAlternateFieldLength
HashMap<String,String> args = new HashMap<>();
args.put("hl", "true");
args.put("hl.fragsize","0");
args.put("hl.fl", "t_text");
args.put("hl.simple.pre", "<simplepre>");
args.put("hl.simple.post", "</simplepost>");
args.put("hl.alternateField", "tv_text");
args.put("hl.maxAlternateFieldLength", "39");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertQ("Alternate summarization with highlighting",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text']"
);
// Query on a field other than the hl field or the alternate field; we still get the highlighted snippet from the alternate field
assertQ("Alternate summarization with highlighting, query other field",
sumLRF.makeRequest("other_t:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text']"
);
// With hl.requireFieldMatch, will not highlight but fall back to plain-text alternate
args.put("hl.requireFieldMatch", "true");
sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertQ("Alternate summarization with highlighting, requireFieldMatch",
sumLRF.makeRequest("other_t:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text alternate']"
);
args.put("hl.requireFieldMatch", "false");
// Works with field specific params, overriding maxAlternateFieldLength to return everything
args.remove("hl.alternateField");
args.put("f.t_text.hl.alternateField", "tv_text");
args.put("f.t_text.hl.maxAlternateFieldLength", "0");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("Alternate summarization with highlighting",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text alternate field']"
);
// Prove fallback highlighting works also with FVH
args.put("hl.useFastVectorHighlighter", "true");
args.put("hl.tag.pre", "<fvhpre>");
args.put("hl.tag.post", "</fvhpost>");
args.put("f.t_text.hl.maxAlternateFieldLength", "18");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("Alternate summarization with highlighting using FVH",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<fvhpre>keyword</fvhpost> is only here']"
);
// Prove it is possible to turn off highlighting of alternate field
args.put("hl.highlightAlternate", "false");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("Alternate summarization without highlighting",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only he']"
);
}
@Test
public void testPhraseHighlighter() {
HashMap<String,String> args = new HashMap<>();

View File

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestSlowCompositeReaderWrapper extends LuceneTestCase {
public void testCoreListenerOnSlowCompositeReaderWrapper() throws IOException {
RandomIndexWriter w = new RandomIndexWriter(random(), newDirectory());
final int numDocs = TestUtil.nextInt(random(), 1, 5);
for (int i = 0; i < numDocs; ++i) {
w.addDocument(new Document());
if (random().nextBoolean()) {
w.commit();
}
}
w.commit();
w.close();
final IndexReader reader = DirectoryReader.open(w.w.getDirectory());
final LeafReader leafReader = SlowCompositeReaderWrapper.wrap(reader);
final int numListeners = TestUtil.nextInt(random(), 1, 10);
final List<LeafReader.CoreClosedListener> listeners = new ArrayList<>();
AtomicInteger counter = new AtomicInteger(numListeners);
for (int i = 0; i < numListeners; ++i) {
CountCoreListener listener = new CountCoreListener(counter, leafReader.getCoreCacheKey());
listeners.add(listener);
leafReader.addCoreClosedListener(listener);
}
for (int i = 0; i < 100; ++i) {
leafReader.addCoreClosedListener(listeners.get(random().nextInt(listeners.size())));
}
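// listeners registered more than once must still fire only once on close; the counter assertions below verify this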
final int removed = random().nextInt(numListeners);
Collections.shuffle(listeners, random());
for (int i = 0; i < removed; ++i) {
leafReader.removeCoreClosedListener(listeners.get(i));
}
assertEquals(numListeners, counter.get());
// make sure listeners are registered on the wrapped reader and that closing any of them has the same effect
if (random().nextBoolean()) {
reader.close();
} else {
leafReader.close();
}
assertEquals(removed, counter.get());
w.w.getDirectory().close();
}
private static final class CountCoreListener implements LeafReader.CoreClosedListener {
private final AtomicInteger count;
private final Object coreCacheKey;
public CountCoreListener(AtomicInteger count, Object coreCacheKey) {
this.count = count;
this.coreCacheKey = coreCacheKey;
}
@Override
public void onClose(Object coreCacheKey) {
assertSame(this.coreCacheKey, coreCacheKey);
count.decrementAndGet();
}
}
}

View File

@ -25,12 +25,12 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.uninverting.DocTermOrds;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.uninverting.DocTermOrds;
import org.apache.solr.util.RefCounted;
import org.junit.After;
import org.junit.BeforeClass;

View File

@ -17,6 +17,8 @@
package org.apache.solr.response.transform;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
@ -26,27 +28,58 @@ import java.util.Random;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
import org.apache.solr.cloud.AbstractDistribZkTestBase;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.ContentStreamBase;
import org.junit.BeforeClass;
import org.junit.Test;
@SuppressSSL
public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBase {
public class TestSubQueryTransformerDistrib extends SolrCloudTestCase {
@Override
protected String getCloudSchemaFile() {
return "schema-docValuesJoin.xml";
final static String people = "people";
final static String depts = "departments";
@BeforeClass
public static void setupCluster() throws Exception {
final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
String configName = "solrCloudCollectionConfig";
int nodeCount = 5;
configureCluster(nodeCount)
.addConfig(configName, configDir)
.configure();
Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put("config", "solrconfig-doctransformers.xml" );
collectionProperties.put("schema", "schema-docValuesJoin.xml");
int shards = 2;
int replicas = 2;
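// create the "people" and "departments" collections (2 shards x 2 replicas each) from the same configset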
assertNotNull(cluster.createCollection(people, shards, replicas,
configName,
collectionProperties));
assertNotNull(cluster.createCollection(depts, shards, replicas,
configName, collectionProperties));
CloudSolrClient client = cluster.getSolrClient();
client.setDefaultCollection(people);
ZkStateReader zkStateReader = client.getZkStateReader();
AbstractDistribZkTestBase.waitForRecoveriesToFinish(people, zkStateReader, true, true, 30);
AbstractDistribZkTestBase.waitForRecoveriesToFinish(depts, zkStateReader, false, true, 30);
}
@Override
protected String getCloudSolrConfig() {
return "solrconfig-basic.xml";
}
@SuppressWarnings("serial")
@Test
@ -54,14 +87,6 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
int peopleMultiplier = atLeast(1);
int deptMultiplier = atLeast(1);
final String people = "people";
createCollection(people, 2, 1, 10);
final String depts = "departments";
createCollection(depts, 2, 1, 10);
createIndex(people, peopleMultiplier, depts, deptMultiplier);
Random random1 = random();
@ -79,7 +104,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
"depts.rows",""+(deptMultiplier*2),
"depts.logParamsList","q,fl,rows,row.dept_ss_dv"}));
final QueryResponse rsp = new QueryResponse();
rsp.setResponse(cloudClient.request(qr, people));
rsp.setResponse(cluster.getSolrClient().request(qr, people));
final SolrDocumentList hits = rsp.getResults();
assertEquals(peopleMultiplier, hits.getNumFound());
@ -116,6 +141,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
private void createIndex(String people, int peopleMultiplier, String depts, int deptMultiplier)
throws SolrServerException, IOException {
int id=0;
List<String> peopleDocs = new ArrayList<>();
for (int p=0; p < peopleMultiplier; p++){
@ -161,13 +187,16 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
private void addDocs(String collection, List<String> docs) throws SolrServerException, IOException {
StringBuilder upd = new StringBuilder("<update>");
upd.append("<delete><query>*:*</query></delete>");
for (Iterator<String> iterator = docs.iterator(); iterator.hasNext();) {
String add = iterator.next();
upd.append(add);
if (rarely()) {
upd.append(commit("softCommit", "true"));
}
if (!rarely() || !iterator.hasNext()) {
if (rarely() || !iterator.hasNext()) {
if (!iterator.hasNext()) {
upd.append(commit("softCommit", "false"));
}
@ -176,7 +205,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update");
req.addContentStream(new ContentStreamBase.StringStream(upd.toString(),"text/xml"));
cloudClient.request(req, collection);
cluster.getSolrClient().request(req, collection);
upd.setLength("<update>".length());
}
}

View File

@ -42,13 +42,12 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
@ -56,6 +55,7 @@ import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uninverting.UninvertingReader;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -16,7 +16,12 @@
*/
package org.apache.solr.search.join;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@ -24,34 +29,56 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.AbstractDistribZkTestBase;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass;
import org.junit.Test;
@Slow
public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
private static final String collection = "facetcollection";
@BeforeClass
public static void beforeSuperClass() throws Exception {
schemaString = "schema-blockjoinfacetcomponent.xml";
configString = "solrconfig-blockjoinfacetcomponent.xml";
}
public static void setupCluster() throws Exception {
final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
@ShardsFixed(num = 3)
public void test() throws Exception {
testBJQFacetComponent();
String configName = "solrCloudCollectionConfig";
int nodeCount = 6;
configureCluster(nodeCount)
.addConfig(configName, configDir)
.configure();
Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put("config", "solrconfig-blockjoinfacetcomponent.xml" );
collectionProperties.put("schema", "schema-blockjoinfacetcomponent.xml");
// create a collection holding data for the "to" side of the JOIN
int shards = 3;
int replicas = 2;
assertNotNull(cluster.createCollection(collection, shards, replicas,
configName,
collectionProperties));
AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection,
cluster.getSolrClient().getZkStateReader(), false, true, 30);
}
final static List<String> colors = Arrays.asList("red","blue","brown","white","black","yellow","cyan","magenta","blur",
"fuchsia", "light","dark","green","grey","don't","know","any","more" );
final static List<String> sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi");
private void testBJQFacetComponent() throws Exception {
@Test
public void testBJQFacetComponent() throws Exception {
assert !colors.removeAll(sizes) : "there are no colors in sizes";
Collections.shuffle(colors,random());
@ -64,8 +91,11 @@ public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
}
};
cluster.getSolrClient().deleteByQuery(collection, "*:*");
final int parents = atLeast(10);
boolean aggregationOccurs = false;
List<SolrInputDocument> parentDocs = new ArrayList<>();
for(int parent=0; parent<parents || !aggregationOccurs;parent++){
assert parent < 2000000 : "parent num "+parent+
" aggregationOccurs:"+aggregationOccurs+". Sorry! too tricky loop condition.";
@ -89,22 +119,18 @@ public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
}
pdoc.addChildDocument(childDoc);
}
indexDoc(pdoc);
parentDocs.add(pdoc);
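// index in random-sized batches, following each flushed batch with a soft commit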
if (!parentDocs.isEmpty() && rarely()) {
indexDocs(parentDocs);
parentDocs.clear();
cluster.getSolrClient().commit(collection, false, false, true);
}
}
commit();
//handle.clear();
handle.put("timestamp", SKIPVAL);
handle.put("_version_", SKIPVAL); // not a cloud test, but may use updateLog
handle.put("maxScore", SKIP);// see org.apache.solr.TestDistributedSearch.test()
handle.put("shards", SKIP);
handle.put("distrib", SKIP);
handle.put("rid", SKIP);
handle.put("track", SKIP);
handle.put("facet_fields", UNORDERED);
handle.put("SIZE_s", UNORDERED);
handle.put("COLOR_s", UNORDERED);
if (!parentDocs.isEmpty()) {
indexDocs(parentDocs);
}
cluster.getSolrClient().commit(collection);
// to parent query
final String childQueryClause = "COLOR_s:("+(matchingColors.toString().replaceAll("[,\\[\\]]", " "))+")";
QueryResponse results = query("q", "{!parent which=\"type_s:parent\"}"+childQueryClause,
@ -122,15 +148,24 @@ public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
String msg = ""+parentIdsByAttrValue+" "+color_s+" "+size_s;
for (FacetField facet: new FacetField[]{color_s, size_s}) {
for (Count c : facet.getValues()) {
assertEquals(c.getName()+"("+msg+")", parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
assertEquals(c.getName()+"("+msg+")",
parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
}
}
assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount());
// }
//System.out.println(parentIdsByAttrValue);
}
protected String getCloudSolrConfig() {
return configString;
private QueryResponse query(String ... arg) throws SolrServerException, IOException {
ModifiableSolrParams solrParams = new ModifiableSolrParams();
for(int i=0; i<arg.length; i+=2) {
solrParams.add(arg[i], arg[i+1]);
}
return cluster.getSolrClient().query(collection, solrParams);
}
private void indexDocs(Collection<SolrInputDocument> pdocs) throws SolrServerException, IOException {
cluster.getSolrClient().add(collection, pdocs);
}
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SpellingParams;
@ -594,5 +595,34 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
List<?> collations = (List<?>) collationList.getAll("collation");
assertTrue(collations.size() == 2);
}
@Test
public void testWithCursorMark() throws Exception
{
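// spellcheck collation (with collation tries) should still be returned when the request uses cursorMark paging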
SolrCore core = h.getCore();
SearchComponent speller = core.getSearchComponent("spellcheck");
assertTrue("speller is null and it shouldn't be", speller != null);
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "2");
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1");
params.add(CommonParams.Q, "lowerfilt:(+fauth)");
params.add(CommonParams.SORT, "id asc");
params.add(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START);
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
SolrQueryResponse rsp = new SolrQueryResponse();
rsp.addResponseHeader(new SimpleOrderedMap());
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
handler.handleRequest(req, rsp);
req.close();
NamedList values = rsp.getValues();
NamedList spellCheck = (NamedList) values.get("spellcheck");
NamedList collationList = (NamedList) spellCheck.get("collations");
List<?> collations = (List<?>) collationList.getAll("collation");
assertTrue(collations.size() == 1);
}
}

View File

@ -0,0 +1,681 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
// TODO:
// - test w/ del docs
// - test prefix
// - test w/ cutoff
// - crank docs way up so we get some merging sometimes
public class TestDocTermOrds extends LuceneTestCase {
public void testEmptyIndex() throws IOException {
final Directory dir = newDirectory();
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
iw.close();
final DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
TestUtil.checkReader(composite);
// check the leaves
// (normally there are none for an empty index, so this is really just future
// proofing in case that changes for some reason)
for (LeafReaderContext rc : ir.leaves()) {
final LeafReader r = rc.reader();
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
}
// check the composite
final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());
ir.close();
dir.close();
}
public void testSimple() throws Exception {
Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
Field field = newTextField("field", "", Field.Store.NO);
doc.add(field);
field.setStringValue("a b c");
w.addDocument(doc);
field.setStringValue("d e f");
w.addDocument(doc);
field.setStringValue("a f");
w.addDocument(doc);
final IndexReader r = w.getReader();
w.close();
final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(ar);
final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
SortedSetDocValues iter = dto.iterator(ar);
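// terms across the three docs sort as a,b,c,d,e,f -> ords 0..5; doc 2 contains a and f, i.e. ords 0 and 5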
iter.setDocument(0);
assertEquals(0, iter.nextOrd());
assertEquals(1, iter.nextOrd());
assertEquals(2, iter.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
iter.setDocument(1);
assertEquals(3, iter.nextOrd());
assertEquals(4, iter.nextOrd());
assertEquals(5, iter.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
iter.setDocument(2);
assertEquals(0, iter.nextOrd());
assertEquals(5, iter.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
r.close();
dir.close();
}
public void testRandom() throws Exception {
Directory dir = newDirectory();
final int NUM_TERMS = atLeast(20);
final Set<BytesRef> terms = new HashSet<>();
while(terms.size() < NUM_TERMS) {
final String s = TestUtil.randomRealisticUnicodeString(random());
//final String s = _TestUtil.randomSimpleString(random);
if (s.length() > 0) {
terms.add(new BytesRef(s));
}
}
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
Arrays.sort(termsArray);
final int NUM_DOCS = atLeast(100);
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
// Sometimes swap in codec that impls ord():
if (random().nextInt(10) == 7) {
// Make sure terms index has ords:
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
conf.setCodec(codec);
}
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
final int[][] idToOrds = new int[NUM_DOCS][];
final Set<Integer> ordsForDocSet = new HashSet<>();
for(int id=0;id<NUM_DOCS;id++) {
Document doc = new Document();
doc.add(new LegacyIntField("id", id, Field.Store.YES));
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
while(ordsForDocSet.size() < termCount) {
ordsForDocSet.add(random().nextInt(termsArray.length));
}
final int[] ordsForDoc = new int[termCount];
int upto = 0;
if (VERBOSE) {
System.out.println("TEST: doc id=" + id);
}
for(int ord : ordsForDocSet) {
ordsForDoc[upto++] = ord;
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
if (VERBOSE) {
System.out.println(" f=" + termsArray[ord].utf8ToString());
}
doc.add(field);
}
ordsForDocSet.clear();
Arrays.sort(ordsForDoc);
idToOrds[id] = ordsForDoc;
w.addDocument(doc);
}
final DirectoryReader r = w.getReader();
w.close();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
}
for(LeafReaderContext ctx : r.leaves()) {
if (VERBOSE) {
System.out.println("\nTEST: sub=" + ctx.reader());
}
verify(ctx.reader(), idToOrds, termsArray, null);
}
// Also test top-level reader: its enum does not support
// ord, so this forces the OrdWrapper to run:
if (VERBOSE) {
System.out.println("TEST: top reader");
}
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(slowR);
verify(slowR, idToOrds, termsArray, null);
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
r.close();
dir.close();
}
public void testRandomWithPrefix() throws Exception {
Directory dir = newDirectory();
final Set<String> prefixes = new HashSet<>();
final int numPrefix = TestUtil.nextInt(random(), 2, 7);
if (VERBOSE) {
System.out.println("TEST: use " + numPrefix + " prefixes");
}
while(prefixes.size() < numPrefix) {
prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
//prefixes.add(_TestUtil.randomSimpleString(random));
}
final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);
final int NUM_TERMS = atLeast(20);
final Set<BytesRef> terms = new HashSet<>();
while(terms.size() < NUM_TERMS) {
final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random());
//final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
if (s.length() > 0) {
terms.add(new BytesRef(s));
}
}
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
Arrays.sort(termsArray);
final int NUM_DOCS = atLeast(100);
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
// Sometimes swap in codec that impls ord():
if (random().nextInt(10) == 7) {
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
conf.setCodec(codec);
}
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
final int[][] idToOrds = new int[NUM_DOCS][];
final Set<Integer> ordsForDocSet = new HashSet<>();
for(int id=0;id<NUM_DOCS;id++) {
Document doc = new Document();
doc.add(new LegacyIntField("id", id, Field.Store.YES));
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
while(ordsForDocSet.size() < termCount) {
ordsForDocSet.add(random().nextInt(termsArray.length));
}
final int[] ordsForDoc = new int[termCount];
int upto = 0;
if (VERBOSE) {
System.out.println("TEST: doc id=" + id);
}
for(int ord : ordsForDocSet) {
ordsForDoc[upto++] = ord;
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
if (VERBOSE) {
System.out.println(" f=" + termsArray[ord].utf8ToString());
}
doc.add(field);
}
ordsForDocSet.clear();
Arrays.sort(ordsForDoc);
idToOrds[id] = ordsForDoc;
w.addDocument(doc);
}
final DirectoryReader r = w.getReader();
w.close();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
}
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(slowR);
for(String prefix : prefixesArray) {
final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
for(int id=0;id<NUM_DOCS;id++) {
final int[] docOrds = idToOrds[id];
final List<Integer> newOrds = new ArrayList<>();
for(int ord : idToOrds[id]) {
if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
newOrds.add(ord);
}
}
final int[] newOrdsArray = new int[newOrds.size()];
int upto = 0;
for(int ord : newOrds) {
newOrdsArray[upto++] = ord;
}
idToOrdsPrefix[id] = newOrdsArray;
}
for(LeafReaderContext ctx : r.leaves()) {
if (VERBOSE) {
System.out.println("\nTEST: sub=" + ctx.reader());
}
verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
}
// Also test top-level reader: its enum does not support
// ord, so this forces the OrdWrapper to run:
if (VERBOSE) {
System.out.println("TEST: top reader");
}
verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
}
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
r.close();
dir.close();
}
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(),
"field",
prefixRef,
Integer.MAX_VALUE,
TestUtil.nextInt(random(), 2, 10));
final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER, false);
/*
for(int docID=0;docID<subR.maxDoc();docID++) {
System.out.println(" docID=" + docID + " id=" + docIDToID[docID]);
}
*/
if (VERBOSE) {
System.out.println("TEST: verify prefix=" + (prefixRef==null ? "null" : prefixRef.utf8ToString()));
System.out.println("TEST: all TERMS:");
TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
int ord = 0;
while(allTE.next() != null) {
System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
}
}
//final TermsEnum te = subR.fields().terms("field").iterator();
final TermsEnum te = dto.getOrdTermsEnum(r);
if (dto.numTerms() == 0) {
if (prefixRef == null) {
assertNull(MultiFields.getTerms(r, "field"));
} else {
Terms terms = MultiFields.getTerms(r, "field");
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
if (result != TermsEnum.SeekStatus.END) {
assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
} else {
// ok
}
} else {
// ok
}
}
return;
}
if (VERBOSE) {
System.out.println("TEST: TERMS:");
te.seekExact(0);
while(true) {
System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString());
if (te.next() == null) {
break;
}
}
}
SortedSetDocValues iter = dto.iterator(r);
for(int docID=0;docID<r.maxDoc();docID++) {
if (VERBOSE) {
System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
}
iter.setDocument(docID);
final int[] answers = idToOrds[(int) docIDToID.get(docID)];
int upto = 0;
long ord;
while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
te.seekExact(ord);
final BytesRef expected = termsArray[answers[upto++]];
if (VERBOSE) {
System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
}
assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
}
assertEquals(answers.length, upto);
}
}
public void testBackToTheFuture() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(newStringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(newStringField("foo", "baz", Field.Store.NO));
// we need a second value for a doc, or we don't actually test DocTermOrds!
doc.add(newStringField("foo", "car", Field.Store.NO));
iw.addDocument(doc);
DirectoryReader r1 = DirectoryReader.open(iw);
iw.deleteDocuments(new Term("foo", "baz"));
DirectoryReader r2 = DirectoryReader.open(iw);
FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
assertEquals(3, v.getValueCount());
v.setDocument(1);
assertEquals(1, v.nextOrd());
iw.close();
r1.close();
r2.close();
dir.close();
}
public void testNumericEncoded32() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
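// legacy numeric terms are prefix-coded so byte order matches numeric order: -3 -> ord 0, 5 -> ord 1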
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
ir.close();
dir.close();
}
public void testNumericEncoded64() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
ir.close();
dir.close();
}
public void testSortedTermsEnum() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("field", "hello", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("field", "world", Field.Store.NO));
// we need a second value for a doc, or we don't actually test DocTermOrds!
doc.add(new StringField("field", "hello", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("field", "beer", Field.Store.NO));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
LeafReader ar = getOnlyLeafReader(ireader);
SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
assertEquals(3, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
// next()
assertEquals("beer", termsEnum.next().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
// seekCeil()
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
// seekExact()
assertTrue(termsEnum.seekExact(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("hello")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("world")));
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
// seek(ord)
termsEnum.seekExact(0);
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
termsEnum.seekExact(1);
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
// lookupTerm(BytesRef)
assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
assertEquals(2, dv.lookupTerm(new BytesRef("world")));
assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));
ireader.close();
directory.close();
}
public void testActuallySingleValued() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwconfig = newIndexWriterConfig(null);
iwconfig.setMergePolicy(newLogMergePolicy());
IndexWriter iw = new IndexWriter(dir, iwconfig);
Document doc = new Document();
doc.add(new StringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "baz", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "baz", Field.Store.NO));
doc.add(new StringField("foo", "baz", Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(0, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(2);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(3);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals("bar", value.utf8ToString());
value = v.lookupOrd(1);
assertEquals("baz", value.utf8ToString());
ir.close();
dir.close();
}
}

View File

@ -0,0 +1,731 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.junit.AfterClass;
import org.junit.BeforeClass;
public class TestFieldCache extends LuceneTestCase {
private static LeafReader reader;
private static int NUM_DOCS;
private static int NUM_ORDS;
private static String[] unicodeStrings;
private static BytesRef[][] multiValued;
private static Directory directory;
@BeforeClass
public static void beforeClass() throws Exception {
NUM_DOCS = atLeast(500);
NUM_ORDS = atLeast(2);
directory = newDirectory();
IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
unicodeStrings = new String[NUM_DOCS];
multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
if (VERBOSE) {
System.out.println("TEST: setUp");
}
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(new LongPoint("theLong", theLong--));
doc.add(new DoublePoint("theDouble", theDouble--));
doc.add(new IntPoint("theInt", theInt--));
doc.add(new FloatPoint("theFloat", theFloat--));
if (i%2 == 0) {
doc.add(new IntPoint("sparse", i));
}
if (i%2 == 0) {
doc.add(new IntPoint("numInt", i));
}
// sometimes skip the field:
if (random().nextInt(40) != 17) {
unicodeStrings[i] = generateString(i);
doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
}
// sometimes skip the field:
if (random().nextInt(10) != 8) {
for (int j = 0; j < NUM_ORDS; j++) {
String newValue = generateString(i);
multiValued[i][j] = new BytesRef(newValue);
doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
}
Arrays.sort(multiValued[i]);
}
writer.addDocument(doc);
}
writer.forceMerge(1); // this test relies on one segment and docid order
IndexReader r = DirectoryReader.open(writer);
assertEquals(1, r.leaves().size());
reader = r.leaves().get(0).reader();
TestUtil.checkReader(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
reader.close();
reader = null;
directory.close();
directory = null;
unicodeStrings = null;
multiValued = null;
}
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
}
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Long.MAX_VALUE - i, longs.get(i));
}
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
}
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
}
Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
// getTermsIndex
SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
final String s;
final int ord = termsIndex.getOrd(i);
if (ord == -1) {
s = null;
} else {
s = termsIndex.lookupOrd(ord).utf8ToString();
}
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
int nTerms = termsIndex.getValueCount();
TermsEnum tenum = termsIndex.termsEnum();
for (int i=0; i<nTerms; i++) {
BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
final BytesRef val = termsIndex.lookupOrd(i);
// System.out.println("i="+i);
assertEquals(val, val1);
}
// seek the enum around (note this isn't a great test here)
int num = atLeast(100);
for (int i = 0; i < num; i++) {
int k = random().nextInt(nTerms);
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
for(int i=0;i<nTerms;i++) {
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
// test bad field
termsIndex = cache.getTermsIndex(reader, "bogusfield");
// getTerms
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true);
Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString", null);
for (int i = 0; i < NUM_DOCS; i++) {
final String s;
if (!bits.get(i)) {
s = null;
} else {
s = terms.get(i).utf8ToString();
}
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
// test bad field
terms = cache.getTerms(reader, "bogusfield", false);
// getDocTermOrds
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
int numEntries = cache.getCacheEntries().length;
// ask for it again, and check that we didn't create any additional entries:
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
assertEquals(numEntries, cache.getCacheEntries().length);
for (int i = 0; i < NUM_DOCS; i++) {
termOrds.setDocument(i);
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
for (BytesRef v : values) {
if (v == null) {
// why does this test use null values... instead of an empty list: confusing
break;
}
long ord = termOrds.nextOrd();
assert ord != SortedSetDocValues.NO_MORE_ORDS;
BytesRef scratch = termOrds.lookupOrd(ord);
assertEquals(v, scratch);
}
assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
assertTrue(termOrds.getValueCount() == 0);
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
}
public void testEmptyIndex() throws Exception {
Directory dir = newDirectory();
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
writer.close();
IndexReader r = DirectoryReader.open(dir);
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(reader);
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
r.close();
dir.close();
}
private static String generateString(int i) {
String s = null;
if (i > 0 && random().nextInt(3) == 1) {
// reuse past string -- try to find one that's not null
for(int iter = 0; iter < 10 && s == null;iter++) {
s = unicodeStrings[random().nextInt(i)];
}
if (s == null) {
s = TestUtil.randomUnicodeString(random());
}
} else {
s = TestUtil.randomUnicodeString(random());
}
return s;
}
public void testDocsWithField() throws Exception {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, true);
// The double[] takes one slot, and docsWithField should also
// have been populated:
assertEquals(2, cache.getCacheEntries().length);
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
// No new entries should appear:
assertEquals(2, cache.getCacheEntries().length);
assertTrue(bits instanceof Bits.MatchAllBits);
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
assertEquals(4, cache.getCacheEntries().length);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
assertEquals(4, cache.getCacheEntries().length);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, numInts.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
public void testGetDocsWithFieldThreadSafety() throws Exception {
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
int NUM_THREADS = 3;
Thread[] threads = new Thread[NUM_THREADS];
final AtomicBoolean failed = new AtomicBoolean();
final AtomicInteger iters = new AtomicInteger();
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
new Runnable() {
@Override
public void run() {
cache.purgeAllCaches();
iters.incrementAndGet();
}
});
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX] = new Thread() {
@Override
public void run() {
try {
while(!failed.get()) {
final int op = random().nextInt(3);
if (op == 0) {
// Purge all caches & resume, once all
// threads get here:
restart.await();
if (iters.get() >= NUM_ITER) {
break;
}
} else if (op == 1) {
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
} else {
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
}
} catch (Throwable t) {
failed.set(true);
restart.reset();
throw new RuntimeException(t);
}
}
};
threads[threadIDX].start();
}
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX].join();
}
assertFalse(failed.get());
}
public void testDocValuesIntegration() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
doc.add(new NumericDocValuesField("numeric", 42));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
// Binary type: can be retrieved via getTerms()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER, false);
});
BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true);
final BytesRef term = binary.get(0);
assertEquals("binary value", term.utf8ToString());
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTermsIndex(ar, "binary");
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "binary");
});
Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
assertTrue(bits.get(0));
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER, false);
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "sorted");
});
binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true);
BytesRef scratch = binary.get(0);
assertEquals("sorted value", scratch.utf8ToString());
SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
assertEquals(0, sorted.getOrd(0));
assertEquals(1, sorted.getValueCount());
scratch = sorted.get(0);
assertEquals("sorted value", scratch.utf8ToString());
SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
sortedSet.setDocument(0);
assertEquals(0, sortedSet.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
assertEquals(1, sortedSet.getValueCount());
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
assertTrue(bits.get(0));
// Numeric type: can be retrieved via getInts() and so on
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER, false);
assertEquals(42, numeric.get(0));
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTerms(ar, "numeric", true);
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "numeric");
});
bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
assertTrue(bits.get(0));
// SortedSet type: can be retrieved via getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER, false);
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "sortedset");
});
sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
sortedSet.setDocument(0);
assertEquals(0, sortedSet.nextOrd());
assertEquals(1, sortedSet.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
assertEquals(2, sortedSet.getValueCount());
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
assertTrue(bits.get(0));
ir.close();
dir.close();
}
public void testNonexistantFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
assertEquals(0, doubles.get(0));
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
BytesRef scratch = binaries.get(0);
assertEquals(0, scratch.length);
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(-1, sorted.getOrd(0));
scratch = sorted.get(0);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
sortedSet.setDocument(0);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
assertFalse(bits.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
public void testNonIndexedFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new StoredField("bogusbytes", "bogus"));
doc.add(new StoredField("bogusshorts", "bogus"));
doc.add(new StoredField("bogusints", "bogus"));
doc.add(new StoredField("boguslongs", "bogus"));
doc.add(new StoredField("bogusfloats", "bogus"));
doc.add(new StoredField("bogusdoubles", "bogus"));
doc.add(new StoredField("bogusterms", "bogus"));
doc.add(new StoredField("bogustermsindex", "bogus"));
doc.add(new StoredField("bogusmultivalued", "bogus"));
doc.add(new StoredField("bogusbits", "bogus"));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
assertEquals(0, doubles.get(0));
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
BytesRef scratch = binaries.get(0);
assertEquals(0, scratch.length);
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(-1, sorted.getOrd(0));
scratch = sorted.get(0);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
sortedSet.setDocument(0);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
assertFalse(bits.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent us from using the full long range
public void testLongFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
LongPoint field = new LongPoint("f", 0L);
StoredField field2 = new StoredField("f", 0L);
doc.add(field);
doc.add(field2);
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final long v;
switch (random().nextInt(10)) {
case 0:
v = Long.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Long.MAX_VALUE;
break;
default:
v = TestUtil.nextLong(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setLongValue(v);
field2.setLongValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], longs.get(i));
}
reader.close();
iw.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent us from using the full int range
public void testIntFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
IntPoint field = new IntPoint("f", 0);
doc.add(field);
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final int v;
switch (random().nextInt(10)) {
case 0:
v = Integer.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Integer.MAX_VALUE;
break;
default:
v = TestUtil.nextInt(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setIntValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], ints.get(i));
}
reader.close();
iw.close();
dir.close();
}
}
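
A minimal usage sketch of the pattern the assertions above exercise, assuming a LeafReader over an index that indexed a hypothetical LongPoint field named "price"; the PriceDumpSketch class and field name are illustrative only and are not part of this change.

package org.apache.solr.uninverting;

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;

class PriceDumpSketch {
  // "price" is a hypothetical field name; FieldCache is the uninverting cache tested above.
  static void dump(LeafReader reader) throws IOException {
    // Uninverts the LongPoint field on first use; later calls return the cached instance.
    NumericDocValues prices =
        FieldCache.DEFAULT.getNumerics(reader, "price", FieldCache.LONG_POINT_PARSER, false);
    // Tells which documents actually carry a value for the field.
    Bits hasPrice = FieldCache.DEFAULT.getDocsWithField(reader, "price", FieldCache.LONG_POINT_PARSER);
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      if (hasPrice.get(doc)) {
        System.out.println(doc + " -> " + prices.get(doc));
      }
    }
  }
}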

View File

@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestFieldCacheReopen extends LuceneTestCase {
// TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
// LUCENE-1579: Ensure that on a reopened reader, that any
// shared segments reuse the doc values arrays in
// FieldCache
public void testFieldCacheReuseAfterReopen() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(
dir,
newIndexWriterConfig(new MockAnalyzer(random())).
setMergePolicy(newLogMergePolicy(10))
);
Document doc = new Document();
doc.add(new IntPoint("number", 17));
writer.addDocument(doc);
writer.commit();
// Open reader1
DirectoryReader r = DirectoryReader.open(dir);
LeafReader r1 = getOnlyLeafReader(r);
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER, false);
assertEquals(17, ints.get(0));
// Add new segment
writer.addDocument(doc);
writer.commit();
// Reopen reader1 --> reader2
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
assertNotNull(r2);
r.close();
LeafReader sub0 = r2.leaves().get(0).reader();
final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER, false);
r2.close();
assertTrue(ints == ints2);
writer.close();
dir.close();
}
}
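
A hedged sketch of the reopen lifecycle the test above relies on: unchanged segments are shared between the old and new reader, so their FieldCache entries (keyed by the segment core) are reused; the class and method names below are illustrative, while "number" is the field indexed above.

package org.apache.solr.uninverting;

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;

class ReopenSketch {
  // Refreshes a reader view; returns the newer reader, or the old one if nothing changed.
  static DirectoryReader refresh(DirectoryReader current) throws IOException {
    DirectoryReader newer = DirectoryReader.openIfChanged(current);
    if (newer == null) {
      return current;
    }
    current.close(); // shared segments keep their cached uninverted arrays
    return newer;
  }

  // Reads the first value of the "number" field, as the test above does before and after reopen.
  static long firstNumber(LeafReader segment) throws IOException {
    NumericDocValues values =
        FieldCache.DEFAULT.getNumerics(segment, "number", FieldCache.INT_POINT_PARSER, false);
    return values.get(0);
  }
}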

View File

@@ -0,0 +1,164 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;
import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType;
public class TestFieldCacheSanityChecker extends LuceneTestCase {
protected LeafReader readerA;
protected LeafReader readerB;
protected LeafReader readerX;
protected LeafReader readerAclone;
protected Directory dirA, dirB;
private static final int NUM_DOCS = 1000;
@Override
public void setUp() throws Exception {
super.setUp();
dirA = newDirectory();
dirB = newDirectory();
IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(new MockAnalyzer(random())));
IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(new MockAnalyzer(random())));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
if (0 == i % 3) {
wA.addDocument(doc);
} else {
wB.addDocument(doc);
}
}
wA.close();
wB.close();
DirectoryReader rA = DirectoryReader.open(dirA);
readerA = SlowCompositeReaderWrapper.wrap(rA);
readerAclone = SlowCompositeReaderWrapper.wrap(rA);
readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA));
readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB));
readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB));
}
@Override
public void tearDown() throws Exception {
readerA.close();
readerAclone.close();
readerB.close();
readerX.close();
dirA.close();
dirB.close();
super.tearDown();
}
public void testSanity() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getNumerics(readerA, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(readerAclone, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(readerB, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
// // //
Insanity[] insanity =
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
if (0 < insanity.length)
dumpArray(getTestClass().getName() + "#" + getTestName()
+ " INSANITY", insanity, System.err);
assertEquals("shouldn't be any cache insanity", 0, insanity.length);
cache.purgeAllCaches();
}
public void testInsanity1() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
cache.getTerms(readerX, "theInt", false);
// // //
Insanity[] insanity =
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
assertEquals("wrong number of cache errors", 1, insanity.length);
assertEquals("wrong type of cache error",
InsanityType.VALUEMISMATCH,
insanity[0].getType());
assertEquals("wrong number of entries in cache error", 2,
insanity[0].getCacheEntries().length);
// we expect bad things, don't let tearDown complain about them
cache.purgeAllCaches();
}
public void testInsanity2() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getTerms(readerA, "theInt", false);
cache.getTerms(readerB, "theInt", false);
cache.getTerms(readerX, "theInt", false);
// // //
Insanity[] insanity =
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
assertEquals("wrong number of cache errors", 1, insanity.length);
assertEquals("wrong type of cache error",
InsanityType.SUBREADER,
insanity[0].getType());
assertEquals("wrong number of entries in cache error", 3,
insanity[0].getCacheEntries().length);
// we expect bad things, don't let tearDown complain about them
cache.purgeAllCaches();
}
}
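
For reference, running the checker outside a test might look like the sketch below; it is illustrative only and uses just the static checkSanity call and cache-entry accessors exercised above.

package org.apache.solr.uninverting;

import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;

class SanityReportSketch {
  static void report() {
    Insanity[] insanity =
        FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT.getCacheEntries());
    for (Insanity problem : insanity) {
      // Each Insanity carries a type (e.g. VALUEMISMATCH, SUBREADER) and the suspect cache entries.
      System.err.println(problem.getType() + ": " + problem.getCacheEntries().length + " entries");
    }
  }
}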

File diff suppressed because it is too large

View File

@@ -0,0 +1,318 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
/** random sorting tests with uninversion */
public class TestFieldCacheSortRandom extends LuceneTestCase {
public void testRandomStringSort() throws Exception {
testRandomStringSort(SortField.Type.STRING);
}
public void testRandomStringValSort() throws Exception {
testRandomStringSort(SortField.Type.STRING_VAL);
}
private void testRandomStringSort(SortField.Type type) throws Exception {
Random random = new Random(random().nextLong());
final int NUM_DOCS = atLeast(100);
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
final boolean allowDups = random.nextBoolean();
final Set<String> seen = new HashSet<>();
final int maxLength = TestUtil.nextInt(random, 5, 100);
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
}
int numDocs = 0;
final List<BytesRef> docValues = new ArrayList<>();
// TODO: deletions
while (numDocs < NUM_DOCS) {
final Document doc = new Document();
// 10% of the time, the document is missing the value:
final BytesRef br;
if (random().nextInt(10) != 7) {
final String s;
if (random.nextBoolean()) {
s = TestUtil.randomSimpleString(random, maxLength);
} else {
s = TestUtil.randomUnicodeString(random, maxLength);
}
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
doc.add(new StringField("stringdv", s, Field.Store.NO));
docValues.add(new BytesRef(s));
} else {
br = null;
if (VERBOSE) {
System.out.println(" " + numDocs + ": <missing>");
}
docValues.add(null);
}
doc.add(new IntPoint("id", numDocs));
doc.add(new StoredField("id", numDocs));
writer.addDocument(doc);
numDocs++;
if (random.nextInt(40) == 17) {
// force flush
writer.getReader().close();
}
}
Map<String,UninvertingReader.Type> mapping = new HashMap<>();
mapping.put("stringdv", Type.SORTED);
mapping.put("id", Type.INTEGER_POINT);
final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
writer.close();
if (VERBOSE) {
System.out.println(" reader=" + r);
}
final IndexSearcher s = newSearcher(r, false);
final int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
final boolean reverse = random.nextBoolean();
final TopFieldDocs hits;
final SortField sf;
final boolean sortMissingLast;
final boolean missingIsNull;
sf = new SortField("stringdv", type, reverse);
sortMissingLast = random().nextBoolean();
missingIsNull = true;
if (sortMissingLast) {
sf.setMissingValue(SortField.STRING_LAST);
}
final Sort sort;
if (random.nextBoolean()) {
sort = new Sort(sf);
} else {
sort = new Sort(sf, SortField.FIELD_DOC);
}
final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
int queryType = random.nextInt(2);
if (queryType == 0) {
hits = s.search(new ConstantScoreQuery(f),
hitCount, sort, random.nextBoolean(), random.nextBoolean());
} else {
hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
}
// Compute expected results:
Collections.sort(f.matchValues, new Comparator<BytesRef>() {
@Override
public int compare(BytesRef a, BytesRef b) {
if (a == null) {
if (b == null) {
return 0;
}
if (sortMissingLast) {
return 1;
} else {
return -1;
}
} else if (b == null) {
if (sortMissingLast) {
return -1;
} else {
return 1;
}
} else {
return a.compareTo(b);
}
}
});
if (reverse) {
Collections.reverse(f.matchValues);
}
final List<BytesRef> expected = f.matchValues;
if (VERBOSE) {
System.out.println(" expected:");
for(int idx=0;idx<expected.size();idx++) {
BytesRef br = expected.get(idx);
if (br == null && missingIsNull == false) {
br = new BytesRef();
}
System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
if (idx == hitCount-1) {
break;
}
}
}
if (VERBOSE) {
System.out.println(" actual:");
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
BytesRef br = (BytesRef) fd.fields[0];
System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
}
}
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
BytesRef br = expected.get(hitIDX);
if (br == null && missingIsNull == false) {
br = new BytesRef();
}
// Normally, the old codecs (that don't support
// docsWithField via doc values) will always return
// an empty BytesRef for the missing case; however,
// if all docs in a given segment were missing, it
// will return null instead! So we must map
// null here, too:
BytesRef br2 = (BytesRef) fd.fields[0];
if (br2 == null && missingIsNull == false) {
br2 = new BytesRef();
}
assertEquals(br, br2);
}
}
r.close();
dir.close();
}
private static class RandomQuery extends Query {
private final long seed;
private float density;
private final List<BytesRef> docValues;
public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
// density should be 0.0 ... 1.0
public RandomQuery(long seed, float density, List<BytesRef> docValues) {
this.seed = seed;
this.density = density;
this.docValues = docValues;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Random random = new Random(seed ^ context.docBase);
final int maxDoc = context.reader().maxDoc();
final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
assertNotNull(idSource);
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID<maxDoc;docID++) {
if (random.nextFloat() <= density) {
bits.set(docID);
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
matchValues.add(docValues.get((int) idSource.get(docID)));
}
}
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
}
};
}
@Override
public String toString(String field) {
return "RandomFilter(density=" + density + ")";
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
equalsTo(getClass().cast(other));
}
private boolean equalsTo(RandomQuery other) {
return seed == other.seed &&
docValues == other.docValues &&
density == other.density;
}
@Override
public int hashCode() {
int h = classHash();
h = 31 * h + Objects.hash(seed, density);
h = 31 * h + System.identityHashCode(docValues);
return h;
}
}
}
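
A hedged sketch of the wrap-and-sort pattern this test drives, reduced to its essentials; the searcher setup and class name are illustrative, while "stringdv" is the indexed field mapped above.

package org.apache.solr.uninverting;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;

import org.apache.solr.uninverting.UninvertingReader.Type;

class UninvertingSortSketch {
  static TopFieldDocs sortByString(DirectoryReader raw) throws IOException {
    // Expose the indexed-but-not-docvalued field as SortedDocValues via uninversion.
    Map<String,Type> mapping = new HashMap<>();
    mapping.put("stringdv", Type.SORTED);
    IndexSearcher searcher = new IndexSearcher(UninvertingReader.wrap(raw, mapping));
    Sort sort = new Sort(new SortField("stringdv", SortField.Type.STRING));
    return searcher.search(new MatchAllDocsQuery(), 10, sort);
  }
}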

View File

@@ -0,0 +1,592 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
public class TestFieldCacheVsDocValues extends LuceneTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
assumeFalse("test unsupported on J9 temporarily, see https://issues.apache.org/jira/browse/LUCENE-6522",
Constants.JAVA_VENDOR.startsWith("IBM"));
}
public void testByteMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Byte.MIN_VALUE, Byte.MAX_VALUE);
}
}
public void testShortMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Short.MIN_VALUE, Short.MAX_VALUE);
}
}
public void testIntMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Integer.MIN_VALUE, Integer.MAX_VALUE);
}
}
public void testLongMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Long.MIN_VALUE, Long.MAX_VALUE);
}
}
public void testSortedFixedLengthVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedVsFieldCache(fixedLength, fixedLength);
}
}
public void testSortedVariableLengthVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedVsFieldCache(1, 10);
}
}
public void testSortedSetFixedLengthVsUninvertedField() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedSetVsUninvertedField(fixedLength, fixedLength);
}
}
public void testSortedSetVariableLengthVsUninvertedField() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsUninvertedField(1, 10);
}
}
// LUCENE-4853
public void testHugeBinaryValues() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
// FSDirectory because SimpleText will consume gobs of
// space when storing big binary values:
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
boolean doFixed = random().nextBoolean();
int numDocs;
int fixedLength = 0;
if (doFixed) {
// Sometimes make all values fixed length since some
// codecs have different code paths for this:
numDocs = TestUtil.nextInt(random(), 10, 20);
fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
} else {
numDocs = TestUtil.nextInt(random(), 100, 200);
}
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
List<byte[]> docBytes = new ArrayList<>();
long totalBytes = 0;
for(int docID=0;docID<numDocs;docID++) {
// we don't use RandomIndexWriter because it might add
// more docvalues than we expect !!!!
// Must be > 64KB in size to ensure more than 2 pages in
// PagedBytes would be needed:
int numBytes;
if (doFixed) {
numBytes = fixedLength;
} else if (docID == 0 || random().nextInt(5) == 3) {
numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
} else {
numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024);
}
totalBytes += numBytes;
if (totalBytes > 5 * 1024*1024) {
break;
}
byte[] bytes = new byte[numBytes];
random().nextBytes(bytes);
docBytes.add(bytes);
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
b.length = bytes.length;
doc.add(new BinaryDocValuesField("field", b));
doc.add(new StringField("id", ""+docID, Field.Store.YES));
try {
w.addDocument(doc);
} catch (IllegalArgumentException iae) {
if (iae.getMessage().indexOf("is too large") == -1) {
throw iae;
} else {
// OK: some codecs can't handle binary DV > 32K
assertFalse(codecAcceptsHugeBinaryValues("field"));
w.rollback();
d.close();
return;
}
}
}
DirectoryReader r;
try {
r = DirectoryReader.open(w);
} catch (IllegalArgumentException iae) {
if (iae.getMessage().indexOf("is too large") == -1) {
throw iae;
} else {
assertFalse(codecAcceptsHugeBinaryValues("field"));
// OK: some codecs can't handle binary DV > 32K
w.rollback();
d.close();
return;
}
}
w.close();
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(ar);
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
for(int docID=0;docID<docBytes.size();docID++) {
Document doc = ar.document(docID);
BytesRef bytes = s.get(docID);
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
assertEquals(expected.length, bytes.length);
assertEquals(new BytesRef(expected), bytes);
}
assertTrue(codecAcceptsHugeBinaryValues("field"));
ar.close();
d.close();
}
private static final int LARGE_BINARY_FIELD_LENGTH = (1 << 15) - 2;
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
public void testHugeBinaryValueLimit() throws Exception {
// We only test DVFormats that have a limit
assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
Analyzer analyzer = new MockAnalyzer(random());
// FSDirectory because SimpleText will consume gobs of
// space when storing big binary values:
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
boolean doFixed = random().nextBoolean();
int numDocs;
int fixedLength = 0;
if (doFixed) {
// Sometimes make all values fixed length since some
// codecs have different code paths for this:
numDocs = TestUtil.nextInt(random(), 10, 20);
fixedLength = LARGE_BINARY_FIELD_LENGTH;
} else {
numDocs = TestUtil.nextInt(random(), 100, 200);
}
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
List<byte[]> docBytes = new ArrayList<>();
long totalBytes = 0;
for(int docID=0;docID<numDocs;docID++) {
// we don't use RandomIndexWriter because it might add
// more docvalues than we expect !!!!
// Must be > 64KB in size to ensure more than 2 pages in
// PagedBytes would be needed:
int numBytes;
if (doFixed) {
numBytes = fixedLength;
} else if (docID == 0 || random().nextInt(5) == 3) {
numBytes = LARGE_BINARY_FIELD_LENGTH;
} else {
numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
}
totalBytes += numBytes;
if (totalBytes > 5 * 1024*1024) {
break;
}
byte[] bytes = new byte[numBytes];
random().nextBytes(bytes);
docBytes.add(bytes);
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
b.length = bytes.length;
doc.add(new BinaryDocValuesField("field", b));
doc.add(new StringField("id", ""+docID, Field.Store.YES));
w.addDocument(doc);
}
DirectoryReader r = DirectoryReader.open(w);
w.close();
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(ar);
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
for(int docID=0;docID<docBytes.size();docID++) {
Document doc = ar.document(docID);
BytesRef bytes = s.get(docID);
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
assertEquals(expected.length, bytes.length);
assertEquals(new BytesRef(expected), bytes);
}
ar.close();
d.close();
}
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = new StringField("indexed", "", Field.Store.NO);
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.add(idField);
doc.add(indexedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
if (minLength == maxLength) {
length = minLength; // fixed length
} else {
length = TestUtil.nextInt(random(), minLength, maxLength);
}
String value = TestUtil.randomSimpleString(random(), length);
indexedField.setStringValue(value);
dvField.setBytesValue(new BytesRef(value));
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
SortedDocValues actual = r.getSortedDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
dir.close();
}
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
doc.add(idField);
final int length = TestUtil.nextInt(random(), minLength, maxLength);
int numValues = random().nextInt(17);
// create a random list of strings
List<String> values = new ArrayList<>();
for (int v = 0; v < numValues; v++) {
values.add(TestUtil.randomSimpleString(random(), minLength, length));
}
// add in any order to the indexed field
ArrayList<String> unordered = new ArrayList<>(values);
Collections.shuffle(unordered, random());
for (String v : values) {
doc.add(newStringField("indexed", v, Field.Store.NO));
}
// add in any order to the dv field
ArrayList<String> unordered2 = new ArrayList<>(values);
Collections.shuffle(unordered2, random());
for (String v : unordered2) {
doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare per-segment
DirectoryReader ir = writer.getReader();
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
writer.forceMerge(1);
// now compare again after the merge
ir = writer.getReader();
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
assertEquals(ir.maxDoc(), expected, actual);
ir.close();
writer.close();
dir.close();
}
private void doTestMissingVsFieldCache(LongProducer longs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = newStringField("indexed", "", Field.Store.NO);
Field dvField = new NumericDocValuesField("dv", 0);
// index some docs
int numDocs = atLeast(300);
// numDocs should always be > 256 so that, in case of a codec that optimizes
// for numbers of values <= 256, all storage layouts are tested
assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
long value = longs.next();
indexedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
Document doc = new Document();
doc.add(idField);
// 1/4 of the time we neglect to add the fields
if (random().nextInt(4) > 0) {
doc.add(indexedField);
doc.add(dvField);
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.forceMerge(numDocs / 256);
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null);
Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null);
assertEquals(expected, actual);
}
ir.close();
dir.close();
}
private void doTestMissingVsFieldCache(final long minValue, final long maxValue) throws Exception {
doTestMissingVsFieldCache(new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(random(), minValue, maxValue);
}
});
}
static abstract class LongProducer {
abstract long next();
}
private void assertEquals(Bits expected, Bits actual) throws Exception {
assertEquals(expected.length(), actual.length());
for (int i = 0; i < expected.length(); i++) {
assertEquals(expected.get(i), actual.get(i));
}
}
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
}
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
// can be null for the segment if no docs actually had any SortedDocValues
// in this case FC.getDocTermOrds returns EMPTY
if (actual == null) {
assertEquals(expected.getValueCount(), 0);
return;
}
assertEquals(expected.getValueCount(), actual.getValueCount());
// compare ord lists
for (int i = 0; i < maxDoc; i++) {
expected.setDocument(i);
actual.setDocument(i);
long expectedOrd;
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
assertEquals(expectedOrd, actual.nextOrd());
}
assertEquals(NO_MORE_ORDS, actual.nextOrd());
}
// compare ord dictionary
for (long i = 0; i < expected.getValueCount(); i++) {
final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
final BytesRef actualBytes = actual.lookupOrd(i);
assertEquals(expectedBytes, actualBytes);
}
// compare termsenum
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
BytesRef ref;
// sequential next() through all terms
while ((ref = expected.next()) != null) {
assertEquals(ref, actual.next());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(actual.next());
// sequential seekExact(ord) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
actual.seekExact(i);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// sequential seekExact(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
assertTrue(actual.seekExact(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// sequential seekCeil(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(ord)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
expected.seekExact(randomOrd);
actual.seekExact(randomOrd);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(BytesRef)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
expected.seekExact(randomOrd);
actual.seekExact(expected.term());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekCeil(BytesRef)
for (long i = 0; i < numOrds; i++) {
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
SeekStatus expectedStatus = expected.seekCeil(target);
assertEquals(expectedStatus, actual.seekCeil(target));
if (expectedStatus != SeekStatus.END) {
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
}
}
protected boolean codecAcceptsHugeBinaryValues(String field) {
String name = TestUtil.getDocValuesFormat(field);
return !(name.equals("Memory")); // Direct has a different type of limit
}
}
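
The ord-iteration protocol these comparisons rely on, shown as a standalone hedged sketch; the class and method names are illustrative, while "indexed" is the uninverted field name used above.

package org.apache.solr.uninverting;

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

class OrdIterationSketch {
  // Prints the distinct values of one document; ords come back sorted and de-duplicated,
  // terminated by NO_MORE_ORDS, which is exactly what the assertions above walk through.
  static void printValues(LeafReader reader, int docID) throws IOException {
    SortedSetDocValues ords = FieldCache.DEFAULT.getDocTermOrds(reader, "indexed", null);
    ords.setDocument(docID);
    long ord;
    while ((ord = ords.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      BytesRef value = ords.lookupOrd(ord);
      System.out.println(value.utf8ToString());
    }
  }
}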

View File

@@ -0,0 +1,228 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
// TODO: what happened to this test... it's not actually uninverting?
public class TestFieldCacheWithThreads extends LuceneTestCase {
public void test() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
final List<Long> numbers = new ArrayList<>();
final List<BytesRef> binary = new ArrayList<>();
final List<BytesRef> sorted = new ArrayList<>();
final int numDocs = atLeast(100);
for(int i=0;i<numDocs;i++) {
Document d = new Document();
long number = random().nextLong();
d.add(new NumericDocValuesField("number", number));
BytesRef bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
d.add(new BinaryDocValuesField("bytes", bytes));
binary.add(bytes);
bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
d.add(new SortedDocValuesField("sorted", bytes));
sorted.add(bytes);
w.addDocument(d);
numbers.add(number);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
assertEquals(1, r.leaves().size());
final LeafReader ar = r.leaves().get(0).reader();
int numThreads = TestUtil.nextInt(random(), 2, 5);
List<Thread> threads = new ArrayList<>();
final CountDownLatch startingGun = new CountDownLatch(1);
for(int t=0;t<numThreads;t++) {
final Random threadRandom = new Random(random().nextLong());
Thread thread = new Thread() {
@Override
public void run() {
try {
//NumericDocValues ndv = ar.getNumericDocValues("number");
NumericDocValues ndv = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false);
//BinaryDocValues bdv = ar.getBinaryDocValues("bytes");
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false);
SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
startingGun.await();
int iters = atLeast(1000);
for(int iter=0;iter<iters;iter++) {
int docID = threadRandom.nextInt(numDocs);
switch(threadRandom.nextInt(4)) {
case 0:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.INT_POINT_PARSER, false).get(docID));
break;
case 1:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false).get(docID));
break;
case 2:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.FLOAT_POINT_PARSER, false).get(docID));
break;
case 3:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.DOUBLE_POINT_PARSER, false).get(docID));
break;
}
BytesRef term = bdv.get(docID);
assertEquals(binary.get(docID), term);
term = sdv.get(docID);
assertEquals(sorted.get(docID), term);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
thread.start();
threads.add(thread);
}
startingGun.countDown();
for(Thread thread : threads) {
thread.join();
}
r.close();
dir.close();
}
public void test2() throws Exception {
Random random = random();
final int NUM_DOCS = atLeast(100);
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
final boolean allowDups = random.nextBoolean();
final Set<String> seen = new HashSet<>();
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
}
int numDocs = 0;
final List<BytesRef> docValues = new ArrayList<>();
// TODO: deletions
while (numDocs < NUM_DOCS) {
final String s;
if (random.nextBoolean()) {
s = TestUtil.randomSimpleString(random);
} else {
s = TestUtil.randomUnicodeString(random);
}
final BytesRef br = new BytesRef(s);
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
final Document doc = new Document();
doc.add(new SortedDocValuesField("stringdv", br));
doc.add(new NumericDocValuesField("id", numDocs));
docValues.add(br);
writer.addDocument(doc);
numDocs++;
if (random.nextInt(40) == 17) {
// force flush
writer.getReader().close();
}
}
writer.forceMerge(1);
final DirectoryReader r = writer.getReader();
writer.close();
final LeafReader sr = getOnlyLeafReader(r);
final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);
final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
Thread[] threads = new Thread[NUM_THREADS];
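// Each thread keeps resolving random docIDs through the sorted doc values until the deadline,
// checking every result against the expected BytesRef for that document.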
for(int thread=0;thread<NUM_THREADS;thread++) {
threads[thread] = new Thread() {
@Override
public void run() {
Random random = random();
final SortedDocValues stringDVDirect;
final NumericDocValues docIDToID;
try {
stringDVDirect = sr.getSortedDocValues("stringdv");
docIDToID = sr.getNumericDocValues("id");
assertNotNull(stringDVDirect);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
while(System.nanoTime() < END_TIME) {
final SortedDocValues source;
source = stringDVDirect;
for(int iter=0;iter<100;iter++) {
final int docID = random.nextInt(sr.maxDoc());
BytesRef term = source.get(docID);
assertEquals(docValues.get((int) docIDToID.get(docID)), term);
}
}
}
};
threads[thread].start();
}
for(Thread thread : threads) {
thread.join();
}
r.close();
dir.close();
}
}

View File

@@ -0,0 +1,497 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/** random assortment of tests against legacy numerics */
public class TestLegacyFieldCache extends LuceneTestCase {
private static LeafReader reader;
private static int NUM_DOCS;
private static Directory directory;
@BeforeClass
public static void beforeClass() throws Exception {
NUM_DOCS = atLeast(500);
directory = newDirectory();
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
if (VERBOSE) {
System.out.println("TEST: setUp");
}
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
if (i%2 == 0) {
doc.add(new LegacyIntField("sparse", i, Field.Store.NO));
}
if (i%2 == 0) {
doc.add(new LegacyIntField("numInt", i, Field.Store.NO));
}
writer.addDocument(doc);
}
IndexReader r = writer.getReader();
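// Flatten the (possibly multi-segment) reader into a single LeafReader so the FieldCache
// entries built in these tests cover the whole index.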
reader = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
reader.close();
reader = null;
directory.close();
directory = null;
}
public void testInfoStream() throws Exception {
try {
FieldCache cache = FieldCache.DEFAULT;
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(reader, "theDouble", new FieldCache.Parser() {
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
}
@Override
public long parseValue(BytesRef term) {
int val = (int) LegacyNumericUtils.prefixCodedToLong(term);
if (val<0) val ^= 0x7fffffff;
return val;
}
}, false);
assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1);
} finally {
FieldCache.DEFAULT.setInfoStream(null);
FieldCache.DEFAULT.purgeAllCaches();
}
}
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
}
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Long.MAX_VALUE - i, longs.get(i));
}
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
}
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
}
Bits docsWithField = cache.getDocsWithField(reader, "theLong", null);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", null));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
docsWithField = cache.getDocsWithField(reader, "sparse", null);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", null));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
}
public void testEmptyIndex() throws Exception {
Directory dir = newDirectory();
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
writer.close();
IndexReader r = DirectoryReader.open(dir);
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(reader);
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
r.close();
dir.close();
}
public void testDocsWithField() throws Exception {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, true);
// The double[] takes one slot, and docsWithField should also
// have been populated:
assertEquals(2, cache.getCacheEntries().length);
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER);
// No new entries should appear:
assertEquals(2, cache.getCacheEntries().length);
assertTrue(bits instanceof Bits.MatchAllBits);
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
assertEquals(4, cache.getCacheEntries().length);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.LEGACY_INT_PARSER);
assertEquals(4, cache.getCacheEntries().length);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.LEGACY_INT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, numInts.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
public void testGetDocsWithFieldThreadSafety() throws Exception {
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
int NUM_THREADS = 3;
Thread[] threads = new Thread[NUM_THREADS];
final AtomicBoolean failed = new AtomicBoolean();
final AtomicInteger iters = new AtomicInteger();
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
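// The barrier action runs once all threads arrive: it purges the caches and bumps the
// iteration count, so every round starts from a cold FieldCache.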
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
new Runnable() {
@Override
public void run() {
cache.purgeAllCaches();
iters.incrementAndGet();
}
});
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX] = new Thread() {
@Override
public void run() {
try {
while(!failed.get()) {
final int op = random().nextInt(3);
if (op == 0) {
// Purge all caches & resume, once all
// threads get here:
restart.await();
if (iters.get() >= NUM_ITER) {
break;
}
} else if (op == 1) {
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
} else {
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
}
} catch (Throwable t) {
failed.set(true);
restart.reset();
throw new RuntimeException(t);
}
}
};
threads[threadIDX].start();
}
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX].join();
}
assertFalse(failed.get());
}
public void testDocValuesIntegration() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
doc.add(new NumericDocValuesField("numeric", 42));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
// Binary type: can be retrieved via getTerms()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.LEGACY_INT_PARSER, false);
});
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.LEGACY_INT_PARSER, false);
});
// Numeric type: can be retrieved via getInts() and so on
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.LEGACY_INT_PARSER, false);
assertEquals(42, numeric.get(0));
// SortedSet type: can be retrieved via getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.LEGACY_INT_PARSER, false);
});
ir.close();
dir.close();
}
public void testNonexistantFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
assertEquals(0, doubles.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
public void testNonIndexedFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new StoredField("bogusbytes", "bogus"));
doc.add(new StoredField("bogusshorts", "bogus"));
doc.add(new StoredField("bogusints", "bogus"));
doc.add(new StoredField("boguslongs", "bogus"));
doc.add(new StoredField("bogusfloats", "bogus"));
doc.add(new StoredField("bogusdoubles", "bogus"));
doc.add(new StoredField("bogusbits", "bogus"));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
assertEquals(0, doubles.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent from using the full long range
public void testLongFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
doc.add(field);
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final long v;
switch (random().nextInt(10)) {
case 0:
v = Long.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Long.MAX_VALUE;
break;
default:
v = TestUtil.nextLong(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setLongValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], longs.get(i));
}
reader.close();
iw.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent from using the full int range
public void testIntFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
doc.add(field);
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final int v;
switch (random().nextInt(10)) {
case 0:
v = Integer.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Integer.MAX_VALUE;
break;
default:
v = TestUtil.nextInt(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setIntValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], ints.get(i));
}
reader.close();
iw.close();
dir.close();
}
}

View File

@@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestNumericTerms32 extends LuceneTestCase {
// distance of entries
private static int distance;
// shift the start of the values to the left, to also have negative values:
private static final int startOffset = - 1 << 15;
// number of docs to generate for testing
private static int noDocs;
private static Directory directory = null;
private static IndexReader reader = null;
private static IndexSearcher searcher = null;
@BeforeClass
public static void beforeClass() throws Exception {
noDocs = atLeast(4096);
distance = (1 << 30) / noDocs;
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
.setMergePolicy(newLogMergePolicy()));
final FieldType storedInt = new FieldType(LegacyIntField.TYPE_NOT_STORED);
storedInt.setStored(true);
storedInt.freeze();
final FieldType storedInt8 = new FieldType(storedInt);
storedInt8.setNumericPrecisionStep(8);
final FieldType storedInt4 = new FieldType(storedInt);
storedInt4.setNumericPrecisionStep(4);
final FieldType storedInt2 = new FieldType(storedInt);
storedInt2.setNumericPrecisionStep(2);
LegacyIntField
field8 = new LegacyIntField("field8", 0, storedInt8),
field4 = new LegacyIntField("field4", 0, storedInt4),
field2 = new LegacyIntField("field2", 0, storedInt2);
Document doc = new Document();
// add fields that have a distance, to test general functionality
doc.add(field8); doc.add(field4); doc.add(field2);
// Add a series of noDocs docs with increasing int values
for (int l=0; l<noDocs; l++) {
int val=distance*l+startOffset;
field8.setIntValue(val);
field4.setIntValue(val);
field2.setIntValue(val);
val=l-(noDocs/2);
writer.addDocument(doc);
}
Map<String,Type> map = new HashMap<>();
map.put("field2", Type.LEGACY_INTEGER);
map.put("field4", Type.LEGACY_INTEGER);
map.put("field8", Type.LEGACY_INTEGER);
reader = UninvertingReader.wrap(writer.getReader(), map);
searcher=newSearcher(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
searcher = null;
TestUtil.checkReader(reader);
reader.close();
reader = null;
directory.close();
directory = null;
}
private void testSorting(int precisionStep) throws Exception {
String field="field"+precisionStep;
// 10 random tests; the index order is ascending,
// so using a reverse sort field should return descending documents
int num = TestUtil.nextInt(random(), 10, 20);
for (int i = 0; i < num; i++) {
int lower=(int)(random().nextDouble()*noDocs*distance)+startOffset;
int upper=(int)(random().nextDouble()*noDocs*distance)+startOffset;
if (lower>upper) {
int a=lower; lower=upper; upper=a;
}
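// Values were indexed in ascending order, so a reverse sort on this range query
// must return the stored values in strictly decreasing order.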
Query tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.INT, true)));
if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
int last = searcher.doc(sd[0].doc).getField(field).numericValue().intValue();
for (int j=1; j<sd.length; j++) {
int act = searcher.doc(sd[j].doc).getField(field).numericValue().intValue();
assertTrue("Docs should be sorted backwards", last>act );
last=act;
}
}
}
@Test
public void testSorting_8bit() throws Exception {
testSorting(8);
}
@Test
public void testSorting_4bit() throws Exception {
testSorting(4);
}
@Test
public void testSorting_2bit() throws Exception {
testSorting(2);
}
}

View File

@@ -0,0 +1,166 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestNumericTerms64 extends LuceneTestCase {
// distance of entries
private static long distance;
// shift the start of the values to the left, to also have negative values:
private static final long startOffset = - 1L << 31;
// number of docs to generate for testing
private static int noDocs;
private static Directory directory = null;
private static IndexReader reader = null;
private static IndexSearcher searcher = null;
@BeforeClass
public static void beforeClass() throws Exception {
noDocs = atLeast(4096);
distance = (1L << 60) / noDocs;
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
.setMergePolicy(newLogMergePolicy()));
final FieldType storedLong = new FieldType(LegacyLongField.TYPE_NOT_STORED);
storedLong.setStored(true);
storedLong.freeze();
final FieldType storedLong8 = new FieldType(storedLong);
storedLong8.setNumericPrecisionStep(8);
final FieldType storedLong4 = new FieldType(storedLong);
storedLong4.setNumericPrecisionStep(4);
final FieldType storedLong6 = new FieldType(storedLong);
storedLong6.setNumericPrecisionStep(6);
final FieldType storedLong2 = new FieldType(storedLong);
storedLong2.setNumericPrecisionStep(2);
LegacyLongField
field8 = new LegacyLongField("field8", 0L, storedLong8),
field6 = new LegacyLongField("field6", 0L, storedLong6),
field4 = new LegacyLongField("field4", 0L, storedLong4),
field2 = new LegacyLongField("field2", 0L, storedLong2);
Document doc = new Document();
// add fields that have a distance, to test general functionality
doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2);
// Add a series of noDocs docs with increasing long values, by updating the fields
for (int l=0; l<noDocs; l++) {
long val=distance*l+startOffset;
field8.setLongValue(val);
field6.setLongValue(val);
field4.setLongValue(val);
field2.setLongValue(val);
val=l-(noDocs/2);
writer.addDocument(doc);
}
Map<String,Type> map = new HashMap<>();
map.put("field2", Type.LEGACY_LONG);
map.put("field4", Type.LEGACY_LONG);
map.put("field6", Type.LEGACY_LONG);
map.put("field8", Type.LEGACY_LONG);
reader = UninvertingReader.wrap(writer.getReader(), map);
searcher=newSearcher(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
searcher = null;
TestUtil.checkReader(reader);
reader.close();
reader = null;
directory.close();
directory = null;
}
private void testSorting(int precisionStep) throws Exception {
String field="field"+precisionStep;
// 10 random tests; the index order is ascending,
// so using a reverse sort field should return descending documents
int num = TestUtil.nextInt(random(), 10, 20);
for (int i = 0; i < num; i++) {
long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
if (lower>upper) {
long a=lower; lower=upper; upper=a;
}
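// As in the 32-bit test: values are indexed ascending, so the reverse sort
// must yield strictly decreasing stored values.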
Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
for (int j=1; j<sd.length; j++) {
long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
assertTrue("Docs should be sorted backwards", last>act );
last=act;
}
}
}
@Test
public void testSorting_8bit() throws Exception {
testSorting(8);
}
@Test
public void testSorting_6bit() throws Exception {
testSorting(6);
}
@Test
public void testSorting_4bit() throws Exception {
testSorting(4);
}
@Test
public void testSorting_2bit() throws Exception {
testSorting(2);
}
}

View File

@@ -0,0 +1,395 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.UninvertingReader.Type;
public class TestUninvertingReader extends LuceneTestCase {
public void testSortedSetInteger() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
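// Expose the indexed legacy int terms of "foo" as SORTED_SET doc values via the uninverting wrapper.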
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
public void testSortedSetFloat() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value));
value = v.lookupOrd(1);
assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
public void testSortedSetLong() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_LONG));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
public void testSortedSetDouble() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));
value = v.lookupOrd(1);
assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
/** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
public void testSortedSetIntegerManyValues() throws IOException {
final Directory dir = newDirectory();
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
final FieldType NO_TRIE_TYPE = new FieldType(LegacyIntField.TYPE_NOT_STORED);
NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
MULTI_VALUES.add("trie_multi");
MULTI_VALUES.add("notrie_multi");
final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
final int MIN = TestUtil.nextInt(random(), 10, 100);
final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;
{ // (at least) one doc should have every value, so that at least one segment has every value
final Document doc = new Document();
for (int i = MIN; i <= MAX; i++) {
doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
}
iw.addDocument(doc);
}
// now add some more random docs (note: starting at i=1 because of previously added doc)
for (int i = 1; i < NUM_DOCS; i++) {
final Document doc = new Document();
if (0 != TestUtil.nextInt(random(), 0, 9)) {
int val = TestUtil.nextInt(random(), MIN, MAX);
doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
}
if (0 != TestUtil.nextInt(random(), 0, 9)) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
int val = TestUtil.nextInt(random(), MIN, MAX);
doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
}
}
iw.addDocument(doc);
}
iw.close();
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
TestUtil.checkReader(ir);
final int NUM_LEAVES = ir.leaves().size();
// check the leaves: no more than total set size
for (LeafReaderContext rc : ir.leaves()) {
final LeafReader ar = rc.reader();
for (String f : UNINVERT_MAP.keySet()) {
final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
final long valSetSize = v.getValueCount();
assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got " +
valSetSize + " from: " + ar.toString(),
valSetSize <= EXPECTED_VALSET_SIZE);
if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
// tighter check on multi fields in single segment index since we know one doc has all of them
assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size",
EXPECTED_VALSET_SIZE, valSetSize);
}
}
}
// check the composite of all leaves: exact expectation of set size
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
TestUtil.checkReader(composite);
for (String f : MULTI_VALUES) {
final SortedSetDocValues v = composite.getSortedSetDocValues(f);
final long valSetSize = v.getValueCount();
assertEquals(f + ": Composite reader value set should have had exactly expected size",
EXPECTED_VALSET_SIZE, valSetSize);
}
ir.close();
dir.close();
}
public void testSortedSetEmptyIndex() throws IOException {
final Directory dir = newDirectory();
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
iw.close();
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
for (Type t : EnumSet.allOf(Type.class)) {
UNINVERT_MAP.put(t.name(), t);
}
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
TestUtil.checkReader(ir);
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
TestUtil.checkReader(composite);
for (String f : UNINVERT_MAP.keySet()) {
// check the leaves
// (normally there are none for an empty index, so this is really just future
// proofing in case that changes for some reason)
for (LeafReaderContext rc : ir.leaves()) {
final LeafReader ar = rc.reader();
assertNull(f + ": Expected no doc values from empty index (leaf)",
ar.getSortedSetDocValues(f));
}
// check the composite
assertNull(f + ": Expected no doc values from empty index (composite)",
composite.getSortedSetDocValues(f));
}
ir.close();
dir.close();
}
public void testFieldInfos() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
BytesRef idBytes = new BytesRef("id");
doc.add(new StringField("id", idBytes, Store.YES));
doc.add(new LegacyIntField("int", 5, Store.YES));
doc.add(new NumericDocValuesField("dv", 5));
doc.add(new IntPoint("dint", 5));
doc.add(new StoredField("stored", 5)); // not indexed
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
Map<String, Type> uninvertingMap = new HashMap<>();
uninvertingMap.put("int", Type.LEGACY_INTEGER);
uninvertingMap.put("dv", Type.LEGACY_INTEGER);
uninvertingMap.put("dint", Type.INTEGER_POINT);
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
uninvertingMap);
LeafReader leafReader = ir.leaves().get(0).reader();
FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int");
assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
assertEquals(0, intFInfo.getPointDimensionCount());
assertEquals(0, intFInfo.getPointNumBytes());
FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint");
assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
assertEquals(1, dintFInfo.getPointDimensionCount());
assertEquals(4, dintFInfo.getPointNumBytes());
FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv");
assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());
FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored");
assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
}

View File

@@ -22,25 +22,24 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.ResultContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocList;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.junit.BeforeClass;
import org.junit.Test;

View File

@@ -503,7 +503,7 @@ public class FacetStream extends TupleStream implements Expressible {
t.put(identifier, d);
++m;
} else {
long l = (long)bucket.get("count");
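// the bucket count may come back as an Integer or a Long depending on its magnitude,
// so widen through Number instead of casting the value to long directly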
long l = ((Number)bucket.get("count")).longValue();
t.put("count(*)", l);
}
}

View File

@@ -46,6 +46,7 @@ public interface HighlightParams {
public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate";
public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";

View File

@@ -73,14 +73,19 @@ public class StreamingTest extends SolrCloudTestCase {
.withFunctionName("parallel", ParallelStream.class);
private static String zkHost;
private static int numShards;
private static int numWorkers;
@BeforeClass
public static void configureCluster() throws Exception {
configureCluster(2)
numShards = random().nextInt(2) + 1; // 1 or 2 shards
numWorkers = numShards > 2 ? random().nextInt(numShards - 1) + 1 : numShards;
configureCluster(numShards)
.addConfig("conf", getFile("solrj").toPath().resolve("solr").resolve("configsets").resolve("streaming").resolve("conf"))
.configure();
CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1).process(cluster.getSolrClient());
CollectionAdminRequest.createCollection(COLLECTION, "conf", numShards, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
zkHost = cluster.getZkServer().getZkAddress();
@@ -147,12 +152,11 @@ public class StreamingTest extends SolrCloudTestCase {
SolrParams sParamsA = StreamingTest.mapParams("q", "*:*", "fl", "id,a_s,a_i,a_f", "sort", "a_s asc,a_f asc", "partitionKeys", "none");
CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParamsA);
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, stream, 2, new FieldComparator("a_s",ComparatorOrder.ASCENDING));
ParallelStream pstream = parallelStream(stream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
attachStreamFactory(pstream);
List<Tuple> tuples = getTuples(pstream);
assert(tuples.size() == 20); // Each tuple will be double counted.
assert(tuples.size() == (10 * numWorkers)); // Each worker returns every tuple, so expect 10 per worker.
}
@@ -174,7 +178,7 @@
SolrParams sParams = mapParams("q", "*:*", "fl", "id,a_s,a_i,a_f", "sort", "a_f asc,a_i asc", "partitionKeys", "a_f");
CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParams);
UniqueStream ustream = new UniqueStream(stream, new FieldEqualitor("a_f"));
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, ustream, 2, new FieldComparator("a_f",ComparatorOrder.ASCENDING));
ParallelStream pstream = parallelStream(ustream, new FieldComparator("a_f", ComparatorOrder.ASCENDING));
attachStreamFactory(pstream);
List<Tuple> tuples = getTuples(pstream);
assert(tuples.size() == 5);
@@ -183,7 +187,7 @@
//Test the eofTuples
Map<String,Tuple> eofTuples = pstream.getEofTuples();
assert(eofTuples.size() == 2); //There should be an EOF tuple for each worker.
assert(eofTuples.size() == numWorkers); //There should be an EOF tuple for each worker.
}
@@ -253,7 +257,7 @@
SolrParams sParams = mapParams("q", "*:*", "fl", "id,a_s,a_i", "sort", "a_i asc", "partitionKeys", "a_i");
CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParams);
RankStream rstream = new RankStream(stream, 11, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_i", ComparatorOrder.DESCENDING));
attachStreamFactory(pstream);
List<Tuple> tuples = getTuples(pstream);
@@ -405,9 +409,7 @@
ReducerStream rstream = new ReducerStream(stream,
new FieldEqualitor("a_s"),
new GroupOperation(new FieldComparator("a_f", ComparatorOrder.DESCENDING), 5));
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s",ComparatorOrder.ASCENDING));
ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
attachStreamFactory(pstream);
List<Tuple> tuples = getTuples(pstream);
@@ -433,9 +435,7 @@
rstream = new ReducerStream(stream,
new FieldEqualitor("a_s"),
new GroupOperation(new FieldComparator("a_f", ComparatorOrder.ASCENDING), 3));
pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s",ComparatorOrder.DESCENDING));
pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.DESCENDING));
attachStreamFactory(pstream);
tuples = getTuples(pstream);
@@ -1401,7 +1401,7 @@
new CountMetric()};
RollupStream rollupStream = new RollupStream(stream, buckets, metrics);
ParallelStream parallelStream = new ParallelStream(zkHost, COLLECTION, rollupStream, 2, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
ParallelStream parallelStream = parallelStream(rollupStream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
attachStreamFactory(parallelStream);
List<Tuple> tuples = getTuples(parallelStream);
@@ -1501,9 +1501,7 @@
ReducerStream rstream = new ReducerStream(stream,
new FieldEqualitor("a_s"),
new GroupOperation(new FieldComparator("a_s", ComparatorOrder.ASCENDING), 2));
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
attachStreamFactory(pstream);
List<Tuple> tuples = getTuples(pstream);
assert(tuples.size() == 0);
@@ -1636,7 +1634,7 @@
CloudSolrStream streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB);
MergeStream mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.ASCENDING));
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.ASCENDING));
ParallelStream pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.ASCENDING));
attachStreamFactory(pstream);
List<Tuple> tuples = getTuples(pstream);
@@ -1651,7 +1649,7 @@
streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB);
mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.DESCENDING));
attachStreamFactory(pstream);
tuples = getTuples(pstream);
@@ -1684,14 +1682,13 @@
CloudSolrStream streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB);
MergeStream mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.ASCENDING));
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.ASCENDING));
ParallelStream pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.ASCENDING));
attachStreamFactory(pstream);
List<Tuple> tuples = getTuples(pstream);
assert(tuples.size() == 9);
Map<String, Tuple> eofTuples = pstream.getEofTuples();
assert(eofTuples.size() == 2); // There should be an EOF Tuple for each worker.
assert(eofTuples.size() == numWorkers); // There should be an EOF Tuple for each worker.
}
@@ -1834,5 +1831,10 @@
return params;
}
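// Helper that builds a ParallelStream over the shared collection using the randomized
// worker count chosen in configureCluster().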
private ParallelStream parallelStream(TupleStream stream, FieldComparator comparator) throws IOException {
ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, stream, numWorkers, comparator);
return pstream;
}
}