Mirror of https://github.com/apache/lucene.git
Commit 6662cdc07f: Merge remote-tracking branch 'origin/branch_6x' into branch_6x
@@ -1192,7 +1192,7 @@ revision_re = re.compile(r'rev([a-f\d]+)')

def parse_config():
  epilogue = textwrap.dedent('''
    Example usage:

-   python3.2 -u dev-tools/scripts/smokeTestRelease.py http://people.apache.org/~whoever/staging_area/lucene-solr-4.3.0-RC1-rev1469340
+   python3 -u dev-tools/scripts/smokeTestRelease.py https://dist.apache.org/repos/dist/dev/lucene/lucene-solr-6.0.1-RC2-revc7510a0...
  ''')

  description = 'Utility to test a release.'
  parser = argparse.ArgumentParser(description=description, epilog=epilogue,
@@ -1350,8 +1350,25 @@ def confirmAllReleasesAreTestedForBackCompat(smokeVersion, unpackPath):
  else:
    print('  success!')

+def getScriptVersion():
+  topLevelDir = '../..'  # Assumption: this script is in dev-tools/scripts/ of a checkout
+  m = re.compile(r'(.*)/').match(sys.argv[0]) # Get this script's directory
+  if m is not None and m.group(1) != '.':
+    origCwd = os.getcwd()
+    os.chdir(m.group(1))
+    os.chdir('../..')
+    topLevelDir = os.getcwd()
+    os.chdir(origCwd)
+  reBaseVersion = re.compile(r'version\.base\s*=\s*(\d+\.\d+)')
+  return reBaseVersion.search(open('%s/lucene/version.properties' % topLevelDir).read()).group(1)

def main():
  c = parse_config()

+  scriptVersion = getScriptVersion()
+  if not c.version.startswith(scriptVersion + '.'):
+    raise RuntimeError('smokeTestRelease.py for %s.X is incompatible with a %s release.' % (scriptVersion, c.version))

  print('NOTE: output encoding is %s' % sys.stdout.encoding)
  smokeTest(c.java, c.url, c.revision, c.version, c.tmp_dir, c.is_signed, ' '.join(c.test_args))
@@ -23,6 +23,10 @@ New Features
  e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to
  DateTimeFormatter.ISO_INSTANT. (David Smiley)

+* LUCENE-7300: The misc module now has a directory wrapper that uses hard-links if
+  applicable and supported when copying files from another FSDirectory in
+  Directory#copyFrom. (Simon Willnauer)

API Changes

* LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo
@@ -82,7 +86,10 @@ Optimizations
  (which is used by TermsQuery, multi-term queries and several point queries).
  (Adrien Grand, Jeff Wartes, David Smiley)

-* LUCENE-7299: Speed up BytesRefHash.sort(). (Adrien Grand)
+* LUCENE-7299: Speed up BytesRefHash.sort() using radix sort. (Adrien Grand)

+* LUCENE-7306: Speed up points indexing and merging using radix sort.
+  (Adrien Grand)

Bug Fixes
@@ -239,7 +239,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
      "5.5.1-cfs",
      "5.5.1-nocfs",
      "6.0.0-cfs",
-     "6.0.0-nocfs"
+     "6.0.0-nocfs",
+     "6.0.1-cfs",
+     "6.0.1-nocfs"
  };

  final String[] unsupportedNames = {
Binary file not shown.
Binary file not shown.
@@ -145,18 +145,19 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
    List<ClassificationResult<BytesRef>> assignedClasses = new ArrayList<>();

    Terms classes = MultiFields.getTerms(leafReader, classFieldName);
-   TermsEnum classesEnum = classes.iterator();
-   BytesRef next;
-   String[] tokenizedText = tokenize(inputDocument);
-   int docsWithClassSize = countDocsWithClass();
-   while ((next = classesEnum.next()) != null) {
-     if (next.length > 0) {
-       Term term = new Term(this.classFieldName, next);
-       double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize);
-       assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal));
+   if (classes != null) {
+     TermsEnum classesEnum = classes.iterator();
+     BytesRef next;
+     String[] tokenizedText = tokenize(inputDocument);
+     int docsWithClassSize = countDocsWithClass();
+     while ((next = classesEnum.next()) != null) {
+       if (next.length > 0) {
+         Term term = new Term(this.classFieldName, next);
+         double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize);
+         assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal));
+       }
      }
    }

    // normalization; the values transforms to a 0-1 range
    return normClassificationResults(assignedClasses);
  }
@@ -168,8 +169,9 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
   * @throws IOException if accessing to term vectors or search fails
   */
  protected int countDocsWithClass() throws IOException {
-   int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount();
-   if (docCount == -1) { // in case codec doesn't support getDocCount
+   Terms terms = MultiFields.getTerms(this.leafReader, this.classFieldName);
+   int docCount;
+   if (terms == null || terms.getDocCount() == -1) { // in case codec doesn't support getDocCount
      TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
      BooleanQuery.Builder q = new BooleanQuery.Builder();
      q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
@@ -179,6 +181,8 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
      indexSearcher.search(q.build(),
          classQueryCountCollector);
      docCount = classQueryCountCollector.getTotalHits();
+   } else {
+     docCount = terms.getDocCount();
    }
    return docCount;
  }
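The guard added above matters because MultiFields.getTerms() returns null when no indexed document has the class field, and some codecs report -1 from getDocCount(). As a rough, self-contained illustration of the same defensive pattern outside the classifier (the helper class, method name and field name here are my own assumptions, not part of the patch):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;

// Hypothetical helper sketch: count documents that have any value for a field,
// falling back to a wildcard query when the codec cannot answer directly.
final class FieldDocCounter {
  static int countDocsWithField(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);   // null if no document has the field
    if (terms != null && terms.getDocCount() != -1) {
      return terms.getDocCount();                        // fast path: codec tracks the doc count
    }
    TotalHitCountCollector collector = new TotalHitCountCollector();
    new IndexSearcher(reader).search(new WildcardQuery(new Term(field, "*")), collector);
    return collector.getTotalHits();
  }
}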
@@ -168,28 +168,6 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifier {
    }
  }

- /**
-  * Counts the number of documents in the index having at least a value for the 'class' field
-  *
-  * @return the no. of documents having a value for the 'class' field
-  * @throws java.io.IOException If accessing to term vectors or search fails
-  */
- protected int countDocsWithClass() throws IOException {
-   int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount();
-   if (docCount == -1) { // in case codec doesn't support getDocCount
-     TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
-     BooleanQuery.Builder q = new BooleanQuery.Builder();
-     q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
-     if (query != null) {
-       q.add(query, BooleanClause.Occur.MUST);
-     }
-     indexSearcher.search(q.build(),
-         classQueryCountCollector);
-     docCount = classQueryCountCollector.getTotalHits();
-   }
-   return docCount;
- }

  /**
   * Returns a token array from the {@link org.apache.lucene.analysis.TokenStream} in input
   *
@@ -175,7 +175,7 @@ public class ConfusionMatrixGenerator {
  public double getPrecision(String klass) {
    Map<String, Long> classifications = linearizedMatrix.get(klass);
    double tp = 0;
-   double fp = 0;
+   double den = 0; // tp + fp
    if (classifications != null) {
      for (Map.Entry<String, Long> entry : classifications.entrySet()) {
        if (klass.equals(entry.getKey())) {
@@ -184,11 +184,11 @@ public class ConfusionMatrixGenerator {
      }
      for (Map<String, Long> values : linearizedMatrix.values()) {
        if (values.containsKey(klass)) {
-         fp += values.get(klass);
+         den += values.get(klass);
        }
      }
    }
-   return tp > 0 ? tp / (tp + fp) : 0;
+   return tp > 0 ? tp / den : 0;
  }

  /**
@@ -246,7 +246,7 @@ public class ConfusionMatrixGenerator {
    if (this.accuracy == -1) {
      double tp = 0d;
      double tn = 0d;
-     double fp = 0d;
+     double tfp = 0d; // tp + fp
      double fn = 0d;
      for (Map.Entry<String, Map<String, Long>> classification : linearizedMatrix.entrySet()) {
        String klass = classification.getKey();
@@ -259,63 +259,46 @@ public class ConfusionMatrixGenerator {
        }
        for (Map<String, Long> values : linearizedMatrix.values()) {
          if (values.containsKey(klass)) {
-           fp += values.get(klass);
+           tfp += values.get(klass);
          } else {
            tn++;
          }
        }

      }
-     this.accuracy = (tp + tn) / (fp + fn + tp + tn);
+     this.accuracy = (tp + tn) / (tfp + fn + tn);
    }
    return this.accuracy;
  }

  /**
-  * get the precision (see {@link #getPrecision(String)}) over all the classes.
+  * get the macro averaged precision (see {@link #getPrecision(String)}) over all the classes.
   *
-  * @return the precision as computed from the whole confusion matrix
+  * @return the macro averaged precision as computed from the confusion matrix
   */
  public double getPrecision() {
-   double tp = 0;
-   double fp = 0;
+   double p = 0;
    for (Map.Entry<String, Map<String, Long>> classification : linearizedMatrix.entrySet()) {
      String klass = classification.getKey();
-     for (Map.Entry<String, Long> entry : classification.getValue().entrySet()) {
-       if (klass.equals(entry.getKey())) {
-         tp += entry.getValue();
-       }
-     }
-     for (Map<String, Long> values : linearizedMatrix.values()) {
-       if (values.containsKey(klass)) {
-         fp += values.get(klass);
-       }
-     }
+     p += getPrecision(klass);
    }

-   return tp > 0 ? tp / (tp + fp) : 0;
+   return p / linearizedMatrix.size();
  }

  /**
-  * get the recall (see {@link #getRecall(String)}) over all the classes
+  * get the macro averaged recall (see {@link #getRecall(String)}) over all the classes
   *
-  * @return the recall as computed from the whole confusion matrix
+  * @return the recall as computed from the confusion matrix
   */
  public double getRecall() {
-   double tp = 0;
-   double fn = 0;
+   double r = 0;
    for (Map.Entry<String, Map<String, Long>> classification : linearizedMatrix.entrySet()) {
      String klass = classification.getKey();
-     for (Map.Entry<String, Long> entry : classification.getValue().entrySet()) {
-       if (klass.equals(entry.getKey())) {
-         tp += entry.getValue();
-       } else {
-         fn += entry.getValue();
-       }
-     }
+     r += getRecall(klass);
    }

-   return tp + fn > 0 ? tp / (tp + fn) : 0;
+   return r / linearizedMatrix.size();
  }

  @Override
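For reference, the macro-averaged metrics that getPrecision() and getRecall() now compute are simply the per-class metrics averaged with equal weight per class; this is the standard definition, stated here for clarity rather than taken from the patch:

P_macro = \frac{1}{|C|} \sum_{c \in C} \frac{TP_c}{TP_c + FP_c}
\qquad
R_macro = \frac{1}{|C|} \sum_{c \in C} \frac{TP_c}{TP_c + FN_c}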
@@ -661,7 +661,7 @@ public class MultiDocValues {
    public final OrdinalMap mapping;

    /** Creates a new MultiSortedDocValues over <code>values</code> */
-   MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
+   public MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
      assert docStarts.length == values.length + 1;
      this.values = values;
      this.docStarts = docStarts;
@@ -0,0 +1,55 @@ (new file: org.apache.lucene.util.BytesRefComparator)
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import java.util.Comparator;

/** Specialized {@link BytesRef} comparator that
 *  {@link FixedLengthBytesRefArray#iterator(Comparator)} has optimizations
 *  for.
 *  @lucene.internal */
public abstract class BytesRefComparator implements Comparator<BytesRef> {

  final int comparedBytesCount;

  /** Sole constructor.
   *  @param comparedBytesCount the maximum number of bytes to compare. */
  protected BytesRefComparator(int comparedBytesCount) {
    this.comparedBytesCount = comparedBytesCount;
  }

  /** Return the unsigned byte to use for comparison at index {@code i}, or
   *  {@code -1} if all bytes that are useful for comparisons are exhausted.
   *  This may only be called with a value of {@code i} between {@code 0}
   *  included and {@code comparedBytesCount} excluded. */
  protected abstract int byteAt(BytesRef ref, int i);

  @Override
  public int compare(BytesRef o1, BytesRef o2) {
    for (int i = 0; i < comparedBytesCount; ++i) {
      final int b1 = byteAt(o1, i);
      final int b2 = byteAt(o2, i);
      if (b1 != b2) {
        return b1 - b2;
      } else if (b1 == -1) {
        break;
      }
    }
    return 0;
  }

}
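As a rough illustration of how a BytesRefComparator subclass is meant to be written (this sketch is mine, not part of the patch): a comparator over fixed-width 4-byte big-endian keys only needs to expose each byte as an unsigned value and return -1 once the key is exhausted.

// Hypothetical example: compare BytesRefs holding 4-byte big-endian keys.
BytesRefComparator fourByteKeys = new BytesRefComparator(4) {
  @Override
  protected int byteAt(BytesRef ref, int i) {
    // unsigned byte, or -1 past the end of the key
    return i < ref.length ? ref.bytes[ref.offset + i] & 0xff : -1;
  }
};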
@@ -105,6 +105,35 @@ final class FixedLengthBytesRefArray implements SortableBytesRefArray {
      orderedEntries[i] = i;
    }

+   if (comp instanceof BytesRefComparator) {
+     BytesRefComparator bComp = (BytesRefComparator) comp;
+     new MSBRadixSorter(bComp.comparedBytesCount) {
+
+       BytesRef scratch;
+
+       {
+         scratch = new BytesRef();
+         scratch.length = valueLength;
+       }
+
+       @Override
+       protected void swap(int i, int j) {
+         int o = orderedEntries[i];
+         orderedEntries[i] = orderedEntries[j];
+         orderedEntries[j] = o;
+       }
+
+       @Override
+       protected int byteAt(int i, int k) {
+         int index1 = orderedEntries[i];
+         scratch.bytes = blocks[index1 / valuesPerBlock];
+         scratch.offset = (index1 % valuesPerBlock) * valueLength;
+         return bComp.byteAt(scratch, k);
+       }
+     }.sort(0, size());
+     return orderedEntries;
+   }
+
    final BytesRef pivot = new BytesRef();
    final BytesRef scratch1 = new BytesRef();
    final BytesRef scratch2 = new BytesRef();
@@ -33,7 +33,7 @@ public abstract class InPlaceMergeSorter extends Sorter {
  }

  void mergeSort(int from, int to) {
-   if (to - from < THRESHOLD) {
+   if (to - from < INSERTION_SORT_THRESHOLD) {
      insertionSort(from, to);
    } else {
      final int mid = (from + to) >>> 1;
@@ -38,7 +38,7 @@ public abstract class IntroSorter extends Sorter {
  }

  void quicksort(int from, int to, int maxDepth) {
-   if (to - from < THRESHOLD) {
+   if (to - from < INSERTION_SORT_THRESHOLD) {
      insertionSort(from, to);
      return;
    } else if (--maxDepth < 0) {
@@ -0,0 +1,219 @@ (new file: org.apache.lucene.util.MSBRadixSorter)
/* ASF license header (identical to the one in BytesRefComparator above) */
package org.apache.lucene.util;

import java.util.Arrays;

/** Radix sorter for variable-length strings. This class sorts based on the most
 *  significant byte first and falls back to {@link IntroSorter} when the size
 *  of the buckets to sort becomes small. It is <b>NOT</b> stable.
 *  Worst-case memory usage is about {@code 2.3 KB}.
 *  @lucene.internal */
public abstract class MSBRadixSorter extends Sorter {

  // after that many levels of recursion we fall back to introsort anyway
  // this is used as a protection against the fact that radix sort performs
  // worse when there are long common prefixes (probably because of cache
  // locality)
  private static final int LEVEL_THRESHOLD = 8;
  // size of histograms: 256 + 1 to indicate that the string is finished
  private static final int HISTOGRAM_SIZE = 257;
  // buckets below this size will be sorted with introsort
  private static final int LENGTH_THRESHOLD = 100;

  // we store one histogram per recursion level
  private final int[][] histograms = new int[LEVEL_THRESHOLD][];
  private final int[] endOffsets = new int[HISTOGRAM_SIZE];

  private final int maxLength;

  /**
   * Sole constructor.
   * @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown.
   */
  protected MSBRadixSorter(int maxLength) {
    this.maxLength = maxLength;
  }

  /** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
   *  its length is less than or equal to {@code k}. This may only be called
   *  with a value of {@code i} between {@code 0} included and
   *  {@code maxLength} excluded. */
  protected abstract int byteAt(int i, int k);

  /** Get a fall-back sorter which may assume that the first k bytes of all compared strings are equal. */
  protected Sorter getFallbackSorter(int k) {
    return new IntroSorter() {
      @Override
      protected void swap(int i, int j) {
        MSBRadixSorter.this.swap(i, j);
      }

      @Override
      protected int compare(int i, int j) {
        for (int o = k; o < maxLength; ++o) {
          final int b1 = byteAt(i, o);
          final int b2 = byteAt(j, o);
          if (b1 != b2) {
            return b1 - b2;
          } else if (b1 == -1) {
            break;
          }
        }
        return 0;
      }

      @Override
      protected void setPivot(int i) {
        pivot.setLength(0);
        for (int o = k; o < maxLength; ++o) {
          final int b = byteAt(i, o);
          if (b == -1) {
            break;
          }
          pivot.append((byte) b);
        }
      }

      @Override
      protected int comparePivot(int j) {
        for (int o = 0; o < pivot.length(); ++o) {
          final int b1 = pivot.byteAt(o) & 0xff;
          final int b2 = byteAt(j, k + o);
          if (b1 != b2) {
            return b1 - b2;
          }
        }
        if (k + pivot.length() == maxLength) {
          return 0;
        }
        return -1 - byteAt(j, k + pivot.length());
      }

      private final BytesRefBuilder pivot = new BytesRefBuilder();
    };
  }

  @Override
  protected final int compare(int i, int j) {
    throw new UnsupportedOperationException("unused: not a comparison-based sort");
  }

  @Override
  public void sort(int from, int to) {
    checkRange(from, to);
    sort(from, to, 0);
  }

  private void sort(int from, int to, int k) {
    if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
      introSort(from, to, k);
    } else {
      radixSort(from, to, k);
    }
  }

  private void introSort(int from, int to, int k) {
    getFallbackSorter(k).sort(from, to);
  }

  private void radixSort(int from, int to, int k) {
    int[] histogram = histograms[k];
    if (histogram == null) {
      histogram = histograms[k] = new int[HISTOGRAM_SIZE];
    } else {
      Arrays.fill(histogram, 0);
    }

    buildHistogram(from, to, k, histogram);

    // short-circuit: if all keys have the same byte at offset k, then recurse directly
    for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
      if (histogram[i] == to - from) {
        // everything is in the same bucket, recurse
        if (i > 0) {
          sort(from, to, k + 1);
        }
        return;
      } else if (histogram[i] != 0) {
        break;
      }
    }

    int[] startOffsets = histogram;
    int[] endOffsets = this.endOffsets;
    sumHistogram(histogram, endOffsets);
    reorder(from, to, startOffsets, endOffsets, k);
    endOffsets = startOffsets;

    if (k + 1 < maxLength) {
      // recurse on all but the first bucket since all keys are equals in this
      // bucket (we already compared all bytes)
      for (int prev = endOffsets[0], i = 1; i < HISTOGRAM_SIZE; ++i) {
        int h = endOffsets[i];
        final int bucketLen = h - prev;
        if (bucketLen > 1) {
          sort(from + prev, from + h, k + 1);
        }
        prev = h;
      }
    }
  }

  /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
  private int getBucket(int i, int k) {
    return byteAt(i, k) + 1;
  }

  /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
  private int[] buildHistogram(int from, int to, int k, int[] histogram) {
    for (int i = from; i < to; ++i) {
      histogram[getBucket(i, k)]++;
    }
    return histogram;
  }

  /** Accumulate values of the histogram so that it does not store counts but
   *  start offsets. {@code endOffsets} will store the end offsets. */
  private static void sumHistogram(int[] histogram, int[] endOffsets) {
    int accum = 0;
    for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
      final int count = histogram[i];
      histogram[i] = accum;
      accum += count;
      endOffsets[i] = accum;
    }
  }

  /**
   * Reorder based on start/end offsets for each bucket. When this method
   * returns, startOffsets and endOffsets are equal.
   * @param startOffsets start offsets per bucket
   * @param endOffsets end offsets per bucket
   */
  private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
    // reorder in place, like the dutch flag problem
    for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
      final int limit = endOffsets[i];
      for (int h1 = startOffsets[i]; h1 < limit; h1 = startOffsets[i]) {
        final int b = getBucket(from + h1, k);
        final int h2 = startOffsets[b]++;
        swap(from + h1, from + h2);
      }
    }
  }
}
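A minimal usage sketch of the sorter above (my own illustration, not from the patch, and assuming the usual java.nio.charset.StandardCharsets import): subclasses only describe how to read the k-th byte of the i-th key and how to swap two entries.

// Hypothetical example: sort an array of byte[] keys with MSBRadixSorter.
byte[][] keys = { "banana".getBytes(StandardCharsets.UTF_8),
                  "apple".getBytes(StandardCharsets.UTF_8),
                  "apricot".getBytes(StandardCharsets.UTF_8) };
new MSBRadixSorter(Integer.MAX_VALUE) {   // maximum key length unknown
  @Override
  protected int byteAt(int i, int k) {
    return k < keys[i].length ? keys[i][k] & 0xff : -1;  // -1 marks end of key
  }
  @Override
  protected void swap(int i, int j) {
    byte[] tmp = keys[i]; keys[i] = keys[j]; keys[j] = tmp;
  }
}.sort(0, keys.length);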
@@ -23,7 +23,7 @@ import java.util.Comparator;
 * @lucene.internal */
public abstract class Sorter {

- static final int THRESHOLD = 20;
+ static final int INSERTION_SORT_THRESHOLD = 20;

  /** Sole constructor, used for inheritance. */
  protected Sorter() {}
@ -16,58 +16,36 @@
|
|||
*/
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
abstract class StringMSBRadixSorter extends MSBRadixSorter {
|
||||
|
||||
/** Radix sorter for variable-length strings. This class sorts based on the most
|
||||
* significant byte first and falls back to {@link IntroSorter} when the size
|
||||
* of the buckets to sort becomes small. It is <b>NOT</b> stable.
|
||||
* Worst-case memory usage is about {@code 2.3 KB} */
|
||||
abstract class StringMSBRadixSorter extends Sorter {
|
||||
|
||||
// after that many levels of recursion we fall back to introsort anyway
|
||||
private static final int LEVEL_THRESHOLD = 8;
|
||||
// size of histograms: 256 + 1 to indicate that the string is finished
|
||||
private static final int HISTOGRAM_SIZE = 257;
|
||||
// buckets below this size will be sorted with introsort
|
||||
private static final int LENGTH_THRESHOLD = 100;
|
||||
|
||||
// we store one histogram per recursion level
|
||||
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
|
||||
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
|
||||
StringMSBRadixSorter() {
|
||||
super(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** Get a {@link BytesRef} for the given index. */
|
||||
protected abstract BytesRef get(int i);
|
||||
|
||||
/** Store bytes for the given index into {@code dest}, without the first k bytes. */
|
||||
private void get(int i, int k, BytesRef dest) {
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
BytesRef ref = get(i);
|
||||
assert ref.length >= k;
|
||||
dest.bytes = ref.bytes;
|
||||
dest.offset = ref.offset + k;
|
||||
dest.length = ref.length - k;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final int compare(int i, int j) {
|
||||
throw new UnsupportedOperationException("unused: not a comparison-based sort");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sort(int from, int to) {
|
||||
checkRange(from, to);
|
||||
sort(from, to, 0);
|
||||
}
|
||||
|
||||
private void sort(int from, int to, int k) {
|
||||
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
|
||||
introSort(from, to, k);
|
||||
} else {
|
||||
radixSort(from, to, k);
|
||||
if (ref.length <= k) {
|
||||
return -1;
|
||||
}
|
||||
return ref.bytes[ref.offset + k] & 0xff;
|
||||
}
|
||||
|
||||
private void introSort(int from, int to, int k) {
|
||||
new IntroSorter() {
|
||||
@Override
|
||||
protected Sorter getFallbackSorter(int k) {
|
||||
return new IntroSorter() {
|
||||
|
||||
private void get(int i, int k, BytesRef scratch) {
|
||||
BytesRef ref = StringMSBRadixSorter.this.get(i);
|
||||
assert ref.length >= k;
|
||||
scratch.bytes = ref.bytes;
|
||||
scratch.offset = ref.offset + k;
|
||||
scratch.length = ref.length - k;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
StringMSBRadixSorter.this.swap(i, j);
|
||||
|
@ -92,96 +70,7 @@ abstract class StringMSBRadixSorter extends Sorter {
|
|||
}
|
||||
|
||||
private final BytesRef pivot = new BytesRef(),
|
||||
scratch1 = new BytesRef(), scratch2 = new BytesRef();
|
||||
}.sort(from, to);
|
||||
}
|
||||
|
||||
private void radixSort(int from, int to, int k) {
|
||||
int[] histogram = histograms[k];
|
||||
if (histogram == null) {
|
||||
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
|
||||
} else {
|
||||
Arrays.fill(histogram, 0);
|
||||
}
|
||||
|
||||
buildHistogram(from, to, k, histogram);
|
||||
|
||||
// short-circuit: if all keys have the same byte at offset k, then recurse directly
|
||||
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
|
||||
if (histogram[i] == to - from) {
|
||||
// everything is in the same bucket, recurse
|
||||
if (i > 0) {
|
||||
sort(from, to, k + 1);
|
||||
}
|
||||
return;
|
||||
} else if (histogram[i] != 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int[] startOffsets = histogram;
|
||||
int[] endOffsets = this.endOffsets;
|
||||
sumHistogram(histogram, endOffsets);
|
||||
reorder(from, to, startOffsets, endOffsets, k);
|
||||
endOffsets = startOffsets;
|
||||
|
||||
// recurse on all but the first bucket since all keys are equals in this
|
||||
// bucket (we already compared all bytes)
|
||||
for (int prev = endOffsets[0], i = 1; i < HISTOGRAM_SIZE; ++i) {
|
||||
int h = endOffsets[i];
|
||||
final int bucketLen = h - prev;
|
||||
if (bucketLen > 1) {
|
||||
sort(from + prev, from + h, k + 1);
|
||||
}
|
||||
prev = h;
|
||||
}
|
||||
}
|
||||
|
||||
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
|
||||
private int getBucket(int id, int k) {
|
||||
BytesRef ref = get(id);
|
||||
if (ref.length <= k) {
|
||||
return 0;
|
||||
}
|
||||
final int b = ref.bytes[ref.offset + k] & 0xff;
|
||||
return b + 1;
|
||||
}
|
||||
|
||||
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
|
||||
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
|
||||
for (int i = from; i < to; ++i) {
|
||||
histogram[getBucket(i, k)]++;
|
||||
}
|
||||
return histogram;
|
||||
}
|
||||
|
||||
/** Accumulate values of the histogram so that it does not store counts but
|
||||
* start offsets. {@code endOffsets} will store the end offsets. */
|
||||
private static void sumHistogram(int[] histogram, int[] endOffsets) {
|
||||
int accum = 0;
|
||||
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
|
||||
final int count = histogram[i];
|
||||
histogram[i] = accum;
|
||||
accum += count;
|
||||
endOffsets[i] = accum;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reorder based on start/end offsets for each bucket. When this method
|
||||
* returns, startOffsets and endOffsets are equal.
|
||||
* @param startOffsets start offsets per bucket
|
||||
* @param endOffsets end offsets per bucket
|
||||
*/
|
||||
private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
|
||||
// reorder in place, like the dutch flag problem
|
||||
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
|
||||
final int limit = endOffsets[i];
|
||||
for (int h1 = startOffsets[i]; h1 < limit; h1 = startOffsets[i]) {
|
||||
final int b = getBucket(from + h1, k);
|
||||
final int h2 = startOffsets[b]++;
|
||||
swap(from + h1, from + h2);
|
||||
}
|
||||
}
|
||||
scratch1 = new BytesRef(), scratch2 = new BytesRef();
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
|
@ -33,10 +32,11 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.store.TrackingDirectoryWrapper;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefComparator;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntroSorter;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.MSBRadixSorter;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
@ -604,39 +604,26 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
/** Sort the heap writer by the specified dim */
|
||||
private void sortHeapPointWriter(final HeapPointWriter writer, int dim) {
|
||||
final int pointCount = Math.toIntExact(this.pointCount);
|
||||
// Tie-break by docID:
|
||||
|
||||
assert pointCount < Integer.MAX_VALUE;
|
||||
//int[] swapCount = new int[1];
|
||||
//int[] cmpCount = new int[1];
|
||||
|
||||
// System.out.println("SORT length=" + length);
|
||||
|
||||
// All buffered points are still in heap; just do in-place sort:
|
||||
new IntroSorter() {
|
||||
private final byte[] pivotPackedValue = new byte[bytesPerDim];
|
||||
private int pivotDocID;
|
||||
// No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
|
||||
// can't matter at search time since we don't write ords into the index:
|
||||
new MSBRadixSorter(bytesPerDim + Integer.BYTES) {
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivotDocID = writer.docIDs[i];
|
||||
int block = i / writer.valuesPerBlock;
|
||||
int index = i % writer.valuesPerBlock;
|
||||
System.arraycopy(writer.blocks.get(block), index*packedBytesLength+dim*bytesPerDim, pivotPackedValue, 0, bytesPerDim);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
//cmpCount[0]++;
|
||||
int block = j / writer.valuesPerBlock;
|
||||
int index = j % writer.valuesPerBlock;
|
||||
assert index >= 0: "index=" + index + " j=" + j;
|
||||
int cmp = StringHelper.compare(bytesPerDim, pivotPackedValue, 0, writer.blocks.get(block), bytesPerDim*(index*numDims+dim));
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
protected int byteAt(int i, int k) {
|
||||
assert k >= 0;
|
||||
if (k < bytesPerDim) {
|
||||
// dim bytes
|
||||
int block = i / writer.valuesPerBlock;
|
||||
int index = i % writer.valuesPerBlock;
|
||||
return writer.blocks.get(block)[index * packedBytesLength + dim * bytesPerDim + k] & 0xff;
|
||||
} else {
|
||||
// doc id
|
||||
int s = 3 - (k - bytesPerDim);
|
||||
return (writer.docIDs[i] >>> (s * 8)) & 0xff;
|
||||
}
|
||||
|
||||
// Tie-break
|
||||
return Integer.compare(pivotDocID, writer.docIDs[j]);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -670,26 +657,7 @@ public class BKDWriter implements Closeable {
|
|||
System.arraycopy(scratch1, 0, blockJ, indexJ, packedBytesLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
//cmpCount[0]++;
|
||||
int blockI = i / writer.valuesPerBlock;
|
||||
int dimI = i % writer.valuesPerBlock;
|
||||
int blockJ = j / writer.valuesPerBlock;
|
||||
int dimJ = j % writer.valuesPerBlock;
|
||||
int cmp = StringHelper.compare(bytesPerDim, writer.blocks.get(blockI), bytesPerDim*(dimI*numDims+dim), writer.blocks.get(blockJ), bytesPerDim*(dimJ*numDims+dim));
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
// Tie-break by docID:
|
||||
|
||||
// No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
|
||||
// can't matter at search time since we don't write ords into the index:
|
||||
return Integer.compare(writer.docIDs[i], writer.docIDs[j]);
|
||||
}
|
||||
}.sort(0, Math.toIntExact(pointCount));
|
||||
//System.out.println("LEN=" + length + " SWAP=" + swapCount[0] + " CMP=" + cmpCount[0]);
|
||||
}.sort(0, pointCount);
|
||||
}
|
||||
|
||||
private PointWriter sort(int dim) throws IOException {
|
||||
|
@ -724,28 +692,28 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
final int offset = bytesPerDim * dim;
|
||||
|
||||
Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
|
||||
|
||||
final ByteArrayDataInput reader = new ByteArrayDataInput();
|
||||
|
||||
@Override
|
||||
public int compare(BytesRef a, BytesRef b) {
|
||||
// First compare by the requested dimension we are sorting by:
|
||||
int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + offset, b.bytes, b.offset + offset);
|
||||
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
Comparator<BytesRef> cmp;
|
||||
if (dim == numDims - 1) {
|
||||
// in that case the bytes for the dimension and for the doc id are contiguous,
|
||||
// so we don't need a branch
|
||||
cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
|
||||
@Override
|
||||
protected int byteAt(BytesRef ref, int i) {
|
||||
return ref.bytes[ref.offset + offset + i] & 0xff;
|
||||
}
|
||||
|
||||
// Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
|
||||
// the same value in a given dimension indexed more than once: it can't matter at search
|
||||
// time since we don't write ords into the index:
|
||||
|
||||
return StringHelper.compare(Integer.BYTES,
|
||||
a.bytes, a.offset + packedBytesLength,
|
||||
b.bytes, b.offset + packedBytesLength);
|
||||
}
|
||||
};
|
||||
};
|
||||
} else {
|
||||
cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
|
||||
@Override
|
||||
protected int byteAt(BytesRef ref, int i) {
|
||||
if (i < bytesPerDim) {
|
||||
return ref.bytes[ref.offset + offset + i] & 0xff;
|
||||
} else {
|
||||
return ref.bytes[ref.offset + packedBytesLength + i - bytesPerDim] & 0xff;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) {
|
||||
|
||||
|
@ -1272,4 +1240,5 @@ public class BKDWriter implements Closeable {
|
|||
return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, count, singleValuePerDoc);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@@ -0,0 +1,117 @@ (new file: org.apache.lucene.util.TestMSBRadixSorter)
/* ASF license header (identical to the one in BytesRefComparator above) */
package org.apache.lucene.util;

import java.util.Arrays;

public class TestMSBRadixSorter extends LuceneTestCase {

  private void test(BytesRef[] refs, int len) {
    BytesRef[] expected = Arrays.copyOf(refs, len);
    Arrays.sort(expected);

    int maxLength = 0;
    for (int i = 0; i < len; ++i) {
      BytesRef ref = refs[i];
      maxLength = Math.max(maxLength, ref.length);
    }
    switch (random().nextInt(3)) {
      case 0:
        maxLength += TestUtil.nextInt(random(), 1, 5);
        break;
      case 1:
        maxLength = Integer.MAX_VALUE;
        break;
      default:
        // leave unchanged
        break;
    }

    new MSBRadixSorter(maxLength) {

      protected int byteAt(int i, int k) {
        BytesRef ref = refs[i];
        if (ref.length <= k) {
          return -1;
        }
        return ref.bytes[ref.offset + k] & 0xff;
      }

      @Override
      protected void swap(int i, int j) {
        BytesRef tmp = refs[i];
        refs[i] = refs[j];
        refs[j] = tmp;
      }
    }.sort(0, len);
    BytesRef[] actual = Arrays.copyOf(refs, len);
    assertArrayEquals(expected, actual);
  }

  public void testEmpty() {
    test(new BytesRef[random().nextInt(5)], 0);
  }

  public void testOneValue() {
    BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random()));
    test(new BytesRef[] { bytes }, 1);
  }

  public void testTwoValues() {
    BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random()));
    BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random()));
    test(new BytesRef[] { bytes1, bytes2 }, 2);
  }

  private void testRandom(int commonPrefixLen, int maxLen) {
    byte[] commonPrefix = new byte[commonPrefixLen];
    random().nextBytes(commonPrefix);
    final int len = random().nextInt(100000);
    BytesRef[] bytes = new BytesRef[len + random().nextInt(50)];
    for (int i = 0; i < len; ++i) {
      byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)];
      random().nextBytes(b);
      System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen);
      bytes[i] = new BytesRef(b);
    }
    test(bytes, len);
  }

  public void testRandom() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(0, 10);
    }
  }

  public void testRandomWithLotsOfDuplicates() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(0, 2);
    }
  }

  public void testRandomWithSharedPrefix() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(TestUtil.nextInt(random(), 1, 30), 10);
    }
  }

  public void testRandomWithSharedPrefixAndLotsOfDuplicates() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(TestUtil.nextInt(random(), 1, 30), 2);
    }
  }
}
@@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.util.SuppressForbidden;

import java.io.IOException;

@@ -45,7 +46,8 @@ public class IndexMergeTool {

    Directory[] indexes = new Directory[args.length - 1];
    for (int i = 1; i < args.length; i++) {
-     indexes[i - 1] = FSDirectory.open(Paths.get(args[i]));
+     // try to use hardlinks if possible
+     indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i])));
    }

    System.out.println("Merging...");
@@ -0,0 +1,95 @@ (new file: org.apache.lucene.store.HardlinkCopyDirectoryWrapper)
package org.apache.lucene.store;

/* ASF license header (identical to the one in BytesRefComparator above) */

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.security.AccessController;
import java.security.PrivilegedAction;

/**
 * This directory wrapper overrides {@link Directory#copyFrom(Directory, String, String, IOContext)} in order
 * to optionally use a hard-link instead of a full byte by byte file copy if applicable. Hard-links are only used if the
 * underlying filesystem supports it and if the {@link java.nio.file.LinkPermission} "hard" is granted.
 *
 * <p><b>NOTE:</b> Using hard-links changes the copy semantics of
 * {@link Directory#copyFrom(Directory, String, String, IOContext)}. When hard-links are used changes to the source file
 * will be reflected in the target file and vice-versa. Within Lucene, files are write once and should not be modified
 * after they have been written. This directory should not be used in situations where files change after they have
 * been written.
 * </p>
 */
public final class HardlinkCopyDirectoryWrapper extends FilterDirectory {
  /**
   * Creates a new HardlinkCopyDirectoryWrapper delegating to the given directory
   */
  public HardlinkCopyDirectoryWrapper(Directory in) {
    super(in);
  }

  @Override
  public void copyFrom(Directory from, String srcFile, String destFile, IOContext context) throws IOException {
    final Directory fromUnwrapped = FilterDirectory.unwrap(from);
    final Directory toUnwrapped = FilterDirectory.unwrap(this);
    // try to unwrap to FSDirectory - we might be able to just create hard-links of these files and save copying
    // the entire file.
    Exception suppressedException = null;
    boolean tryCopy = true;
    if (fromUnwrapped instanceof FSDirectory
        && toUnwrapped instanceof FSDirectory) {
      final Path fromPath = ((FSDirectory) fromUnwrapped).getDirectory();
      final Path toPath = ((FSDirectory) toUnwrapped).getDirectory();

      if (Files.isReadable(fromPath.resolve(srcFile)) && Files.isWritable(toPath)) {
        // only try hardlinks if we have permission to access the files
        // if not super.copyFrom() will give us the right exceptions
        suppressedException = AccessController.doPrivileged((PrivilegedAction<Exception>) () -> {
          try {
            Files.createLink(toPath.resolve(destFile), fromPath.resolve(srcFile));
          } catch (FileNotFoundException | NoSuchFileException | FileAlreadyExistsException ex) {
            return ex; // in these cases we bubble up since it's a true error condition.
          } catch (IOException
              | UnsupportedOperationException // if the FS doesn't support hard-links
              | SecurityException ex // we don't have permission to use hard-links just fall back to byte copy
              ) {
            // hard-links are not supported or the files are on different filesystems
            // we could go deeper and check if their filesstores are the same and opt
            // out earlier but for now we just fall back to normal file-copy
            return ex;
          }
          return null;
        });
        tryCopy = suppressedException != null;
      }
    }
    if (tryCopy) {
      try {
        super.copyFrom(from, srcFile, destFile, context);
      } catch (Exception ex) {
        if (suppressedException != null) {
          ex.addSuppressed(suppressedException);
        }
        throw ex;
      }
    }
  }
}
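A short usage sketch of the wrapper above (my own illustration, not from the patch; the directory paths and file name are made up, and the snippet assumes the usual org.apache.lucene.store and java.nio.file.Paths imports):

// Hypothetical usage: wrap the destination directory so copyFrom() tries a hard-link first.
Directory source = FSDirectory.open(Paths.get("/tmp/index-src"));      // made-up path
Directory target = new HardlinkCopyDirectoryWrapper(
    FSDirectory.open(Paths.get("/tmp/index-dst")));                    // made-up path
// hard-links the file if the filesystem and permissions allow it, otherwise falls back to a byte copy
target.copyFrom(source, "_0.cfs", "_0.cfs", IOContext.DEFAULT);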
@@ -0,0 +1,84 @@ (new file: org.apache.lucene.store.TestHardLinkCopyDirectoryWrapper)
package org.apache.lucene.store;

/* ASF license header (identical to the one in BytesRefComparator above) */

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Collections;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.util.IOUtils;

public class TestHardLinkCopyDirectoryWrapper extends BaseDirectoryTestCase {

  @Override
  protected Directory getDirectory(Path file) throws IOException {
    Directory open = random().nextBoolean() ? newFSDirectory(file) : newDirectory();
    return new HardlinkCopyDirectoryWrapper(open);
  }

  /**
   * Tests that we use hardlinks if possible on Directory#copyFrom
   */
  public void testCopyHardLinks() throws IOException {
    Path tempDir = createTempDir();
    Path dir_1 = tempDir.resolve("dir_1");
    Path dir_2 = tempDir.resolve("dir_2");
    Files.createDirectories(dir_1);
    Files.createDirectories(dir_2);

    Directory luceneDir_1 = newFSDirectory(dir_1);
    Directory luceneDir_2 = newFSDirectory(dir_2);
    try {
      try (IndexOutput output = luceneDir_1.createOutput("foo.bar", IOContext.DEFAULT)) {
        CodecUtil.writeHeader(output, "foo", 0);
        output.writeString("hey man, nice shot!");
        CodecUtil.writeFooter(output);
      }
      // In case luceneDir_1 has an NRTCachingDirectory
      luceneDir_1.sync(Collections.singleton("foo.bar"));
      try {
        Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar"));
        BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class);
        BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class);
        assumeTrue("hardlinks are not supported", destAttr.fileKey() != null
            && destAttr.fileKey().equals(sourceAttr.fileKey()));
      } catch (UnsupportedOperationException ex) {
        assumeFalse("hardlinks are not supported", true);
      }

      HardlinkCopyDirectoryWrapper wrapper = new HardlinkCopyDirectoryWrapper(luceneDir_2);
      wrapper.copyFrom(luceneDir_1, "foo.bar", "bar.foo", IOContext.DEFAULT);
      assertTrue(Files.exists(dir_2.resolve("bar.foo")));
      BasicFileAttributes destAttr = Files.readAttributes(dir_2.resolve("bar.foo"), BasicFileAttributes.class);
      BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class);
      assertEquals(destAttr.fileKey(), sourceAttr.fileKey());
      try (ChecksumIndexInput indexInput = wrapper.openChecksumInput("bar.foo", IOContext.DEFAULT)) {
        CodecUtil.checkHeader(indexInput, "foo", 0, 0);
        assertEquals("hey man, nice shot!", indexInput.readString());
        CodecUtil.checkFooter(indexInput);
      }
    } finally {
      // close them in a finally block we might run into an assume here
      IOUtils.close(luceneDir_1, luceneDir_2);
    }
  }
}
@@ -105,7 +105,7 @@ public class TestHalfFloatPoint extends LuceneTestCase {
        assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(rounded), 0f);
      } else if (Float.isFinite(rounded) == false) {
        assertFalse(Float.isNaN(rounded));
-       assertTrue(Math.abs(f) > 65520);
+       assertTrue(Math.abs(f) >= 65520);
      } else {
        int index = Arrays.binarySearch(values, f);
        float closest;
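The switch from > to >= matters because 65520 is exactly the midpoint between the largest finite half-float value and the next representable magnitude, so under round-half-even it already rounds to infinity (my reading of the boundary, not spelled out in the patch):

\frac{65504 + 65536}{2} = 65520 \quad\Rightarrow\quad |f| \ge 65520 \text{ rounds to } \pm\infty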
@@ -32,16 +32,17 @@ import org.locationtech.spatial4j.shape.SpatialRelation;

public class DateRangePrefixTreeTest extends LuceneTestCase {

- @ParametersFactory
+ @ParametersFactory(argumentFormatting = "calendar=%s")
  public static Iterable<Object[]> parameters() {
    return Arrays.asList(new Object[][]{
-       {DateRangePrefixTree.DEFAULT_CAL}, {DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL}
+       {"default", DateRangePrefixTree.DEFAULT_CAL},
+       {"compat", DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL}
    });
  }

  private final DateRangePrefixTree tree;

- public DateRangePrefixTreeTest(Calendar templateCal) {
+ public DateRangePrefixTreeTest(String suiteName, Calendar templateCal) {
    tree = new DateRangePrefixTree(templateCal);
  }
@@ -38,7 +38,9 @@ grant {
  permission java.io.FilePermission "${junit4.childvm.cwd}${/}jacoco.db", "write";
  permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,write,delete";
  permission java.io.FilePermission "${clover.db.dir}${/}-", "read,write,delete";

+ // misc HardlinkCopyDirectoryWrapper needs this to test if hardlinks can be created
+ permission java.nio.file.LinkPermission "hard";
  // needed by SSD detection tests in TestIOUtils (creates symlinks)
  permission java.nio.file.LinkPermission "symbolic";
@@ -116,6 +116,9 @@ New Features
* SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode.
  (Keith Laban, Dennis Gove)

+* SOLR-8583: Apply highlighting to hl.alternateField by default for Default and FastVectorHighlighter.
+  Turn off with hl.highlightAlternate=false (janhoy, David Smiley)

Bug Fixes
----------------------

@@ -205,6 +208,12 @@ Bug Fixes

* SOLR-9151: Fix SolrCLI so that bin/solr -e cloud example can be run from any CWD (janhoy)

+* SOLR-9141: Fix ClassCastException when using the /sql handler count() function with
+  single-shard collections (Minoru Osuka via James Dyer)

+* SOLR-9165: Spellcheck does not return collations if "maxCollationTries" is used with "cursorMark".
+  (James Dyer)

Optimizations
----------------------
* SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.

@@ -295,6 +304,14 @@ Other Changes

* SOLR-9119: several static methods in ValueSourceParser have been made private (hossman)

* SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail Khludnev)

+* SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
+  (yonik)

+* SOLR-9136: Separate out the error statistics into server-side error vs client-side error
+  (Jessica Cheng Mallet via Erick Erickson)

================== 6.0.1 ==================
(No Changes)
@ -36,12 +36,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
|
|
|
@@ -56,7 +56,8 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo

  // Statistics
  private final AtomicLong numRequests = new AtomicLong();
- private final AtomicLong numErrors = new AtomicLong();
+ private final AtomicLong numServerErrors = new AtomicLong();
+ private final AtomicLong numClientErrors = new AtomicLong();
  private final AtomicLong numTimeouts = new AtomicLong();
  private final Timer requestTimes = new Timer();
@ -164,23 +165,33 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
boolean incrementErrors = true;
|
||||
boolean isServerError = true;
|
||||
if (e instanceof SolrException) {
|
||||
SolrException se = (SolrException)e;
|
||||
if (se.code() == SolrException.ErrorCode.CONFLICT.code) {
|
||||
// TODO: should we allow this to be counted as an error (numErrors++)?
|
||||
|
||||
} else {
|
||||
SolrException.log(log, e);
|
||||
incrementErrors = false;
|
||||
} else if (se.code() >= 400 && se.code() < 500) {
|
||||
isServerError = false;
|
||||
}
|
||||
} else {
|
||||
SolrException.log(log, e);
|
||||
if (e instanceof SyntaxError) {
|
||||
isServerError = false;
|
||||
e = new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
|
||||
}
|
||||
}
|
||||
|
||||
rsp.setException(e);
|
||||
numErrors.incrementAndGet();
|
||||
|
||||
if (incrementErrors) {
|
||||
SolrException.log(log, e);
|
||||
|
||||
if (isServerError) {
|
||||
numServerErrors.incrementAndGet();
|
||||
} else {
|
||||
numClientErrors.incrementAndGet();
|
||||
}
|
||||
}
|
||||
}
|
||||
finally {
|
||||
timer.stop();
|
||||
|
@ -263,7 +274,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
Snapshot snapshot = requestTimes.getSnapshot();
|
||||
lst.add("handlerStart",handlerStart);
|
||||
lst.add("requests", numRequests.longValue());
|
||||
lst.add("errors", numErrors.longValue());
|
||||
lst.add("errors", numServerErrors.longValue() + numClientErrors.longValue());
|
||||
lst.add("serverErrors", numServerErrors.longValue());
|
||||
lst.add("clientErrors", numClientErrors.longValue());
|
||||
lst.add("timeouts", numTimeouts.longValue());
|
||||
lst.add("totalTime", requestTimes.getSum());
|
||||
lst.add("avgRequestsPerSecond", requestTimes.getMeanRate());
|
||||
|
|
|
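The hunks above carry the SOLR-9136 split between server-side and client-side error counters. Read together, the rule appears to be: a SolrException with a 4xx code (and a query SyntaxError, which is rewrapped as a 400) counts against numClientErrors, a version CONFLICT is not counted at all, and everything else counts against numServerErrors. The sketch below only restates that rule in isolation; ErrorClassifier and its Kind enum are illustrative names, not part of Solr.

import org.apache.solr.common.SolrException;

// Hypothetical helper, shown only to make the counting rule above explicit.
class ErrorClassifier {
  enum Kind { SERVER, CLIENT, NOT_COUNTED }

  static Kind classify(Exception e) {
    if (e instanceof SolrException) {
      int code = ((SolrException) e).code();
      if (code == SolrException.ErrorCode.CONFLICT.code) {
        return Kind.NOT_COUNTED;   // version conflicts are not treated as errors
      }
      if (code >= 400 && code < 500) {
        return Kind.CLIENT;        // bad request, not found, etc. -> numClientErrors
      }
    }
    return Kind.SERVER;            // unexpected exceptions -> numServerErrors
  }
}
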
@ -26,16 +26,6 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.carrotsearch.hppc.IntHashSet;
|
||||
import com.carrotsearch.hppc.IntObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongHashSet;
|
||||
import com.carrotsearch.hppc.LongObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongObjectMap;
|
||||
import com.carrotsearch.hppc.cursors.IntObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.ObjectCursor;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -53,7 +43,6 @@ import org.apache.lucene.search.Collector;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.search.QueryWrapperFilter;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
|
@ -61,7 +50,6 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
@ -87,12 +75,24 @@ import org.apache.solr.search.DocIterator;
|
|||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.DocSlice;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QueryWrapperFilter;
|
||||
import org.apache.solr.search.SolrConstantScoreQuery;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SortSpecParsing;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
||||
import com.carrotsearch.hppc.IntHashSet;
|
||||
import com.carrotsearch.hppc.IntObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongHashSet;
|
||||
import com.carrotsearch.hppc.LongObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongObjectMap;
|
||||
import com.carrotsearch.hppc.cursors.IntObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.ObjectCursor;
|
||||
|
||||
/**
|
||||
* The ExpandComponent is designed to work with the CollapsingPostFilter.
|
||||
* The CollapsingPostFilter collapses a result set on a field.
|
||||
|
|
|
@ -20,7 +20,6 @@ import java.io.IOException;
|
|||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
|
@ -61,6 +60,7 @@ import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
|
|||
import org.apache.lucene.util.AttributeSource.State;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
|
@ -389,8 +389,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
preFetchFieldNames.add(keyField.getName());
|
||||
}
|
||||
|
||||
FastVectorHighlighter fvh = null; // lazy
|
||||
FieldQuery fvhFieldQuery = null; // lazy
|
||||
FvhContainer fvhContainer = new FvhContainer(); // Lazy container for fvh and fieldQuery
|
||||
|
||||
IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getLeafReader()); // SOLR-5855
|
||||
|
||||
|
@ -408,30 +407,10 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
SchemaField schemaField = schema.getFieldOrNull(fieldName);
|
||||
|
||||
Object fieldHighlights; // object type allows flexibility for subclassers
|
||||
if (schemaField == null) {
|
||||
fieldHighlights = null;
|
||||
} else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
|
||||
// TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)!
|
||||
fieldHighlights = null;
|
||||
} else if (useFastVectorHighlighter(params, schemaField)) {
|
||||
if (fvhFieldQuery == null) {
|
||||
fvh = new FastVectorHighlighter(
|
||||
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
|
||||
params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
|
||||
// FVH cannot process hl.requireFieldMatch parameter per-field basis
|
||||
params.getBool(HighlightParams.FIELD_MATCH, false));
|
||||
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
|
||||
fvhFieldQuery = fvh.getFieldQuery(query, reader);
|
||||
}
|
||||
fieldHighlights =
|
||||
doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvh, fvhFieldQuery, reader, req);
|
||||
} else { // standard/default highlighter
|
||||
fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
|
||||
}
|
||||
fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params);
|
||||
|
||||
if (fieldHighlights == null) {
|
||||
// no summaries made; copy text from alternate field
|
||||
fieldHighlights = alternateField(doc, fieldName, req);
|
||||
fieldHighlights = alternateField(doc, docId, fieldName, fvhContainer, query, reader, req);
|
||||
}
|
||||
|
||||
if (fieldHighlights != null) {
|
||||
|
@ -443,6 +422,34 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
return fragments;
|
||||
}
|
||||
|
||||
private Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField,
|
||||
FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
|
||||
SolrParams params) throws IOException {
|
||||
Object fieldHighlights;
|
||||
if (schemaField == null) {
|
||||
fieldHighlights = null;
|
||||
} else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
|
||||
// TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)!
|
||||
fieldHighlights = null;
|
||||
} else if (useFastVectorHighlighter(params, schemaField)) {
|
||||
if (fvhContainer.fieldQuery == null) {
|
||||
FastVectorHighlighter fvh = new FastVectorHighlighter(
|
||||
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
|
||||
params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
|
||||
// FVH cannot process hl.requireFieldMatch parameter per-field basis
|
||||
params.getBool(HighlightParams.FIELD_MATCH, false));
|
||||
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
|
||||
fvhContainer.fvh = fvh;
|
||||
fvhContainer.fieldQuery = fvh.getFieldQuery(query, reader);
|
||||
}
|
||||
fieldHighlights =
|
||||
doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req);
|
||||
} else { // standard/default highlighter
|
||||
fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
|
||||
}
|
||||
return fieldHighlights;
|
||||
}
|
||||
|
||||
/** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}.
|
||||
* Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid
|
||||
* the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */
|
||||
|
@ -469,14 +476,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId,
|
||||
SchemaField schemaField, FastVectorHighlighter highlighter,
|
||||
FieldQuery fieldQuery,
|
||||
SchemaField schemaField, FvhContainer fvhContainer,
|
||||
IndexReader reader, SolrQueryRequest req) throws IOException {
|
||||
SolrParams params = req.getParams();
|
||||
String fieldName = schemaField.getName();
|
||||
SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params);
|
||||
|
||||
String[] snippets = highlighter.getBestFragments( fieldQuery, reader, docId, fieldName,
|
||||
String[] snippets = fvhContainer.fvh.getBestFragments( fvhContainer.fieldQuery, reader, docId, fieldName,
|
||||
params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
|
||||
params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ),
|
||||
getFragListBuilder( fieldName, params ),
|
||||
|
@ -497,12 +503,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
final String fieldName = schemaField.getName();
|
||||
|
||||
final int mvToExamine =
|
||||
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
|
||||
params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
|
||||
(schemaField.multiValued()) ? Integer.MAX_VALUE : 1);
|
||||
|
||||
// Technically this is the max *fragments* (snippets), not max values:
|
||||
int mvToMatch =
|
||||
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
|
||||
params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
|
||||
if (mvToExamine <= 0 || mvToMatch <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
@ -557,7 +563,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
}
|
||||
|
||||
Highlighter highlighter;
|
||||
if (req.getParams().getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
|
||||
if (params.getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
|
||||
// We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream
|
||||
// needs to implement reset() efficiently.
|
||||
|
||||
|
@ -662,12 +668,38 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
|
||||
/** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Object alternateField(Document doc, String fieldName, SolrQueryRequest req) {
|
||||
protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
|
||||
IndexReader reader, SolrQueryRequest req) throws IOException {
|
||||
IndexSchema schema = req.getSearcher().getSchema();
|
||||
SolrParams params = req.getParams();
|
||||
String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
|
||||
int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
|
||||
if (alternateField == null || alternateField.length() == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true) && !alternateField.equals(fieldName)) {
|
||||
// Try to highlight alternate field
|
||||
Object fieldHighlights = null;
|
||||
SchemaField schemaField = schema.getFieldOrNull(alternateField);
|
||||
if (schemaField != null) {
|
||||
HashMap<String, String> invariants = new HashMap<>();
|
||||
invariants.put("f." + alternateField + "." + HighlightParams.SNIPPETS, "1");
|
||||
// Enforce maxAlternateFieldLength by FRAGSIZE. Minimum 18 due to FVH limitations
|
||||
invariants.put("f." + alternateField + "." + HighlightParams.FRAGSIZE,
|
||||
alternateFieldLen > 0 ? String.valueOf(Math.max(18, alternateFieldLen)) : String.valueOf(Integer.MAX_VALUE));
|
||||
SolrParams origParams = req.getParams();
|
||||
req.setParams(SolrParams.wrapDefaults(new MapSolrParams(invariants), origParams));
|
||||
fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params);
|
||||
req.setParams(origParams);
|
||||
if (fieldHighlights != null) {
|
||||
return fieldHighlights;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Fallback to static non-highlighted
|
||||
IndexableField[] docFields = doc.getFields(alternateField);
|
||||
if (docFields.length == 0) {
|
||||
// The alternate field did not exist, treat the original field as fallback instead
|
||||
|
@ -685,7 +717,6 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
String[] altTexts = listFields.toArray(new String[listFields.size()]);
|
||||
|
||||
Encoder encoder = getEncoder(fieldName, params);
|
||||
int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
|
||||
List<String> altList = new ArrayList<>();
|
||||
int len = 0;
|
||||
for( String altText: altTexts ){
|
||||
|
@ -707,6 +738,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
final TokenStream tStream = schemaField.getType().getIndexAnalyzer().tokenStream(schemaField.getName(), docText);
|
||||
return new TokenOrderingFilter(tStream, 10);
|
||||
}
|
||||
|
||||
// Wraps FVH to allow pass-by-reference
|
||||
private class FvhContainer {
|
||||
private FastVectorHighlighter fvh;
|
||||
private FieldQuery fieldQuery;
|
||||
}
|
||||
}
|
||||
|
||||
/** Orders Tokens in a window first by their startOffset ascending.
|
||||
|
|
|
@@ -0,0 +1,296 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.index;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader.CoreClosedListener;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;

/**
 * This class forces a composite reader (eg a {@link
 * MultiReader} or {@link DirectoryReader}) to emulate a
 * {@link LeafReader}.  This requires implementing the postings
 * APIs on-the-fly, using the static methods in {@link
 * MultiFields}, {@link MultiDocValues}, by stepping through
 * the sub-readers to merge fields/terms, appending docs, etc.
 *
 * <p><b>NOTE</b>: this class almost always results in a
 * performance hit.  If this is important to your use case,
 * you'll get better performance by gathering the sub readers using
 * {@link IndexReader#getContext()} to get the
 * leaves and then operate per-LeafReader,
 * instead of using this class.
 */

public final class SlowCompositeReaderWrapper extends LeafReader {

  private final CompositeReader in;
  private final Fields fields;
  private final boolean merging;

  /** This method is sugar for getting an {@link LeafReader} from
   * an {@link IndexReader} of any kind. If the reader is already atomic,
   * it is returned unchanged, otherwise wrapped by this class.
   */
  public static LeafReader wrap(IndexReader reader) throws IOException {
    if (reader instanceof CompositeReader) {
      return new SlowCompositeReaderWrapper((CompositeReader) reader, false);
    } else {
      assert reader instanceof LeafReader;
      return (LeafReader) reader;
    }
  }

  SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
    super();
    in = reader;
    if (getFieldInfos().hasPointValues()) {
      throw new IllegalArgumentException("cannot wrap points");
    }
    fields = MultiFields.getFields(in);
    in.registerParentReader(this);
    this.merging = merging;
  }

  @Override
  public String toString() {
    return "SlowCompositeReaderWrapper(" + in + ")";
  }

  @Override
  public void addCoreClosedListener(CoreClosedListener listener) {
    addCoreClosedListenerAsReaderClosedListener(in, listener);
  }

  @Override
  public void removeCoreClosedListener(CoreClosedListener listener) {
    removeCoreClosedListenerAsReaderClosedListener(in, listener);
  }

  @Override
  public Fields fields() {
    ensureOpen();
    return fields;
  }

  @Override
  public NumericDocValues getNumericDocValues(String field) throws IOException {
    ensureOpen();
    return MultiDocValues.getNumericValues(in, field);
  }

  @Override
  public Bits getDocsWithField(String field) throws IOException {
    ensureOpen();
    return MultiDocValues.getDocsWithField(in, field);
  }

  @Override
  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    ensureOpen();
    return MultiDocValues.getBinaryValues(in, field);
  }

  @Override
  public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
    ensureOpen();
    return MultiDocValues.getSortedNumericValues(in, field);
  }

  @Override
  public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
      map = cachedOrdMaps.get(field);
      if (map == null) {
        // uncached, or not a multi dv
        SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
        if (dv instanceof MultiSortedDocValues) {
          map = ((MultiSortedDocValues)dv).mapping;
          if (map.owner == getCoreCacheKey() && merging == false) {
            cachedOrdMaps.put(field, map);
          }
        }
        return dv;
      }
    }
    int size = in.leaves().size();
    final SortedDocValues[] values = new SortedDocValues[size];
    final int[] starts = new int[size+1];
    for (int i = 0; i < size; i++) {
      LeafReaderContext context = in.leaves().get(i);
      final LeafReader reader = context.reader();
      final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
      if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
        return null;
      }
      SortedDocValues v = reader.getSortedDocValues(field);
      if (v == null) {
        v = DocValues.emptySorted();
      }
      values[i] = v;
      starts[i] = context.docBase;
    }
    starts[size] = maxDoc();
    return new MultiSortedDocValues(values, starts, map);
  }

  @Override
  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
      map = cachedOrdMaps.get(field);
      if (map == null) {
        // uncached, or not a multi dv
        SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
        if (dv instanceof MultiSortedSetDocValues) {
          map = ((MultiSortedSetDocValues)dv).mapping;
          if (map.owner == getCoreCacheKey() && merging == false) {
            cachedOrdMaps.put(field, map);
          }
        }
        return dv;
      }
    }

    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size+1];
    for (int i = 0; i < size; i++) {
      LeafReaderContext context = in.leaves().get(i);
      final LeafReader reader = context.reader();
      final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
      if(fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET){
        return null;
      }
      SortedSetDocValues v = reader.getSortedSetDocValues(field);
      if (v == null) {
        v = DocValues.emptySortedSet();
      }
      values[i] = v;
      starts[i] = context.docBase;
    }
    starts[size] = maxDoc();
    return new MultiSortedSetDocValues(values, starts, map);
  }

  // TODO: this could really be a weak map somewhere else on the coreCacheKey,
  // but do we really need to optimize slow-wrapper any more?
  private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>();

  @Override
  public NumericDocValues getNormValues(String field) throws IOException {
    ensureOpen();
    return MultiDocValues.getNormValues(in, field);
  }

  @Override
  public Fields getTermVectors(int docID) throws IOException {
    ensureOpen();
    return in.getTermVectors(docID);
  }

  @Override
  public int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    return in.numDocs();
  }

  @Override
  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return in.maxDoc();
  }

  @Override
  public void document(int docID, StoredFieldVisitor visitor) throws IOException {
    ensureOpen();
    in.document(docID, visitor);
  }

  @Override
  public Bits getLiveDocs() {
    ensureOpen();
    return MultiFields.getLiveDocs(in);
  }

  @Override
  public PointValues getPointValues() {
    ensureOpen();
    return null;
  }

  @Override
  public FieldInfos getFieldInfos() {
    ensureOpen();
    return MultiFields.getMergedFieldInfos(in);
  }

  @Override
  public Object getCoreCacheKey() {
    return in.getCoreCacheKey();
  }

  @Override
  public Object getCombinedCoreAndDeletesKey() {
    return in.getCombinedCoreAndDeletesKey();
  }

  @Override
  protected void doClose() throws IOException {
    // TODO: as this is a wrapper, should we really close the delegate?
    in.close();
  }

  @Override
  public void checkIntegrity() throws IOException {
    ensureOpen();
    for (LeafReaderContext ctx : in.leaves()) {
      ctx.reader().checkIntegrity();
    }
  }
}

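The class javadoc above treats wrapping as a last resort and recommends operating per leaf instead. The fragment below is only an illustration of both paths, not code from this commit; the Directory handle and the "category" doc-values field are placeholders.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.Directory;
import org.apache.solr.index.SlowCompositeReaderWrapper;

class SlowWrapperSketch {
  static void example(Directory dir) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      // Convenient but slow: a single merged LeafReader view over all segments.
      LeafReader slow = SlowCompositeReaderWrapper.wrap(reader);
      SortedDocValues merged = slow.getSortedDocValues("category");

      // Usually faster, as the javadoc recommends: work segment by segment.
      for (LeafReaderContext ctx : reader.leaves()) {
        SortedDocValues perSegment = ctx.reader().getSortedDocValues("category");
        // ... consume per-segment values, adding ctx.docBase to local doc IDs ...
      }
    }
  }
}
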
@@ -0,0 +1,65 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergePolicyWrapper;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.apache.solr.index.SlowCompositeReaderWrapper;

// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction?

public final class SortingMergePolicy extends MergePolicyWrapper {

  private final Sort sort;

  /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
  public SortingMergePolicy(MergePolicy in, Sort sort) {
    super(in);
    this.sort = sort;
  }

  /** Return the {@link Sort} order that is used to sort segments when merging. */
  public Sort getSort() {
    return sort;
  }

  @Override
  public String toString() {
    return "SortingMergePolicy(" + in + ", sort=" + sort + ")";
  }
}

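As committed here, the class simply wraps another MergePolicy and records the Sort so callers can query it via getSort(). A minimal wiring sketch, assuming a TieredMergePolicy and a hypothetical "timestamp" long sort field; it is not taken from Solr's configuration code.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.solr.index.SortingMergePolicy;

class SortingMergePolicySketch {
  static IndexWriterConfig configure() {
    // Sort segments by a long "timestamp" field when they are merged.
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setMergePolicy(new SortingMergePolicy(new TieredMergePolicy(), sort));
    return iwc;
  }
}
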
@ -28,6 +28,7 @@ import java.util.Set;
|
|||
import java.util.TreeSet;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
|
@ -41,22 +42,16 @@ import org.apache.lucene.spatial.SpatialStrategy;
|
|||
import org.apache.lucene.spatial.query.SpatialArgs;
|
||||
import org.apache.lucene.spatial.query.SpatialArgsParser;
|
||||
import org.apache.lucene.spatial.query.SpatialOperation;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.DistanceUnits;
|
||||
import org.apache.solr.util.MapListener;
|
||||
import org.apache.solr.util.SpatialUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.context.SpatialContextFactory;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
|
@ -66,6 +61,12 @@ import org.locationtech.spatial4j.io.SupportedFormats;
|
|||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.locationtech.spatial4j.shape.Rectangle;
|
||||
import org.locationtech.spatial4j.shape.Shape;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
|
||||
/**
|
||||
* Abstract base class for Solr FieldTypes based on a Lucene 4 {@link SpatialStrategy}.
|
||||
|
|
|
@ -23,10 +23,10 @@ import java.nio.ByteBuffer;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -22,15 +22,14 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
|
@ -40,6 +39,7 @@ import org.apache.solr.analysis.SolrAnalyzer;
|
|||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.function.OrdFieldSource;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
|
|
@ -40,12 +40,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/**
|
||||
* Field for collated sort keys.
|
||||
|
|
|
@ -44,7 +44,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.FieldValueQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
|
|
|
@ -16,12 +16,6 @@
|
|||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
|
@ -31,6 +25,12 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
|
@ -45,7 +45,6 @@ import org.apache.lucene.search.DocValuesRangeQuery;
|
|||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -55,6 +54,7 @@ import org.apache.solr.common.EnumFieldValue;
|
|||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.Document;
|
||||
|
|
|
@ -16,17 +16,17 @@
|
|||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.function.FileFloatSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/** Get values from an external file instead of the index.
|
||||
*
|
||||
|
|
|
@ -49,7 +49,6 @@ import org.apache.lucene.search.SortedNumericSelector;
|
|||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -66,6 +65,7 @@ import org.apache.solr.query.SolrRangeQuery;
|
|||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.Sorting;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -18,23 +18,23 @@ package org.apache.solr.schema;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.locationtech.spatial4j.io.GeohashUtils;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SolrConstantScoreQuery;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.search.function.ValueSourceRangeFilter;
|
||||
import org.apache.solr.search.function.distance.GeohashHaversineFunction;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.SpatialUtils;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.locationtech.spatial4j.io.GeohashUtils;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
|
||||
/**
|
||||
* This is a class that represents a <a
|
||||
|
|
|
@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
|
||||
|
@ -37,7 +37,6 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.DelegatingCollector;
|
||||
|
@ -45,8 +44,8 @@ import org.apache.solr.search.ExtendedQueryBase;
|
|||
import org.apache.solr.search.PostFilter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.SpatialUtils;
|
||||
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@ -30,13 +29,14 @@ import org.apache.lucene.search.BooleanClause;
|
|||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
|
||||
/**
|
||||
* A point type that indexes a point in an n-dimensional space as separate fields and supports range queries.
|
||||
|
|
|
@ -33,14 +33,14 @@ import org.apache.lucene.index.IndexableField;
|
|||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.AttributeSource.State;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.solr.analysis.SolrAnalyzer;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.Sorting;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -20,16 +20,16 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/**
|
||||
* Utility Field used for random sorting. It should not be passed a value.
|
||||
|
|
|
@ -27,10 +27,10 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
public class StrField extends PrimitiveFieldType {
|
||||
|
||||
|
|
|
@ -16,14 +16,16 @@
|
|||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.QueryBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -31,9 +33,7 @@ import org.apache.solr.query.SolrRangeQuery;
|
|||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.Sorting;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/** <code>TextField</code> is the basic type for configurable text analysis.
|
||||
* Analyzers for field types using this implementation should be defined in the schema.
|
||||
|
|
|
@ -26,8 +26,8 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.FieldType.LegacyNumericType;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
|
@ -47,7 +47,6 @@ import org.apache.lucene.search.LegacyNumericRangeQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -61,6 +60,7 @@ import org.apache.solr.response.TextResponseWriter;
|
|||
import org.apache.solr.search.FunctionRangeQuery;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.function.ValueSourceRangeFilter;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.DateMathParser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
|
|
@ -25,15 +25,7 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.carrotsearch.hppc.FloatArrayList;
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import com.carrotsearch.hppc.IntIntHashMap;
|
||||
import com.carrotsearch.hppc.IntLongHashMap;
|
||||
import com.carrotsearch.hppc.cursors.IntIntCursor;
|
||||
import com.carrotsearch.hppc.cursors.IntLongCursor;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -49,13 +41,12 @@ import org.apache.lucene.queries.function.FunctionValues;
|
|||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -65,8 +56,8 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.handler.component.QueryElevationComponent;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
|
@ -75,6 +66,14 @@ import org.apache.solr.schema.StrField;
|
|||
import org.apache.solr.schema.TrieFloatField;
|
||||
import org.apache.solr.schema.TrieIntField;
|
||||
import org.apache.solr.schema.TrieLongField;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
import com.carrotsearch.hppc.FloatArrayList;
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import com.carrotsearch.hppc.IntIntHashMap;
|
||||
import com.carrotsearch.hppc.IntLongHashMap;
|
||||
import com.carrotsearch.hppc.cursors.IntIntCursor;
|
||||
import com.carrotsearch.hppc.cursors.IntLongCursor;
|
||||
|
||||
/**
|
||||
|
||||
|
|
|
@ -20,16 +20,16 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
/**
|
||||
* Lucene 5.0 removes "accidental" insanity, so you must explicitly
|
||||
|
|
|
@ -18,13 +18,12 @@ package org.apache.solr.search;
|
|||
|
||||
import java.net.URL;
|
||||
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
|
||||
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
/**
|
||||
* A SolrInfoMBean that provides introspection of the Solr FieldCache
|
||||
|
|
|
@ -53,7 +53,6 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.MultiPostingsEnum;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
|
@ -94,7 +93,6 @@ import org.apache.lucene.search.TopScoreDocCollector;
|
|||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
@ -109,6 +107,7 @@ import org.apache.solr.core.DirectoryFactory.DirContext;
|
|||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
|
@ -122,6 +121,7 @@ import org.apache.solr.schema.TrieFloatField;
|
|||
import org.apache.solr.schema.TrieIntField;
|
||||
import org.apache.solr.search.facet.UnInvertedField;
|
||||
import org.apache.solr.search.stats.StatsSource;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.solr.update.IndexFingerprint;
|
||||
import org.apache.solr.update.SolrIndexConfig;
|
||||
import org.slf4j.Logger;
|
||||
|
|
|
@ -27,17 +27,16 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.uninverting.DocTermOrds;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.TrieField;
|
||||
import org.apache.solr.search.BitDocSet;
|
||||
|
@ -45,6 +44,7 @@ import org.apache.solr.search.DocIterator;
|
|||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrCache;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.uninverting.DocTermOrds;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -20,13 +20,12 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@ -34,6 +33,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
|||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.util.mutable.MutableValue;
|
||||
import org.apache.lucene.util.mutable.MutableValueInt;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.Insanity;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
|
|
@ -20,18 +20,18 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.Insanity;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
|
|
@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.join.JoinUtil;
|
||||
import org.apache.lucene.search.join.ScoreMode;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.Aliases;
|
||||
|
@ -45,6 +44,7 @@ import org.apache.solr.search.QParser;
|
|||
import org.apache.solr.search.QParserPlugin;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.List;
|
|||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.CursorMarkParams;
|
||||
import org.apache.solr.common.params.DisMaxParams;
|
||||
import org.apache.solr.common.params.GroupParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
@ -118,6 +119,8 @@ public class SpellCheckCollator {
|
|||
params.set(CommonParams.FL, "id");
|
||||
// we'll sort by doc id to ensure no scoring is done.
|
||||
params.set(CommonParams.SORT, "_docid_ asc");
|
||||
// CursorMark does not like _docid_ sorting, and we don't need it.
|
||||
params.remove(CursorMarkParams.CURSOR_MARK_PARAM);
|
||||
// If a dismax query, don't add unnecessary clauses for scoring
|
||||
params.remove(DisMaxParams.TIE);
|
||||
params.remove(DisMaxParams.PF);
|
||||
|
|
|
@ -0,0 +1,887 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.codecs.PostingsFormat; // javadocs
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* This class enables fast access to multiple term ords for
|
||||
* a specified field across all docIDs.
|
||||
*
|
||||
* Like FieldCache, it uninverts the index and holds a
|
||||
* packed data structure in RAM to enable fast access.
|
||||
* Unlike FieldCache, it can handle multi-valued fields,
|
||||
* and, it does not hold the term bytes in RAM. Rather, you
|
||||
* must obtain a TermsEnum from the {@link #getOrdTermsEnum}
|
||||
* method, and then seek-by-ord to get the term's bytes.
|
||||
*
|
||||
* While normally term ords are type long, in this API they are
|
||||
* int as the internal representation here cannot address
|
||||
* more than MAX_INT unique terms. Also, typically this
|
||||
* class is used on fields with relatively few unique terms
|
||||
* vs the number of documents. In addition, there is an
|
||||
* internal limit (16 MB) on how many bytes each chunk of
|
||||
* documents may consume. If you trip this limit you'll hit
|
||||
* an IllegalStateException.
|
||||
*
|
||||
* Deleted documents are skipped during uninversion, and if
|
||||
* you look them up you'll get 0 ords.
|
||||
*
|
||||
* The returned per-document ords do not retain their
|
||||
* original order in the document. Instead they are returned
|
||||
* in sorted (by ord, ie term's BytesRef comparator) order. They
|
||||
* are also de-dup'd (ie if doc has same term more than once
|
||||
* in this field, you'll only get that ord back once).
|
||||
*
|
||||
* This class
|
||||
* will create its own term index internally, allowing it to
|
||||
* create a wrapped TermsEnum that can handle ord. The
|
||||
* {@link #getOrdTermsEnum} method then provides this
|
||||
* wrapped enum.
|
||||
*
|
||||
* The RAM consumption of this class can be high!
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
/*
|
||||
* Final form of the un-inverted field:
|
||||
* Each document points to a list of term numbers that are contained in that document.
|
||||
*
|
||||
* Term numbers are in sorted order, and are encoded as variable-length deltas from the
|
||||
* previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A
|
||||
* term number of 0 signals the end of the termNumber list.
|
||||
*
|
||||
* There is a single int[maxDoc()] which either contains a pointer into a byte[] for
|
||||
* the termNumber lists, or directly contains the termNumber list if it fits in the 4
|
||||
* bytes of an integer. If the first byte in the integer is 1, the next 3 bytes
|
||||
* are a pointer into a byte[] where the termNumber list starts.
|
||||
*
|
||||
* There are actually 256 byte arrays, to compensate for the fact that the pointers
|
||||
* into the byte arrays are only 3 bytes long. The correct byte array for a document
|
||||
* is a function of its id.
|
||||
*
|
||||
* To save space and speed up faceting, any term that matches enough documents will
|
||||
* not be un-inverted... it will be skipped while building the un-inverted field structure,
|
||||
* and will use a set intersection method during faceting.
|
||||
*
|
||||
* To further save memory, the terms (the actual string values) are not all stored in
|
||||
* memory, but a TermIndex is used to convert term numbers to term values only
|
||||
* for the terms needed after faceting has completed. Only every 128th term value
|
||||
* is stored, along with its corresponding term number, and this is used as an
|
||||
* index to find the closest term and iterate until the desired number is hit (very
|
||||
* much like Lucene's own internal term index).
|
||||
*
|
||||
*/
|
||||
|
||||
public class DocTermOrds implements Accountable {
|
||||
|
||||
// Term ords are shifted by this, internally, to reserve
|
||||
// values 0 (end term) and 1 (index is a pointer into byte array)
|
||||
private final static int TNUM_OFFSET = 2;
|
||||
|
||||
/** Every 128th term is indexed, by default. */
|
||||
public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing
|
||||
|
||||
private int indexIntervalBits;
|
||||
private int indexIntervalMask;
|
||||
private int indexInterval;
|
||||
|
||||
/** Don't uninvert terms that exceed this count. */
|
||||
protected final int maxTermDocFreq;
|
||||
|
||||
/** Field we are uninverting. */
|
||||
protected final String field;
|
||||
|
||||
/** Number of terms in the field. */
|
||||
protected int numTermsInField;
|
||||
|
||||
/** Total number of references to term numbers. */
|
||||
protected long termInstances;
|
||||
private long memsz;
|
||||
|
||||
/** Total time to uninvert the field. */
|
||||
protected int total_time;
|
||||
|
||||
/** Time for phase1 of the uninvert process. */
|
||||
protected int phase1_time;
|
||||
|
||||
/** Holds the per-document ords or a pointer to the ords. */
|
||||
protected int[] index;
|
||||
|
||||
/** Holds term ords for documents. */
|
||||
protected byte[][] tnums = new byte[256][];
|
||||
|
||||
/** Total bytes (sum of term lengths) for all indexed terms.*/
|
||||
protected long sizeOfIndexedStrings;
|
||||
|
||||
/** Holds the indexed (by default every 128th) terms. */
|
||||
protected BytesRef[] indexedTermsArray = new BytesRef[0];
|
||||
|
||||
/** If non-null, only terms matching this prefix were
|
||||
* indexed. */
|
||||
protected BytesRef prefix;
|
||||
|
||||
/** Ordinal of the first term in the field, or 0 if the
|
||||
* {@link PostingsFormat} does not implement {@link
|
||||
* TermsEnum#ord}. */
|
||||
protected int ordBase;
|
||||
|
||||
/** Used while uninverting. */
|
||||
protected PostingsEnum postingsEnum;
|
||||
|
||||
/** If true, check and throw an exception if the field has docValues enabled.
|
||||
* Normally, docValues should be used in preference to DocTermOrds. */
|
||||
protected boolean checkForDocValues = true;
|
||||
|
||||
/** Returns total bytes used. */
|
||||
public long ramBytesUsed() {
|
||||
// can cache the mem size since it shouldn't change
|
||||
if (memsz!=0) return memsz;
|
||||
long sz = 8*8 + 32; // local fields
|
||||
if (index != null) sz += index.length * 4;
|
||||
if (tnums!=null) {
|
||||
for (byte[] arr : tnums)
|
||||
if (arr != null) sz += arr.length;
|
||||
}
|
||||
memsz = sz;
|
||||
return sz;
|
||||
}
|
||||
|
||||
/** Inverts all terms */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException {
|
||||
this(reader, liveDocs, field, null, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
// TODO: instead of all these ctors and options, take termsenum!
|
||||
|
||||
/** Inverts only terms starting w/ prefix */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException {
|
||||
this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** Inverts only terms starting w/ prefix, and only terms
|
||||
* whose docFreq (not taking deletions into account) is
|
||||
* <= maxTermDocFreq */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException {
|
||||
this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS);
|
||||
}
|
||||
|
||||
/** Inverts only terms starting w/ prefix, and only terms
|
||||
* whose docFreq (not taking deletions into account) is
|
||||
* <= maxTermDocFreq, with a custom indexing interval
|
||||
* (default is every 128th term). */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException {
|
||||
this(field, maxTermDocFreq, indexIntervalBits);
|
||||
uninvert(reader, liveDocs, termPrefix);
|
||||
}
|
||||
|
||||
/** Subclass inits w/ this, but be sure you then call
|
||||
* uninvert, only once */
|
||||
protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) {
|
||||
//System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq);
|
||||
this.field = field;
|
||||
this.maxTermDocFreq = maxTermDocFreq;
|
||||
this.indexIntervalBits = indexIntervalBits;
|
||||
indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits);
|
||||
indexInterval = 1 << indexIntervalBits;
|
||||
}
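
A quick, illustrative sketch (not part of the patch) of the interval arithmetic computed above: with the default of 7 bits, the mask is 0x7f and every 128th term ends up in the internal term index.

public class IndexIntervalSketch {
  public static void main(String[] args) {
    int indexIntervalBits = 7;                                        // DEFAULT_INDEX_INTERVAL_BITS
    int indexInterval = 1 << indexIntervalBits;                       // 128
    int indexIntervalMask = 0xffffffff >>> (32 - indexIntervalBits);  // 0x7f == 127
    for (int termNum = 0; termNum <= 300; termNum++) {
      if ((termNum & indexIntervalMask) == 0) {
        System.out.println("indexed term number: " + termNum);        // prints 0, 128, 256
      }
    }
    System.out.println("one indexed term per " + indexInterval + " terms");
  }
}
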
|
||||
|
||||
/**
|
||||
* Returns a TermsEnum that implements ord, or null if no terms in field.
|
||||
* <p>
|
||||
* we build a "private" terms
|
||||
* index internally (WARNING: consumes RAM) and use that
|
||||
* index to implement ord. This also enables ord on top
|
||||
* of a composite reader. The returned TermsEnum is
|
||||
* unpositioned. This returns null if there are no terms.
|
||||
* </p>
|
||||
* <p><b>NOTE</b>: you must pass the same reader that was
|
||||
* used when creating this class
|
||||
*/
|
||||
public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
|
||||
// NOTE: see LUCENE-6529 before attempting to optimize this method to
|
||||
// return a TermsEnum directly from the reader if it already supports ord().
|
||||
|
||||
assert null != indexedTermsArray;
|
||||
|
||||
if (0 == indexedTermsArray.length) {
|
||||
return null;
|
||||
} else {
|
||||
return new OrdWrappedTermsEnum(reader);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of terms in this field
|
||||
*/
|
||||
public int numTerms() {
|
||||
return numTermsInField;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if no terms were indexed.
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return index == null;
|
||||
}
|
||||
|
||||
/** Subclass can override this */
|
||||
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
|
||||
}
|
||||
|
||||
/** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)}
|
||||
* to record the document frequency for each uninverted
|
||||
* term. */
|
||||
protected void setActualDocFreq(int termNum, int df) throws IOException {
|
||||
}
|
||||
|
||||
/** Call this only once (if you subclass!) */
|
||||
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
}
|
||||
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
|
||||
final long startTime = System.nanoTime();
|
||||
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
|
||||
|
||||
final int maxDoc = reader.maxDoc();
|
||||
final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
|
||||
final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
|
||||
final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
|
||||
|
||||
final Terms terms = reader.terms(field);
|
||||
if (terms == null) {
|
||||
// No terms
|
||||
return;
|
||||
}
|
||||
|
||||
final TermsEnum te = terms.iterator();
|
||||
final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
|
||||
//System.out.println("seekStart=" + seekStart.utf8ToString());
|
||||
if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
|
||||
// No terms match
|
||||
return;
|
||||
}
|
||||
|
||||
// For our "term index wrapper"
|
||||
final List<BytesRef> indexedTerms = new ArrayList<>();
|
||||
final PagedBytes indexedTermsBytes = new PagedBytes(15);
|
||||
|
||||
// we need a minimum of 9 bytes, but round up to 12 since the space would
|
||||
// be wasted with most allocators anyway.
|
||||
byte[] tempArr = new byte[12];
|
||||
|
||||
//
|
||||
// enumerate all terms, and build an intermediate form of the un-inverted field.
|
||||
//
|
||||
// During this intermediate form, every document has a (potential) byte[]
|
||||
// and the int[maxDoc()] array either contains the termNumber list directly
|
||||
// or the *end* offset of the termNumber list in its byte array (for faster
|
||||
// appending and faster creation of the final form).
|
||||
//
|
||||
// idea... if things are too large while building, we could do a range of docs
|
||||
// at a time (but it would be a fair amount slower to build)
|
||||
// could also do ranges in parallel to take advantage of multiple CPUs
|
||||
|
||||
// OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
|
||||
// values. This requires going over the field first to find the most
|
||||
// frequent terms ahead of time.
|
||||
|
||||
int termNum = 0;
|
||||
postingsEnum = null;
|
||||
|
||||
// Loop begins with te positioned to first term (we call
|
||||
// seek above):
|
||||
for (;;) {
|
||||
final BytesRef t = te.term();
|
||||
if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
|
||||
break;
|
||||
}
|
||||
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
|
||||
|
||||
visitTerm(te, termNum);
|
||||
|
||||
if ((termNum & indexIntervalMask) == 0) {
|
||||
// Index this term
|
||||
sizeOfIndexedStrings += t.length;
|
||||
BytesRef indexedTerm = new BytesRef();
|
||||
indexedTermsBytes.copy(t, indexedTerm);
|
||||
// TODO: really should 1) strip off useless suffix,
|
||||
// and 2) use FST not array/PagedBytes
|
||||
indexedTerms.add(indexedTerm);
|
||||
}
|
||||
|
||||
final int df = te.docFreq();
|
||||
if (df <= maxTermDocFreq) {
|
||||
|
||||
postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);
|
||||
|
||||
// docFreq, but takes deletions into account
|
||||
int actualDF = 0;
|
||||
|
||||
for (;;) {
|
||||
int doc = postingsEnum.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
//System.out.println(" chunk=" + chunk + " docs");
|
||||
|
||||
actualDF ++;
|
||||
termInstances++;
|
||||
|
||||
//System.out.println(" docID=" + doc);
|
||||
// add TNUM_OFFSET to the term number to make room for special reserved values:
|
||||
// 0 (end term) and 1 (index into byte array follows)
|
||||
int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
|
||||
lastTerm[doc] = termNum;
|
||||
int val = index[doc];
|
||||
|
||||
if ((val & 0xff)==1) {
|
||||
// index into byte array (actually the end of
|
||||
// the doc-specific byte[] when building)
|
||||
int pos = val >>> 8;
|
||||
int ilen = vIntSize(delta);
|
||||
byte[] arr = bytes[doc];
|
||||
int newend = pos+ilen;
|
||||
if (newend > arr.length) {
|
||||
// We avoid a doubling strategy to lower memory usage.
|
||||
// this faceting method isn't for docs with many terms.
|
||||
// In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
|
||||
// TODO: figure out what array lengths we can round up to w/o actually using more memory
|
||||
// (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
|
||||
// It should be safe to round up to the nearest 32 bits in any case.
|
||||
int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
|
||||
byte[] newarr = new byte[newLen];
|
||||
System.arraycopy(arr, 0, newarr, 0, pos);
|
||||
arr = newarr;
|
||||
bytes[doc] = newarr;
|
||||
}
|
||||
pos = writeInt(delta, arr, pos);
|
||||
index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
|
||||
} else {
|
||||
// OK, this int has data in it... find the end (a zero starting byte - not
|
||||
// part of another number, hence not following a byte with the high bit set).
|
||||
int ipos;
|
||||
if (val==0) {
|
||||
ipos=0;
|
||||
} else if ((val & 0x0000ff80)==0) {
|
||||
ipos=1;
|
||||
} else if ((val & 0x00ff8000)==0) {
|
||||
ipos=2;
|
||||
} else if ((val & 0xff800000)==0) {
|
||||
ipos=3;
|
||||
} else {
|
||||
ipos=4;
|
||||
}
|
||||
|
||||
//System.out.println(" ipos=" + ipos);
|
||||
|
||||
int endPos = writeInt(delta, tempArr, ipos);
|
||||
//System.out.println(" endpos=" + endPos);
|
||||
if (endPos <= 4) {
|
||||
//System.out.println(" fits!");
|
||||
// value will fit in the integer... move bytes back
|
||||
for (int j=ipos; j<endPos; j++) {
|
||||
val |= (tempArr[j] & 0xff) << (j<<3);
|
||||
}
|
||||
index[doc] = val;
|
||||
} else {
|
||||
// value won't fit... move integer into byte[]
|
||||
for (int j=0; j<ipos; j++) {
|
||||
tempArr[j] = (byte)val;
|
||||
val >>>=8;
|
||||
}
|
||||
// point at the end index in the byte[]
|
||||
index[doc] = (endPos<<8) | 1;
|
||||
bytes[doc] = tempArr;
|
||||
tempArr = new byte[12];
|
||||
}
|
||||
}
|
||||
}
|
||||
setActualDocFreq(termNum, actualDF);
|
||||
}
|
||||
|
||||
termNum++;
|
||||
if (te.next() == null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
numTermsInField = termNum;
|
||||
|
||||
long midPoint = System.nanoTime();
|
||||
|
||||
if (termInstances == 0) {
|
||||
// we didn't invert anything
|
||||
// lower memory consumption.
|
||||
tnums = null;
|
||||
} else {
|
||||
|
||||
this.index = index;
|
||||
|
||||
//
|
||||
// transform intermediate form into the final form, building a single byte[]
|
||||
// at a time, and releasing the intermediate byte[]s as we go to avoid
|
||||
// increasing the memory footprint.
|
||||
//
|
||||
|
||||
for (int pass = 0; pass<256; pass++) {
|
||||
byte[] target = tnums[pass];
|
||||
int pos=0; // end in target;
|
||||
if (target != null) {
|
||||
pos = target.length;
|
||||
} else {
|
||||
target = new byte[4096];
|
||||
}
|
||||
|
||||
// loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
|
||||
// where pp is the pass (which array we are building), and xx is all values.
|
||||
// each pass shares the same byte[] for termNumber lists.
|
||||
for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
|
||||
int lim = Math.min(docbase + (1<<16), maxDoc);
|
||||
for (int doc=docbase; doc<lim; doc++) {
|
||||
//System.out.println(" pass=" + pass + " process docID=" + doc);
|
||||
int val = index[doc];
|
||||
if ((val&0xff) == 1) {
|
||||
int len = val >>> 8;
|
||||
//System.out.println(" ptr pos=" + pos);
|
||||
index[doc] = (pos<<8)|1; // change index to point to start of array
|
||||
if ((pos & 0xff000000) != 0) {
|
||||
// we only have 24 bits for the array index
|
||||
throw new IllegalStateException("Too many values for UnInvertedField faceting on field "+field);
|
||||
}
|
||||
byte[] arr = bytes[doc];
|
||||
/*
|
||||
for(byte b : arr) {
|
||||
//System.out.println(" b=" + Integer.toHexString((int) b));
|
||||
}
|
||||
*/
|
||||
bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
|
||||
if (target.length <= pos + len) {
|
||||
int newlen = target.length;
|
||||
/*** we don't have to worry about the array getting too large
|
||||
* since the "pos" param will overflow first (only 24 bits available)
|
||||
if ((newlen<<1) <= 0) {
|
||||
// overflow...
|
||||
newlen = Integer.MAX_VALUE;
|
||||
if (newlen <= pos + len) {
|
||||
throw new SolrException(400,"Too many terms to uninvert field!");
|
||||
}
|
||||
} else {
|
||||
while (newlen <= pos + len) newlen<<=1; // doubling strategy
|
||||
}
|
||||
****/
|
||||
while (newlen <= pos + len) newlen<<=1; // doubling strategy
|
||||
byte[] newtarget = new byte[newlen];
|
||||
System.arraycopy(target, 0, newtarget, 0, pos);
|
||||
target = newtarget;
|
||||
}
|
||||
System.arraycopy(arr, 0, target, pos, len);
|
||||
pos += len + 1; // skip single byte at end and leave it 0 for terminator
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shrink array
|
||||
if (pos < target.length) {
|
||||
byte[] newtarget = new byte[pos];
|
||||
System.arraycopy(target, 0, newtarget, 0, pos);
|
||||
target = newtarget;
|
||||
}
|
||||
|
||||
tnums[pass] = target;
|
||||
|
||||
if ((pass << 16) > maxDoc)
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
|
||||
|
||||
long endTime = System.nanoTime();
|
||||
|
||||
total_time = (int) TimeUnit.MILLISECONDS.convert(endTime-startTime, TimeUnit.NANOSECONDS);
|
||||
phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint-startTime, TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/** Number of bytes to represent an unsigned int as a vint. */
|
||||
private static int vIntSize(int x) {
|
||||
if ((x & (0xffffffff << (7*1))) == 0 ) {
|
||||
return 1;
|
||||
}
|
||||
if ((x & (0xffffffff << (7*2))) == 0 ) {
|
||||
return 2;
|
||||
}
|
||||
if ((x & (0xffffffff << (7*3))) == 0 ) {
|
||||
return 3;
|
||||
}
|
||||
if ((x & (0xffffffff << (7*4))) == 0 ) {
|
||||
return 4;
|
||||
}
|
||||
return 5;
|
||||
}
|
||||
|
||||
// todo: if we know the size of the vInt already, we could do
|
||||
// a single switch on the size
|
||||
private static int writeInt(int x, byte[] arr, int pos) {
|
||||
int a;
|
||||
a = (x >>> (7*4));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
a = (x >>> (7*3));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
a = (x >>> (7*2));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
a = (x >>> (7*1));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
arr[pos++] = (byte)(x & 0x7f);
|
||||
return pos;
|
||||
}
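
A standalone round-trip sketch of the variable-length encoding implemented by vIntSize/writeInt above (class name and test values are invented for illustration); the decode loop mirrors the one in Iterator.read() further down.

public class VIntSketch {
  /** Same layout as writeInt above: high 7-bit groups first, 0x80 set on all but the last byte. */
  static int writeVInt(int x, byte[] arr, int pos) {
    for (int shift = 28; shift > 0; shift -= 7) {
      int a = x >>> shift;
      if (a != 0) {
        arr[pos++] = (byte) (a | 0x80);  // continuation bit set, low 7 bits kept by the byte cast
      }
    }
    arr[pos++] = (byte) (x & 0x7f);      // final byte has the high bit clear
    return pos;
  }

  /** Decodes one value, as in Iterator.read(): shift in 7 bits until a byte without the high bit. */
  static int readVInt(byte[] arr, int pos) {
    int value = 0;
    byte b;
    do {
      b = arr[pos++];
      value = (value << 7) | (b & 0x7f);
    } while ((b & 0x80) != 0);
    return value;
  }

  public static void main(String[] args) {
    byte[] buf = new byte[5];
    int end = writeVInt(300, buf, 0);
    System.out.println(end);              // 2: 300 needs two bytes
    System.out.println(readVInt(buf, 0)); // 300
  }
}
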
|
||||
|
||||
/**
|
||||
* "wrap" our own terms index around the original IndexReader.
|
||||
* Only valid if there are terms for this field from the original reader
|
||||
*/
|
||||
private final class OrdWrappedTermsEnum extends TermsEnum {
|
||||
private final TermsEnum termsEnum;
|
||||
private BytesRef term;
|
||||
private long ord = -indexInterval-1; // force "real" seek
|
||||
|
||||
public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
|
||||
assert indexedTermsArray != null;
|
||||
assert 0 != indexedTermsArray.length;
|
||||
termsEnum = reader.fields().terms(field).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
|
||||
return termsEnum.postings(reuse, flags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (++ord < 0) {
|
||||
ord = 0;
|
||||
}
|
||||
if (termsEnum.next() == null) {
|
||||
term = null;
|
||||
return null;
|
||||
}
|
||||
return setTerm(); // this is extra work if we know we are in bounds...
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docFreq() throws IOException {
|
||||
return termsEnum.docFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long totalTermFreq() throws IOException {
|
||||
return termsEnum.totalTermFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() {
|
||||
return ordBase + ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef target) throws IOException {
|
||||
|
||||
// already here
|
||||
if (term != null && term.equals(target)) {
|
||||
return SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
int startIdx = Arrays.binarySearch(indexedTermsArray, target);
|
||||
|
||||
if (startIdx >= 0) {
|
||||
// we hit the term exactly... lucky us!
|
||||
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
|
||||
assert seekStatus == TermsEnum.SeekStatus.FOUND;
|
||||
ord = startIdx << indexIntervalBits;
|
||||
setTerm();
|
||||
assert term != null;
|
||||
return SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
// we didn't hit the term exactly
|
||||
startIdx = -startIdx-1;
|
||||
|
||||
if (startIdx == 0) {
|
||||
// our target occurs *before* the first term
|
||||
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
|
||||
assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
|
||||
ord = 0;
|
||||
setTerm();
|
||||
assert term != null;
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
|
||||
// back up to the start of the block
|
||||
startIdx--;
|
||||
|
||||
if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
|
||||
// we are already in the right block and the current term is before the term we want,
|
||||
// so we don't need to seek.
|
||||
} else {
|
||||
// seek to the right block
|
||||
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
|
||||
assert seekStatus == TermsEnum.SeekStatus.FOUND;
|
||||
ord = startIdx << indexIntervalBits;
|
||||
setTerm();
|
||||
assert term != null; // should be non-null since it's in the index
|
||||
}
|
||||
|
||||
while (term != null && term.compareTo(target) < 0) {
|
||||
next();
|
||||
}
|
||||
|
||||
if (term == null) {
|
||||
return SeekStatus.END;
|
||||
} else if (term.compareTo(target) == 0) {
|
||||
return SeekStatus.FOUND;
|
||||
} else {
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long targetOrd) throws IOException {
|
||||
int delta = (int) (targetOrd - ordBase - ord);
|
||||
//System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
|
||||
if (delta < 0 || delta > indexInterval) {
|
||||
final int idx = (int) (targetOrd >>> indexIntervalBits);
|
||||
final BytesRef base = indexedTermsArray[idx];
|
||||
//System.out.println(" do seek term=" + base.utf8ToString());
|
||||
ord = idx << indexIntervalBits;
|
||||
delta = (int) (targetOrd - ord);
|
||||
final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
|
||||
assert seekStatus == TermsEnum.SeekStatus.FOUND;
|
||||
} else {
|
||||
//System.out.println("seek w/in block");
|
||||
}
|
||||
|
||||
while (--delta >= 0) {
|
||||
BytesRef br = termsEnum.next();
|
||||
if (br == null) {
|
||||
assert false;
|
||||
return;
|
||||
}
|
||||
ord++;
|
||||
}
|
||||
|
||||
setTerm();
|
||||
assert term != null;
|
||||
}
|
||||
|
||||
private BytesRef setTerm() throws IOException {
|
||||
term = termsEnum.term();
|
||||
//System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
|
||||
if (prefix != null && !StringHelper.startsWith(term, prefix)) {
|
||||
term = null;
|
||||
}
|
||||
return term;
|
||||
}
|
||||
}
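
A hypothetical usage sketch of the wrapped enum (the field name "category" and the surrounding class are invented; assumes the caller lives in, or imports, org.apache.solr.uninverting):

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class OrdLookupSketch {
  /** Looks up the term with the given ord for a hypothetical "category" field. */
  static BytesRef termForOrd(LeafReader reader, long ord) throws IOException {
    DocTermOrds dto = new DocTermOrds(reader, reader.getLiveDocs(), "category");
    TermsEnum te = dto.getOrdTermsEnum(reader);  // null when the field has no terms
    if (te == null) {
      return null;
    }
    te.seekExact(ord);                           // ord-based seek provided by OrdWrappedTermsEnum
    return BytesRef.deepCopyOf(te.term());
  }
}
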
|
||||
|
||||
/** Returns the term ({@link BytesRef}) corresponding to
|
||||
* the provided ordinal. */
|
||||
public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
|
||||
termsEnum.seekExact(ord);
|
||||
return termsEnum.term();
|
||||
}
|
||||
|
||||
/** Returns a SortedSetDocValues view of this instance */
|
||||
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
|
||||
if (isEmpty()) {
|
||||
return DocValues.emptySortedSet();
|
||||
} else {
|
||||
return new Iterator(reader);
|
||||
}
|
||||
}
|
||||
|
||||
private class Iterator extends SortedSetDocValues {
|
||||
final LeafReader reader;
|
||||
final TermsEnum te; // used internally for lookupOrd() and lookupTerm()
|
||||
// currently we read 5 at a time (using the logic of the old iterator)
|
||||
final int buffer[] = new int[5];
|
||||
int bufferUpto;
|
||||
int bufferLength;
|
||||
|
||||
private int tnum;
|
||||
private int upto;
|
||||
private byte[] arr;
|
||||
|
||||
Iterator(LeafReader reader) throws IOException {
|
||||
this.reader = reader;
|
||||
this.te = termsEnum();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long nextOrd() {
|
||||
while (bufferUpto == bufferLength) {
|
||||
if (bufferLength < buffer.length) {
|
||||
return NO_MORE_ORDS;
|
||||
} else {
|
||||
bufferLength = read(buffer);
|
||||
bufferUpto = 0;
|
||||
}
|
||||
}
|
||||
return buffer[bufferUpto++];
|
||||
}
|
||||
|
||||
/** Buffer must be at least 5 ints long. Returns number
|
||||
* of term ords placed into buffer; if this count is
|
||||
* less than buffer.length then that is the end. */
|
||||
int read(int[] buffer) {
|
||||
int bufferUpto = 0;
|
||||
if (arr == null) {
|
||||
// code is inlined into upto
|
||||
//System.out.println("inlined");
|
||||
int code = upto;
|
||||
int delta = 0;
|
||||
for (;;) {
|
||||
delta = (delta << 7) | (code & 0x7f);
|
||||
if ((code & 0x80)==0) {
|
||||
if (delta==0) break;
|
||||
tnum += delta - TNUM_OFFSET;
|
||||
buffer[bufferUpto++] = ordBase+tnum;
|
||||
//System.out.println(" tnum=" + tnum);
|
||||
delta = 0;
|
||||
}
|
||||
code >>>= 8;
|
||||
}
|
||||
} else {
|
||||
// code is a pointer
|
||||
for(;;) {
|
||||
int delta = 0;
|
||||
for(;;) {
|
||||
byte b = arr[upto++];
|
||||
delta = (delta << 7) | (b & 0x7f);
|
||||
//System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b);
|
||||
if ((b & 0x80) == 0) break;
|
||||
}
|
||||
//System.out.println(" delta=" + delta);
|
||||
if (delta == 0) break;
|
||||
tnum += delta - TNUM_OFFSET;
|
||||
//System.out.println(" tnum=" + tnum);
|
||||
buffer[bufferUpto++] = ordBase+tnum;
|
||||
if (bufferUpto == buffer.length) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bufferUpto;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setDocument(int docID) {
|
||||
tnum = 0;
|
||||
final int code = index[docID];
|
||||
if ((code & 0xff)==1) {
|
||||
// a pointer
|
||||
upto = code>>>8;
|
||||
//System.out.println(" pointer! upto=" + upto);
|
||||
int whichArray = (docID >>> 16) & 0xff;
|
||||
arr = tnums[whichArray];
|
||||
} else {
|
||||
//System.out.println(" inline!");
|
||||
arr = null;
|
||||
upto = code;
|
||||
}
|
||||
bufferUpto = 0;
|
||||
bufferLength = read(buffer);
|
||||
}
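
An illustrative sketch (made-up values, not part of the patch) of the packed convention that setDocument() decodes: a low byte of 1 marks a pointer into one of the 256 shared byte arrays, anything else means the delta-coded list is inlined in the int itself.

public class PackedIndexSketch {
  /** Returns a human-readable description of a packed index[] entry, mirroring setDocument() above. */
  static String describe(int code, int docID) {
    if ((code & 0xff) == 1) {
      int offset = code >>> 8;                 // byte offset of this doc's list inside its shared array
      int whichArray = (docID >>> 16) & 0xff;  // which of the 256 tnums arrays holds it
      return "pointer: tnums[" + whichArray + "] @ " + offset;
    } else {
      return "inline: term deltas packed directly into 0x" + Integer.toHexString(code);
    }
  }

  public static void main(String[] args) {
    System.out.println(describe(0x00012301, 70000)); // pointer: tnums[1] @ 291
    System.out.println(describe(0x00000405, 3));     // inline: deltas live in the int itself
  }
}
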
|
||||
|
||||
@Override
|
||||
public BytesRef lookupOrd(long ord) {
|
||||
try {
|
||||
return DocTermOrds.this.lookupTerm(te, (int) ord);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getValueCount() {
|
||||
return numTerms();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long lookupTerm(BytesRef key) {
|
||||
try {
|
||||
switch (te.seekCeil(key)) {
|
||||
case FOUND:
|
||||
assert te.ord() >= 0;
|
||||
return te.ord();
|
||||
case NOT_FOUND:
|
||||
assert te.ord() >= 0;
|
||||
return -te.ord()-1;
|
||||
default: /* END */
|
||||
return -numTerms()-1;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum() {
|
||||
try {
|
||||
return getOrdTermsEnum(reader);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
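
A sketch of how a caller might consume the uninverted ords through the SortedSetDocValues view (field name and class name are placeholders; assumes the same package as DocTermOrds):

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

public class DocTermOrdsUsageSketch {
  /** Prints the sorted, de-duplicated terms of a multi-valued field per document; deleted docs yield no ords. */
  static void dumpOrds(LeafReader reader, String field) throws IOException {
    DocTermOrds dto = new DocTermOrds(reader, reader.getLiveDocs(), field);
    SortedSetDocValues ords = dto.iterator(reader);   // empty instance when nothing was uninverted
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      ords.setDocument(doc);
      long ord;
      while ((ord = ords.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        BytesRef term = ords.lookupOrd(ord);
        System.out.println("doc " + doc + " -> " + term.utf8ToString());
      }
    }
  }
}
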
|
|
@ -0,0 +1,466 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.IndexReader; // javadocs
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Expert: Maintains caches of term values.
|
||||
*
|
||||
* <p>Created: May 19, 2004 11:13:14 AM
|
||||
*
|
||||
* @since lucene 1.4
|
||||
* @see FieldCacheSanityChecker
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
interface FieldCache {
|
||||
|
||||
/**
|
||||
* Placeholder indicating creation of this cache is currently in-progress.
|
||||
*/
|
||||
public static final class CreationPlaceholder implements Accountable {
|
||||
Accountable value;
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
// don't call on the in-progress value, might make things angry.
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* interface to all parsers. It is used to parse different numeric types.
|
||||
*/
|
||||
public interface Parser {
|
||||
|
||||
/**
|
||||
* Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
|
||||
* to filter the actual TermsEnum before the field cache is filled.
|
||||
*
|
||||
* @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
|
||||
* @return a possibly filtered {@link TermsEnum} instance, this method must not return <code>null</code>.
|
||||
* @throws IOException if an {@link IOException} occurs
|
||||
* @deprecated index with Points instead
|
||||
*/
|
||||
@Deprecated
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException;
|
||||
|
||||
/** Parses this field's value */
|
||||
public long parseValue(BytesRef term);
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for points parsers. These parsers do not use the inverted index, but instead
|
||||
* uninvert point data.
|
||||
*
|
||||
* This abstraction can be cleaned up when Parser.termsEnum is removed.
|
||||
*/
|
||||
public abstract class PointParser implements Parser {
|
||||
public final TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
throw new UnsupportedOperationException("makes no sense for parsing points");
|
||||
}
|
||||
}
|
||||
|
||||
/** Expert: The cache used internally by sorting and range query classes. */
|
||||
public static FieldCache DEFAULT = new FieldCacheImpl();
|
||||
|
||||
/**
|
||||
* A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.IntPoint}.
|
||||
*/
|
||||
public static final Parser INT_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableBytesToInt(point.bytes, point.offset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".INT_POINT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LongPoint}.
|
||||
*/
|
||||
public static final Parser LONG_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableBytesToLong(point.bytes, point.offset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LONG_POINT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.FloatPoint}.
|
||||
*/
|
||||
public static final Parser FLOAT_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableFloatBits(NumericUtils.sortableBytesToInt(point.bytes, point.offset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".FLOAT_POINT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.DoublePoint}.
|
||||
*/
|
||||
public static final Parser DOUBLE_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableDoubleBits(NumericUtils.sortableBytesToLong(point.bytes, point.offset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".DOUBLE_POINT_PARSER";
|
||||
}
|
||||
};
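
The four point parsers above simply undo the sortable-bytes encoding that the point fields write; a minimal round-trip sketch (standalone class and value invented for illustration):

import org.apache.lucene.util.NumericUtils;

public class PointParserSketch {
  public static void main(String[] args) {
    byte[] encoded = new byte[Integer.BYTES];
    NumericUtils.intToSortableBytes(-42, encoded, 0);            // the bytes an IntPoint field stores
    long decoded = NumericUtils.sortableBytesToInt(encoded, 0);  // what INT_POINT_PARSER returns
    System.out.println(decoded);                                 // -42
  }
}
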
|
||||
|
||||
/**
|
||||
* A parser instance for int values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyIntField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_INT_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
return LegacyNumericUtils.prefixCodedToInt(term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_INT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for float values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyFloatField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_FLOAT_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
int val = LegacyNumericUtils.prefixCodedToInt(term);
|
||||
if (val<0) val ^= 0x7fffffff;
|
||||
return val;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_FLOAT_PARSER";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for long values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyLongField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_LONG_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
return LegacyNumericUtils.prefixCodedToLong(term);
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_LONG_PARSER";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for double values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyDoubleField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_DOUBLE_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
long val = LegacyNumericUtils.prefixCodedToLong(term);
|
||||
if (val<0) val ^= 0x7fffffffffffffffL;
|
||||
return val;
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_DOUBLE_PARSER";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||
}
|
||||
};
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||
* reads the terms/points in <code>field</code> and returns a bit set at the size of
|
||||
* <code>reader.maxDoc()</code>, with turned on bits for each docid that
|
||||
* does have a value for this field.
|
||||
* @param parser May be {@code null} if coming from the inverted index, otherwise
|
||||
* can be a {@link PointParser} to compute from point values.
|
||||
*/
|
||||
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns a {@link NumericDocValues} over the values found in documents in the given
|
||||
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
|
||||
* uses {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values.
|
||||
* Otherwise, it checks the internal cache for an appropriate entry, and if
|
||||
* none is found, reads the terms/points in <code>field</code> as longs and returns
|
||||
* an array of size <code>reader.maxDoc()</code> of the value each document
|
||||
* has in the given field.
|
||||
*
|
||||
* @param reader
|
||||
* Used to get field values.
|
||||
* @param field
|
||||
* Which field contains the longs.
|
||||
* @param parser
|
||||
* Computes long for string values. May be {@code null} if the
|
||||
* requested field was indexed as {@link NumericDocValuesField} or
|
||||
* {@link org.apache.lucene.document.LegacyLongField}.
|
||||
* @param setDocsWithField
|
||||
* If true then {@link #getDocsWithField} will also be computed and
|
||||
* stored in the FieldCache.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException
|
||||
* If any error occurs.
|
||||
*/
|
||||
public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException;
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none
|
||||
* is found, reads the term values in <code>field</code>
|
||||
* and returns a {@link BinaryDocValues} instance, providing a
|
||||
* method to retrieve the term (as a BytesRef) per document.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the strings.
|
||||
* @param setDocsWithField If true then {@link #getDocsWithField} will
|
||||
* also be computed and stored in the FieldCache.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException;
|
||||
|
||||
/** Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String,boolean)},
|
||||
* but you can specify whether more RAM should be consumed in exchange for
|
||||
* faster lookups (default is "true"). Note that the
|
||||
* first call for a given reader and field "wins",
|
||||
* subsequent calls will share the same cache entry. */
|
||||
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException;
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none
|
||||
* is found, reads the term values in <code>field</code>
|
||||
* and returns a {@link SortedDocValues} instance,
|
||||
* providing methods to retrieve sort ordinals and terms
|
||||
* (as a ByteRef) per document.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the strings.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException;
|
||||
|
||||
/** Expert: just like {@link
|
||||
* #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you can specify
|
||||
* whether more RAM should be consumed in exchange for
|
||||
* faster lookups (default is "true"). Note that the
|
||||
* first call for a given reader and field "wins",
|
||||
* subsequent calls will share the same cache entry. */
|
||||
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException;
|
||||
|
||||
/** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */
|
||||
public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_INT });
|
||||
/** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */
|
||||
public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_LONG });
|
||||
|
||||
/**
|
||||
* Checks the internal cache for an appropriate entry, and if none is found, reads the term values
|
||||
* in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
|
||||
* the terms (as ords) per document.
|
||||
*
|
||||
* @param reader Used to build a {@link DocTermOrds} instance
|
||||
* @param field Which field contains the strings.
|
||||
* @param prefix prefix for a subset of the terms which should be uninverted. Can be null or
|
||||
* {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX}
|
||||
*
|
||||
* @return a {@link DocTermOrds} instance
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException;
|
||||
|
||||
/**
|
||||
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
|
||||
* Can be useful for logging/debugging.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class CacheEntry {
|
||||
|
||||
private final Object readerKey;
|
||||
private final String fieldName;
|
||||
private final Class<?> cacheType;
|
||||
private final Object custom;
|
||||
private final Accountable value;
|
||||
|
||||
public CacheEntry(Object readerKey, String fieldName,
|
||||
Class<?> cacheType,
|
||||
Object custom,
|
||||
Accountable value) {
|
||||
this.readerKey = readerKey;
|
||||
this.fieldName = fieldName;
|
||||
this.cacheType = cacheType;
|
||||
this.custom = custom;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public Object getReaderKey() {
|
||||
return readerKey;
|
||||
}
|
||||
|
||||
public String getFieldName() {
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
public Class<?> getCacheType() {
|
||||
return cacheType;
|
||||
}
|
||||
|
||||
public Object getCustom() {
|
||||
return custom;
|
||||
}
|
||||
|
||||
public Object getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* The most recently estimated size of the value, null unless
|
||||
* estimateSize has been called.
|
||||
*/
|
||||
public String getEstimatedSize() {
|
||||
long bytesUsed = value == null ? 0L : value.ramBytesUsed();
|
||||
return RamUsageEstimator.humanReadableUnits(bytesUsed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder b = new StringBuilder(250);
|
||||
b.append("'").append(getReaderKey()).append("'=>");
|
||||
b.append("'").append(getFieldName()).append("',");
|
||||
b.append(getCacheType()).append(",").append(getCustom());
|
||||
b.append("=>").append(getValue().getClass().getName()).append("#");
|
||||
b.append(System.identityHashCode(getValue()));
|
||||
|
||||
String s = getEstimatedSize();
|
||||
b.append(" (size =~ ").append(s).append(')');
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERT: Generates an array of CacheEntry objects representing all items
|
||||
* currently in the FieldCache.
|
||||
* <p>
|
||||
* NOTE: These CacheEntry objects maintain a strong reference to the
|
||||
* Cached Values. Maintaining references to a CacheEntry after the IndexReader
|
||||
* associated with it has been garbage collected will prevent the Value itself
|
||||
* from being garbage collected when the Cache drops the WeakReference.
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public CacheEntry[] getCacheEntries();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* EXPERT: Instructs the FieldCache to forcibly expunge all entries
|
||||
* from the underlying caches. This is intended only to be used for
|
||||
* test methods as a way to ensure a known base state of the Cache
|
||||
* (without needing to rely on GC to free WeakReferences).
|
||||
* It should not be relied on for "Cache maintenance" in general
|
||||
* application code.
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public void purgeAllCaches();
|
||||
|
||||
/**
|
||||
* Expert: drops all cache entries associated with this
|
||||
* reader {@link IndexReader#getCoreCacheKey}. NOTE: this cache key must
|
||||
* precisely match the reader that the cache entry is
|
||||
* keyed on. If you pass a top-level reader, it usually
|
||||
* will have no effect as Lucene now caches at the segment
|
||||
* reader level.
|
||||
*/
|
||||
public void purgeByCacheKey(Object coreCacheKey);
|
||||
|
||||
/**
|
||||
* If non-null, FieldCacheImpl will warn whenever
|
||||
* entries are created that are not sane according to
|
||||
* {@link FieldCacheSanityChecker}.
|
||||
*/
|
||||
public void setInfoStream(PrintStream stream);
|
||||
|
||||
/** counterpart of {@link #setInfoStream(PrintStream)} */
|
||||
public PrintStream getInfoStream();
|
||||
}
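
A sketch of how this cache might be consulted (the field name "price" and the wrapper class are invented; the interface is package-private, so real callers sit in org.apache.solr.uninverting):

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;

public class FieldCacheUsageSketch {
  /** Sums the "price" values of all docs that actually have one. */
  static long sumPrices(LeafReader reader) throws IOException {
    NumericDocValues values =
        FieldCache.DEFAULT.getNumerics(reader, "price", FieldCache.INT_POINT_PARSER, true);
    Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(reader, "price", FieldCache.INT_POINT_PARSER);
    long sum = 0;
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      if (docsWithField.get(doc)) {
        sum += values.get(doc);   // random-access pre-7.0 doc values API
      }
    }
    return sum;
  }
}
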
|
File diff suppressed because it is too large
|
@ -0,0 +1,425 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.util.MapOfSets;
|
||||
import org.apache.solr.uninverting.FieldCache.CacheEntry;
|
||||
|
||||
/**
|
||||
* Provides methods for sanity checking that entries in the FieldCache
|
||||
* are not wasteful or inconsistent.
|
||||
*
|
||||
* <p>
|
||||
* Lucene 2.9 Introduced numerous enhancements into how the FieldCache
|
||||
* is used by the low levels of Lucene searching (for Sorting and
|
||||
* ValueSourceQueries) to improve both the speed for Sorting, as well
|
||||
* as reopening of IndexReaders. But these changes have shifted the
|
||||
* usage of FieldCache from "top level" IndexReaders (frequently a
|
||||
* MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
|
||||
* As a result, existing applications that directly access the FieldCache
|
||||
* may find RAM usage increase significantly when upgrading to 2.9 or
|
||||
* Later. This class provides an API for these applications (or their
|
||||
* Unit tests) to check at run time if the FieldCache contains "insane"
|
||||
* usages of the FieldCache.
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
* @see FieldCache
|
||||
* @see FieldCacheSanityChecker.Insanity
|
||||
* @see FieldCacheSanityChecker.InsanityType
|
||||
*/
|
||||
final class FieldCacheSanityChecker {
|
||||
|
||||
public FieldCacheSanityChecker() {
|
||||
/* NOOP */
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick and dirty convenience method
|
||||
* @see #check
|
||||
*/
|
||||
public static Insanity[] checkSanity(FieldCache cache) {
|
||||
return checkSanity(cache.getCacheEntries());
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick and dirty convenience method that instantiates an instance with
|
||||
* "good defaults" and uses it to test the CacheEntrys
|
||||
* @see #check
|
||||
*/
|
||||
public static Insanity[] checkSanity(CacheEntry... cacheEntries) {
|
||||
FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
|
||||
return sanityChecker.check(cacheEntries);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tests a CacheEntry[] for indication of "insane" cache usage.
|
||||
* <p>
|
||||
* <b>NOTE:</b> FieldCache CreationPlaceholder objects are ignored.
|
||||
* (:TODO: is this a bad idea? are we masking a real problem?)
|
||||
* </p>
|
||||
*/
|
||||
public Insanity[] check(CacheEntry... cacheEntries) {
|
||||
if (null == cacheEntries || 0 == cacheEntries.length)
|
||||
return new Insanity[0];
|
||||
|
||||
// the indirect mapping lets MapOfSet dedup identical valIds for us
|
||||
//
|
||||
// maps the (valId) identityhashCode of cache values to
|
||||
// sets of CacheEntry instances
|
||||
final MapOfSets<Integer, CacheEntry> valIdToItems = new MapOfSets<>(new HashMap<Integer, Set<CacheEntry>>(17));
|
||||
// maps ReaderField keys to Sets of ValueIds
|
||||
final MapOfSets<ReaderField, Integer> readerFieldToValIds = new MapOfSets<>(new HashMap<ReaderField, Set<Integer>>(17));
|
||||
//
|
||||
|
||||
// any keys that we know result in more than one valId
|
||||
final Set<ReaderField> valMismatchKeys = new HashSet<>();
|
||||
|
||||
// iterate over all the cacheEntries to get the mappings we'll need
|
||||
for (int i = 0; i < cacheEntries.length; i++) {
|
||||
final CacheEntry item = cacheEntries[i];
|
||||
final Object val = item.getValue();
|
||||
|
||||
// It's OK to have dup entries, where one is eg
|
||||
// float[] and the other is the Bits (from
|
||||
// getDocWithField())
|
||||
if (val instanceof FieldCacheImpl.BitsEntry) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (val instanceof FieldCache.CreationPlaceholder)
|
||||
continue;
|
||||
|
||||
final ReaderField rf = new ReaderField(item.getReaderKey(),
|
||||
item.getFieldName());
|
||||
|
||||
final Integer valId = Integer.valueOf(System.identityHashCode(val));
|
||||
|
||||
// indirect mapping, so the MapOfSet will dedup identical valIds for us
|
||||
valIdToItems.put(valId, item);
|
||||
if (1 < readerFieldToValIds.put(rf, valId)) {
|
||||
valMismatchKeys.add(rf);
|
||||
}
|
||||
}
|
||||
|
||||
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
|
||||
|
||||
insanity.addAll(checkValueMismatch(valIdToItems,
|
||||
readerFieldToValIds,
|
||||
valMismatchKeys));
|
||||
insanity.addAll(checkSubreaders(valIdToItems,
|
||||
readerFieldToValIds));
|
||||
|
||||
return insanity.toArray(new Insanity[insanity.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper method used by check that iterates over
|
||||
* valMismatchKeys and generates a Collection of Insanity
|
||||
* instances accordingly. The MapOfSets are used to populate
|
||||
* the Insanity objects.
|
||||
* @see InsanityType#VALUEMISMATCH
|
||||
*/
|
||||
private Collection<Insanity> checkValueMismatch(MapOfSets<Integer, CacheEntry> valIdToItems,
|
||||
MapOfSets<ReaderField, Integer> readerFieldToValIds,
|
||||
Set<ReaderField> valMismatchKeys) {
|
||||
|
||||
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
|
||||
|
||||
if (! valMismatchKeys.isEmpty() ) {
|
||||
// we have multiple values for some ReaderFields
|
||||
|
||||
final Map<ReaderField, Set<Integer>> rfMap = readerFieldToValIds.getMap();
|
||||
final Map<Integer, Set<CacheEntry>> valMap = valIdToItems.getMap();
|
||||
for (final ReaderField rf : valMismatchKeys) {
|
||||
final List<CacheEntry> badEntries = new ArrayList<>(valMismatchKeys.size() * 2);
|
||||
for(final Integer value: rfMap.get(rf)) {
|
||||
for (final CacheEntry cacheEntry : valMap.get(value)) {
|
||||
badEntries.add(cacheEntry);
|
||||
}
|
||||
}
|
||||
|
||||
CacheEntry[] badness = new CacheEntry[badEntries.size()];
|
||||
badness = badEntries.toArray(badness);
|
||||
|
||||
insanity.add(new Insanity(InsanityType.VALUEMISMATCH,
|
||||
"Multiple distinct value objects for " +
|
||||
rf.toString(), badness));
|
||||
}
|
||||
}
|
||||
return insanity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper method used by check that iterates over
|
||||
* the keys of readerFieldToValIds and generates a Collection
|
||||
* of Insanity instances whenever two (or more) ReaderField instances are
|
||||
* found that have an ancestry relationship.
|
||||
*
|
||||
* @see InsanityType#SUBREADER
|
||||
*/
|
||||
private Collection<Insanity> checkSubreaders( MapOfSets<Integer, CacheEntry> valIdToItems,
|
||||
MapOfSets<ReaderField, Integer> readerFieldToValIds) {
|
||||
|
||||
final List<Insanity> insanity = new ArrayList<>(23);
|
||||
|
||||
Map<ReaderField, Set<ReaderField>> badChildren = new HashMap<>(17);
|
||||
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<>(badChildren); // wrapper
|
||||
|
||||
Map<Integer, Set<CacheEntry>> viToItemSets = valIdToItems.getMap();
|
||||
Map<ReaderField, Set<Integer>> rfToValIdSets = readerFieldToValIds.getMap();
|
||||
|
||||
Set<ReaderField> seen = new HashSet<>(17);
|
||||
|
||||
Set<ReaderField> readerFields = rfToValIdSets.keySet();
|
||||
for (final ReaderField rf : readerFields) {
|
||||
|
||||
if (seen.contains(rf)) continue;
|
||||
|
||||
List<Object> kids = getAllDescendantReaderKeys(rf.readerKey);
|
||||
for (Object kidKey : kids) {
|
||||
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
|
||||
|
||||
if (badChildren.containsKey(kid)) {
|
||||
// we've already processed this kid as RF and found other problems
|
||||
// track those problems as our own
|
||||
badKids.put(rf, kid);
|
||||
badKids.putAll(rf, badChildren.get(kid));
|
||||
badChildren.remove(kid);
|
||||
|
||||
} else if (rfToValIdSets.containsKey(kid)) {
|
||||
// we have cache entries for the kid
|
||||
badKids.put(rf, kid);
|
||||
}
|
||||
seen.add(kid);
|
||||
}
|
||||
seen.add(rf);
|
||||
}
|
||||
|
||||
// every mapping in badKids represents an Insanity
|
||||
for (final ReaderField parent : badChildren.keySet()) {
|
||||
Set<ReaderField> kids = badChildren.get(parent);
|
||||
|
||||
List<CacheEntry> badEntries = new ArrayList<>(kids.size() * 2);
|
||||
|
||||
// put parent entr(ies) in first
|
||||
{
|
||||
for (final Integer value : rfToValIdSets.get(parent)) {
|
||||
badEntries.addAll(viToItemSets.get(value));
|
||||
}
|
||||
}
|
||||
|
||||
// now the entries for the descendants
|
||||
for (final ReaderField kid : kids) {
|
||||
for (final Integer value : rfToValIdSets.get(kid)) {
|
||||
badEntries.addAll(viToItemSets.get(value));
|
||||
}
|
||||
}
|
||||
|
||||
CacheEntry[] badness = new CacheEntry[badEntries.size()];
|
||||
badness = badEntries.toArray(badness);
|
||||
|
||||
insanity.add(new Insanity(InsanityType.SUBREADER,
|
||||
"Found caches for descendants of " +
|
||||
parent.toString(),
|
||||
badness));
|
||||
}
|
||||
|
||||
return insanity;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the seed is an IndexReader, and if so will walk
|
||||
* the hierarchy of subReaders building up a list of the objects
|
||||
* returned by {@code seed.getCoreCacheKey()}
|
||||
*/
|
||||
private List<Object> getAllDescendantReaderKeys(Object seed) {
|
||||
List<Object> all = new ArrayList<>(17); // will grow as we iter
|
||||
all.add(seed);
|
||||
for (int i = 0; i < all.size(); i++) {
|
||||
final Object obj = all.get(i);
|
||||
// TODO: We don't check closed readers here (as getTopReaderContext
|
||||
// throws AlreadyClosedException), what should we do? Reflection?
|
||||
if (obj instanceof IndexReader) {
|
||||
try {
|
||||
final List<IndexReaderContext> childs =
|
||||
((IndexReader) obj).getContext().children();
|
||||
if (childs != null) { // it is composite reader
|
||||
for (final IndexReaderContext ctx : childs) {
|
||||
all.add(ctx.reader().getCoreCacheKey());
|
||||
}
|
||||
}
|
||||
} catch (AlreadyClosedException ace) {
|
||||
// ignore this reader
|
||||
}
|
||||
}
|
||||
}
|
||||
// need to skip the first, because it was the seed
|
||||
return all.subList(1, all.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple pair object for using "readerKey + fieldName" as a Map key
|
||||
*/
|
||||
private final static class ReaderField {
|
||||
public final Object readerKey;
|
||||
public final String fieldName;
|
||||
public ReaderField(Object readerKey, String fieldName) {
|
||||
this.readerKey = readerKey;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return System.identityHashCode(readerKey) * fieldName.hashCode();
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object that) {
|
||||
if (! (that instanceof ReaderField)) return false;
|
||||
|
||||
ReaderField other = (ReaderField) that;
|
||||
return (this.readerKey == other.readerKey &&
|
||||
this.fieldName.equals(other.fieldName));
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return readerKey.toString() + "+" + fieldName;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple container for a collection of related CacheEntry objects that
|
||||
* in conjunction with each other represent some "insane" usage of the
|
||||
* FieldCache.
|
||||
*/
|
||||
public final static class Insanity {
|
||||
private final InsanityType type;
|
||||
private final String msg;
|
||||
private final CacheEntry[] entries;
|
||||
public Insanity(InsanityType type, String msg, CacheEntry... entries) {
|
||||
if (null == type) {
|
||||
throw new IllegalArgumentException
|
||||
("Insanity requires non-null InsanityType");
|
||||
}
|
||||
if (null == entries || 0 == entries.length) {
|
||||
throw new IllegalArgumentException
|
||||
("Insanity requires non-null/non-empty CacheEntry[]");
|
||||
}
|
||||
this.type = type;
|
||||
this.msg = msg;
|
||||
this.entries = entries;
|
||||
|
||||
}
|
||||
/**
|
||||
* Type of insane behavior this object represents
|
||||
*/
|
||||
public InsanityType getType() { return type; }
|
||||
/**
|
||||
* Description of the insane behavior
|
||||
*/
|
||||
public String getMsg() { return msg; }
|
||||
/**
|
||||
* CacheEntry objects which suggest a problem
|
||||
*/
|
||||
public CacheEntry[] getCacheEntries() { return entries; }
|
||||
/**
|
||||
* Multi-Line representation of this Insanity object, starting with
|
||||
* the Type and Msg, followed by each CacheEntry.toString() on its
|
||||
* own line prefaced by a tab character
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append(getType()).append(": ");
|
||||
|
||||
String m = getMsg();
|
||||
if (null != m) buf.append(m);
|
||||
|
||||
buf.append('\n');
|
||||
|
||||
CacheEntry[] ce = getCacheEntries();
|
||||
for (int i = 0; i < ce.length; i++) {
|
||||
buf.append('\t').append(ce[i].toString()).append('\n');
|
||||
}
|
||||
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An Enumeration of the different types of "insane" behavior that
|
||||
* may be detected in a FieldCache.
|
||||
*
|
||||
* @see InsanityType#SUBREADER
|
||||
* @see InsanityType#VALUEMISMATCH
|
||||
* @see InsanityType#EXPECTED
|
||||
*/
|
||||
public final static class InsanityType {
|
||||
private final String label;
|
||||
private InsanityType(final String label) {
|
||||
this.label = label;
|
||||
}
|
||||
@Override
|
||||
public String toString() { return label; }
|
||||
|
||||
/**
|
||||
* Indicates an overlap in cache usage on a given field
|
||||
* in sub/super readers.
|
||||
*/
|
||||
public final static InsanityType SUBREADER
|
||||
= new InsanityType("SUBREADER");
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Indicates entries have the same reader+fieldname but
|
||||
* different cached values. This can happen if different datatypes,
|
||||
* or parsers are used -- and while it's not necessarily a bug
|
||||
* it's typically an indication of a possible problem.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>NOTE:</b> Only the reader, fieldname, and cached value are actually
|
||||
* tested -- if two cache entries have different parsers or datatypes but
|
||||
* the cached values are the same Object (== not just equal()) this method
|
||||
* does not consider that a red flag. This allows for subtle variations
|
||||
* in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
|
||||
* </p>
|
||||
*/
|
||||
public final static InsanityType VALUEMISMATCH
|
||||
= new InsanityType("VALUEMISMATCH");
|
||||
|
||||
/**
|
||||
* Indicates an expected bit of "insanity". This may be useful for
|
||||
* clients that wish to preserve/log information about insane usage
|
||||
* but indicate that it was expected.
|
||||
*/
|
||||
public final static InsanityType EXPECTED
|
||||
= new InsanityType("EXPECTED");
|
||||
}
|
||||
|
||||
|
||||
}
|
|
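For illustration, here is a minimal sketch of how the checker above might be driven. This snippet is not part of the commit; it assumes the calling code lives in the same org.apache.solr.uninverting package (the class is package-private) and that some FieldCache entries already exist in FieldCache.DEFAULT.

// Hypothetical usage sketch for FieldCacheSanityChecker (not part of this patch).
// Collect the current cache entries and report any "insane" usage.
CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
FieldCacheSanityChecker.Insanity[] insanity = FieldCacheSanityChecker.checkSanity(entries);
for (FieldCacheSanityChecker.Insanity i : insanity) {
  // toString() prints the type and message, then each offending CacheEntry on its own line
  System.err.println(i);
}
// A clean cache yields an empty array, so a test could simply assert insanity.length == 0.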
@@ -0,0 +1,391 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.NumericDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.SortedDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.StringField; // javadocs
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FilterDirectoryReader;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.solr.uninverting.FieldCache.CacheEntry;
|
||||
|
||||
/**
|
||||
* A FilterReader that exposes <i>indexed</i> values as if they also had
|
||||
* docvalues.
|
||||
* <p>
|
||||
* This is accomplished by "inverting the inverted index" or "uninversion".
|
||||
* <p>
|
||||
* The uninversion process happens lazily: upon the first request for the
|
||||
* field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}
|
||||
* or similar), it will create the docvalues on-the-fly if needed and cache them,
|
||||
* based on the core cache key of the wrapped LeafReader.
|
||||
*/
|
||||
public class UninvertingReader extends FilterLeafReader {
|
||||
|
||||
/**
|
||||
* Specifies the type of uninversion to apply for the field.
|
||||
*/
|
||||
public static enum Type {
|
||||
/**
|
||||
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
INTEGER_POINT,
|
||||
/**
|
||||
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
LONG_POINT,
|
||||
/**
|
||||
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
FLOAT_POINT,
|
||||
/**
|
||||
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
DOUBLE_POINT,
|
||||
/**
|
||||
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #INTEGER_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_INTEGER,
|
||||
/**
|
||||
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #LONG_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_LONG,
|
||||
/**
|
||||
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #FLOAT_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_FLOAT,
|
||||
/**
|
||||
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_DOUBLE,
|
||||
/**
|
||||
* Single-valued Binary, (e.g. indexed with {@link StringField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link BinaryDocValuesField}.
|
||||
*/
|
||||
BINARY,
|
||||
/**
|
||||
* Single-valued Binary, (e.g. indexed with {@link StringField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedDocValuesField}.
|
||||
*/
|
||||
SORTED,
|
||||
/**
|
||||
* Multi-valued Binary, (e.g. indexed with {@link StringField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_BINARY,
|
||||
/**
|
||||
* Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_INTEGER,
|
||||
/**
|
||||
* Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_FLOAT,
|
||||
/**
|
||||
* Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_LONG,
|
||||
/**
|
||||
* Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_DOUBLE
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
|
||||
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
|
||||
* and so on.
|
||||
*/
|
||||
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||
return new UninvertingDirectoryReader(in, mapping);
|
||||
}
|
||||
|
||||
static class UninvertingDirectoryReader extends FilterDirectoryReader {
|
||||
final Map<String,Type> mapping;
|
||||
|
||||
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||
super(in, new FilterDirectoryReader.SubReaderWrapper() {
|
||||
@Override
|
||||
public LeafReader wrap(LeafReader reader) {
|
||||
return new UninvertingReader(reader, mapping);
|
||||
}
|
||||
});
|
||||
this.mapping = mapping;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
|
||||
return new UninvertingDirectoryReader(in, mapping);
|
||||
}
|
||||
}
|
||||
|
||||
final Map<String,Type> mapping;
|
||||
final FieldInfos fieldInfos;
|
||||
|
||||
/**
|
||||
* Create a new UninvertingReader with the specified mapping
|
||||
* <p>
|
||||
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
|
||||
* instead.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public UninvertingReader(LeafReader in, Map<String,Type> mapping) {
|
||||
super(in);
|
||||
this.mapping = mapping;
|
||||
ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
|
||||
for (FieldInfo fi : in.getFieldInfos()) {
|
||||
DocValuesType type = fi.getDocValuesType();
|
||||
if (type == DocValuesType.NONE) {
|
||||
Type t = mapping.get(fi.name);
|
||||
if (t != null) {
|
||||
if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) {
|
||||
// type uses points
|
||||
if (fi.getPointDimensionCount() == 0) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// type uses inverted index
|
||||
if (fi.getIndexOptions() == IndexOptions.NONE) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
switch(t) {
|
||||
case INTEGER_POINT:
|
||||
case LONG_POINT:
|
||||
case FLOAT_POINT:
|
||||
case DOUBLE_POINT:
|
||||
case LEGACY_INTEGER:
|
||||
case LEGACY_LONG:
|
||||
case LEGACY_FLOAT:
|
||||
case LEGACY_DOUBLE:
|
||||
type = DocValuesType.NUMERIC;
|
||||
break;
|
||||
case BINARY:
|
||||
type = DocValuesType.BINARY;
|
||||
break;
|
||||
case SORTED:
|
||||
type = DocValuesType.SORTED;
|
||||
break;
|
||||
case SORTED_SET_BINARY:
|
||||
case SORTED_SET_INTEGER:
|
||||
case SORTED_SET_FLOAT:
|
||||
case SORTED_SET_LONG:
|
||||
case SORTED_SET_DOUBLE:
|
||||
type = DocValuesType.SORTED_SET;
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
}
|
||||
filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
|
||||
fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
|
||||
fi.getPointDimensionCount(), fi.getPointNumBytes()));
|
||||
}
|
||||
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNumericDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v != null) {
|
||||
switch (v) {
|
||||
case INTEGER_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER, true);
|
||||
case FLOAT_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER, true);
|
||||
case LONG_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER, true);
|
||||
case DOUBLE_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
case LEGACY_INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER, true);
|
||||
case LEGACY_FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER, true);
|
||||
case LEGACY_LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER, true);
|
||||
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
}
|
||||
}
|
||||
return super.getNumericDocValues(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v == Type.BINARY) {
|
||||
return FieldCache.DEFAULT.getTerms(in, field, true);
|
||||
} else {
|
||||
return in.getBinaryDocValues(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues getSortedDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v == Type.SORTED) {
|
||||
return FieldCache.DEFAULT.getTermsIndex(in, field);
|
||||
} else {
|
||||
return in.getSortedDocValues(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v != null) {
|
||||
switch (v) {
|
||||
case SORTED_SET_INTEGER:
|
||||
case SORTED_SET_FLOAT:
|
||||
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
|
||||
case SORTED_SET_LONG:
|
||||
case SORTED_SET_DOUBLE:
|
||||
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
|
||||
case SORTED_SET_BINARY:
|
||||
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
|
||||
}
|
||||
}
|
||||
return in.getSortedSetDocValues(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getDocsWithField(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v != null) {
|
||||
switch (v) {
|
||||
case INTEGER_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.INT_POINT_PARSER);
|
||||
case FLOAT_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.FLOAT_POINT_PARSER);
|
||||
case LONG_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LONG_POINT_PARSER);
|
||||
case DOUBLE_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.DOUBLE_POINT_PARSER);
|
||||
case LEGACY_INTEGER: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_INT_PARSER);
|
||||
case LEGACY_FLOAT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_FLOAT_PARSER);
|
||||
case LEGACY_LONG: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_LONG_PARSER);
|
||||
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_DOUBLE_PARSER);
|
||||
default:
|
||||
return FieldCache.DEFAULT.getDocsWithField(in, field, null);
|
||||
}
|
||||
} else {
|
||||
return in.getDocsWithField(field);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the field's uninversion type, or null
|
||||
* if the field doesn't exist or doesn't have a mapping.
|
||||
*/
|
||||
private Type getType(String field) {
|
||||
FieldInfo info = fieldInfos.fieldInfo(field);
|
||||
if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
|
||||
return null;
|
||||
}
|
||||
return mapping.get(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getCoreCacheKey() {
|
||||
return in.getCoreCacheKey();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getCombinedCoreAndDeletesKey() {
|
||||
return in.getCombinedCoreAndDeletesKey();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Uninverting(" + in.toString() + ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* Return information about the backing cache
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static String[] getUninvertedStats() {
|
||||
CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
|
||||
String[] info = new String[entries.length];
|
||||
for (int i = 0; i < entries.length; i++) {
|
||||
info[i] = entries[i].toString();
|
||||
}
|
||||
return info;
|
||||
}
|
||||
}
|
|
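A rough sketch of how the wrap API above is meant to be used follows. It is illustrative only: the field names, index path handling, and class name are assumptions, not taken from this commit.

import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.uninverting.UninvertingReader;

public class UninvertingExample {
  public static void main(String[] args) throws Exception {
    // Expose an indexed string field and an int point field as if they had docvalues.
    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("category_s", UninvertingReader.Type.SORTED);
    mapping.put("price_i", UninvertingReader.Type.INTEGER_POINT);

    DirectoryReader raw = DirectoryReader.open(FSDirectory.open(Paths.get(args[0])));
    DirectoryReader reader = UninvertingReader.wrap(raw, mapping);
    // Searches that sort or facet on "category_s"/"price_i" now see them as if they
    // had been indexed with SortedDocValuesField / NumericDocValuesField; the
    // uninverted values are built lazily and cached per core cache key.
    reader.close(); // closing the wrapper also closes the wrapped reader
  }
}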
@@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Support for creating docvalues on-the-fly from the inverted index at runtime.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
|
@@ -29,8 +29,9 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
/**
|
||||
* Allows access to uninverted docvalues by delete-by-queries.
|
||||
|
|
|
@@ -24,7 +24,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@@ -34,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
|
|
@@ -16,103 +16,124 @@
|
|||
*/
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.not;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Tests using fromIndex that points to a collection in SolrCloud mode.
|
||||
*/
|
||||
public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase {
|
||||
public class DistribJoinFromCollectionTest extends SolrCloudTestCase{
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
final private static String[] scoreModes = {"avg","max","min","total"};
|
||||
|
||||
public DistribJoinFromCollectionTest() {
|
||||
super();
|
||||
}
|
||||
|
||||
@Before
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
System.setProperty("numShards", Integer.toString(sliceCount));
|
||||
}
|
||||
|
||||
@Override
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
try {
|
||||
super.tearDown();
|
||||
} catch (Exception exc) {}
|
||||
resetExceptionIgnores();
|
||||
}
|
||||
// resetExceptionIgnores();
|
||||
private static String toColl = "to_2x2";
|
||||
private static String fromColl = "from_1x4";
|
||||
|
||||
@Test
|
||||
public void test() throws Exception {
|
||||
private static Integer toDocId;
|
||||
|
||||
@BeforeClass
|
||||
public static void setupCluster() throws Exception {
|
||||
final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
|
||||
|
||||
String configName = "solrCloudCollectionConfig";
|
||||
int nodeCount = 5;
|
||||
configureCluster(nodeCount)
|
||||
.addConfig(configName, configDir)
|
||||
.configure();
|
||||
|
||||
|
||||
Map<String, String> collectionProperties = new HashMap<>();
|
||||
collectionProperties.put("config", "solrconfig-tlog.xml" );
|
||||
collectionProperties.put("schema", "schema.xml");
|
||||
|
||||
// create a collection holding data for the "to" side of the JOIN
|
||||
String toColl = "to_2x2";
|
||||
createCollection(toColl, 2, 2, 2);
|
||||
ensureAllReplicasAreActive(toColl, "shard1", 2, 2, 30);
|
||||
ensureAllReplicasAreActive(toColl, "shard2", 2, 2, 30);
|
||||
|
||||
|
||||
int shards = 2;
|
||||
int replicas = 2 ;
|
||||
assertNotNull(cluster.createCollection(toColl, shards, replicas,
|
||||
configName,
|
||||
collectionProperties));
|
||||
|
||||
// get the set of nodes where replicas for the "to" collection exist
|
||||
Set<String> nodeSet = new HashSet<>();
|
||||
ClusterState cs = cloudClient.getZkStateReader().getClusterState();
|
||||
for (Slice slice : cs.getActiveSlices(toColl))
|
||||
ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
|
||||
ClusterState cs = zkStateReader.getClusterState();
|
||||
for (Slice slice : cs.getCollection(toColl).getActiveSlices())
|
||||
for (Replica replica : slice.getReplicas())
|
||||
nodeSet.add(replica.getNodeName());
|
||||
assertTrue(nodeSet.size() > 0);
|
||||
|
||||
// deploy the "from" collection to all nodes where the "to" collection exists
|
||||
String fromColl = "from_1x2";
|
||||
createCollection(null, fromColl, 1, nodeSet.size(), 1, null, StringUtils.join(nodeSet,","));
|
||||
ensureAllReplicasAreActive(fromColl, "shard1", 1, nodeSet.size(), 30);
|
||||
|
||||
// both to and from collections are up and active, index some docs ...
|
||||
Integer toDocId = indexDoc(toColl, 1001, "a", null, "b");
|
||||
|
||||
assertNotNull(cluster.createCollection(fromColl, 1, 4,
|
||||
configName, StringUtils.join(nodeSet,","), null,
|
||||
collectionProperties));
|
||||
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish(toColl, zkStateReader, false, true, 30);
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish(fromColl, zkStateReader, false, true, 30);
|
||||
|
||||
toDocId = indexDoc(toColl, 1001, "a", null, "b");
|
||||
indexDoc(fromColl, 2001, "a", "c", null);
|
||||
|
||||
Thread.sleep(1000); // so the commits fire
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testScore() throws Exception {
|
||||
//without score
|
||||
testJoins(toColl, fromColl, toDocId, false);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoScore() throws Exception {
|
||||
//with score
|
||||
testJoins(toColl, fromColl, toDocId, true);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void shutdown() {
|
||||
log.info("DistribJoinFromCollectionTest logic complete ... deleting the " + toColl + " and " + fromColl + " collections");
|
||||
|
||||
// try to clean up
|
||||
for (String c : new String[]{ toColl, fromColl }) {
|
||||
try {
|
||||
CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete()
|
||||
.setCollectionName(c);
|
||||
req.process(cloudClient);
|
||||
CollectionAdminRequest.Delete req = CollectionAdminRequest.deleteCollection(c);
|
||||
req.process(cluster.getSolrClient());
|
||||
} catch (Exception e) {
|
||||
// don't fail the test
|
||||
log.warn("Could not delete collection {} after test completed due to: " + e, c);
|
||||
|
@@ -126,12 +147,13 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
|
|||
throws SolrServerException, IOException {
|
||||
// verify the join with fromIndex works
|
||||
final String fromQ = "match_s:c match_s:not_1_0_score_after_weight_normalization";
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
{
|
||||
final String joinQ = "{!join " + anyScoreMode(isScoresTest)
|
||||
+ "from=join_s fromIndex=" + fromColl +
|
||||
" to=join_s}" + fromQ;
|
||||
QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
|
||||
QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient);
|
||||
QueryResponse rsp = new QueryResponse(client.request(qr), client);
|
||||
SolrDocumentList hits = rsp.getResults();
|
||||
assertTrue("Expected 1 doc, got "+hits, hits.getNumFound() == 1);
|
||||
SolrDocument doc = hits.get(0);
|
||||
|
@@ -145,16 +167,14 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
|
|||
|
||||
// create an alias for the fromIndex and then query through the alias
|
||||
String alias = fromColl+"Alias";
|
||||
CollectionAdminRequest.CreateAlias request = new CollectionAdminRequest.CreateAlias();
|
||||
request.setAliasName(alias);
|
||||
request.setAliasedCollections(fromColl);
|
||||
request.process(cloudClient);
|
||||
CollectionAdminRequest.CreateAlias request = CollectionAdminRequest.createAlias(alias,fromColl);
|
||||
request.process(client);
|
||||
|
||||
{
|
||||
final String joinQ = "{!join " + anyScoreMode(isScoresTest)
|
||||
+ "from=join_s fromIndex=" + alias + " to=join_s}"+fromQ;
|
||||
final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
|
||||
final QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient);
|
||||
final QueryResponse rsp = new QueryResponse(client.request(qr), client);
|
||||
final SolrDocumentList hits = rsp.getResults();
|
||||
assertTrue("Expected 1 doc", hits.getNumFound() == 1);
|
||||
SolrDocument doc = hits.get(0);
|
||||
|
@@ -171,7 +191,7 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
|
|||
final String joinQ = "{!join " + (anyScoreMode(isScoresTest))
|
||||
+ "from=join_s fromIndex=" + fromColl + " to=join_s}match_s:d";
|
||||
final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
|
||||
final QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient);
|
||||
final QueryResponse rsp = new QueryResponse(client.request(qr), client);
|
||||
final SolrDocumentList hits = rsp.getResults();
|
||||
assertTrue("Expected no hits", hits.getNumFound() == 0);
|
||||
}
|
||||
|
@@ -195,14 +215,14 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
|
|||
+ "from=join_s fromIndex=" + wrongName + " to=join_s}match_s:c";
|
||||
final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score"));
|
||||
try {
|
||||
cloudClient.request(qr);
|
||||
cluster.getSolrClient().request(qr);
|
||||
} catch (HttpSolrClient.RemoteSolrException ex) {
|
||||
assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, ex.code());
|
||||
assertTrue(ex.getMessage().contains(wrongName));
|
||||
}
|
||||
}
|
||||
|
||||
protected Integer indexDoc(String collection, int id, String joinField, String matchField, String getField) throws Exception {
|
||||
protected static Integer indexDoc(String collection, int id, String joinField, String matchField, String getField) throws Exception {
|
||||
UpdateRequest up = new UpdateRequest();
|
||||
up.setCommitWithin(50);
|
||||
up.setParam("collection", collection);
|
||||
|
@@ -215,7 +235,7 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase
|
|||
if (getField != null)
|
||||
doc.addField("get_s", getField);
|
||||
up.add(doc);
|
||||
cloudClient.request(up);
|
||||
cluster.getSolrClient().request(up);
|
||||
return docId;
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -703,7 +703,87 @@ public class HighlighterTest extends SolrTestCaseJ4 {
|
|||
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='a piece of text']"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAlternateSummaryWithHighlighting() {
|
||||
//long document
|
||||
assertU(adoc("tv_text", "keyword is only here, tv_text alternate field",
|
||||
"t_text", "a piece of text to be substituted",
|
||||
"other_t", "keyword",
|
||||
"id", "1",
|
||||
"foo_t","hi"));
|
||||
assertU(commit());
|
||||
assertU(optimize());
|
||||
|
||||
// Prove that hl.highlightAlternate is default true and respects maxAlternateFieldLength
|
||||
HashMap<String,String> args = new HashMap<>();
|
||||
args.put("hl", "true");
|
||||
args.put("hl.fragsize","0");
|
||||
args.put("hl.fl", "t_text");
|
||||
args.put("hl.simple.pre", "<simplepre>");
|
||||
args.put("hl.simple.post", "</simplepost>");
|
||||
args.put("hl.alternateField", "tv_text");
|
||||
args.put("hl.maxAlternateFieldLength", "39");
|
||||
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
|
||||
"standard", 0, 200, args);
|
||||
assertQ("Alternate summarization with highlighting",
|
||||
sumLRF.makeRequest("tv_text:keyword"),
|
||||
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
|
||||
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text']"
|
||||
);
|
||||
|
||||
// Query on a field other than the hl or alternate field. We still get the highlighted snippet from the alternate
|
||||
assertQ("Alternate summarization with highlighting, query other field",
|
||||
sumLRF.makeRequest("other_t:keyword"),
|
||||
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
|
||||
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text']"
|
||||
);
|
||||
|
||||
// With hl.requireFieldMatch, will not highlight but fall back to plain-text alternate
|
||||
args.put("hl.requireFieldMatch", "true");
|
||||
sumLRF = h.getRequestFactory(
|
||||
"standard", 0, 200, args);
|
||||
assertQ("Alternate summarization with highlighting, requireFieldMatch",
|
||||
sumLRF.makeRequest("other_t:keyword"),
|
||||
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
|
||||
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text alternate']"
|
||||
);
|
||||
args.put("hl.requireFieldMatch", "false");
|
||||
|
||||
|
||||
// Works with field specific params, overriding maxAlternateFieldLength to return everything
|
||||
args.remove("hl.alternateField");
|
||||
args.put("f.t_text.hl.alternateField", "tv_text");
|
||||
args.put("f.t_text.hl.maxAlternateFieldLength", "0");
|
||||
sumLRF = h.getRequestFactory("standard", 0, 200, args);
|
||||
assertQ("Alternate summarization with highlighting",
|
||||
sumLRF.makeRequest("tv_text:keyword"),
|
||||
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
|
||||
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text alternate field']"
|
||||
);
|
||||
|
||||
// Prove fallback highlighting works also with FVH
|
||||
args.put("hl.useFastVectorHighlighter", "true");
|
||||
args.put("hl.tag.pre", "<fvhpre>");
|
||||
args.put("hl.tag.post", "</fvhpost>");
|
||||
args.put("f.t_text.hl.maxAlternateFieldLength", "18");
|
||||
sumLRF = h.getRequestFactory("standard", 0, 200, args);
|
||||
assertQ("Alternate summarization with highlighting using FVH",
|
||||
sumLRF.makeRequest("tv_text:keyword"),
|
||||
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
|
||||
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<fvhpre>keyword</fvhpost> is only here']"
|
||||
);
|
||||
|
||||
// Prove it is possible to turn off highlighting of alternate field
|
||||
args.put("hl.highlightAlternate", "false");
|
||||
sumLRF = h.getRequestFactory("standard", 0, 200, args);
|
||||
assertQ("Alternate summarization without highlighting",
|
||||
sumLRF.makeRequest("tv_text:keyword"),
|
||||
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
|
||||
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only he']"
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPhraseHighlighter() {
|
||||
HashMap<String,String> args = new HashMap<>();
|
||||
|
|
|
@@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestSlowCompositeReaderWrapper extends LuceneTestCase {
|
||||
|
||||
public void testCoreListenerOnSlowCompositeReaderWrapper() throws IOException {
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), newDirectory());
|
||||
final int numDocs = TestUtil.nextInt(random(), 1, 5);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
w.addDocument(new Document());
|
||||
if (random().nextBoolean()) {
|
||||
w.commit();
|
||||
}
|
||||
}
|
||||
w.commit();
|
||||
w.close();
|
||||
|
||||
final IndexReader reader = DirectoryReader.open(w.w.getDirectory());
|
||||
final LeafReader leafReader = SlowCompositeReaderWrapper.wrap(reader);
|
||||
|
||||
final int numListeners = TestUtil.nextInt(random(), 1, 10);
|
||||
final List<LeafReader.CoreClosedListener> listeners = new ArrayList<>();
|
||||
AtomicInteger counter = new AtomicInteger(numListeners);
|
||||
|
||||
for (int i = 0; i < numListeners; ++i) {
|
||||
CountCoreListener listener = new CountCoreListener(counter, leafReader.getCoreCacheKey());
|
||||
listeners.add(listener);
|
||||
leafReader.addCoreClosedListener(listener);
|
||||
}
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
leafReader.addCoreClosedListener(listeners.get(random().nextInt(listeners.size())));
|
||||
}
|
||||
final int removed = random().nextInt(numListeners);
|
||||
Collections.shuffle(listeners, random());
|
||||
for (int i = 0; i < removed; ++i) {
|
||||
leafReader.removeCoreClosedListener(listeners.get(i));
|
||||
}
|
||||
assertEquals(numListeners, counter.get());
|
||||
// make sure listeners are registered on the wrapped reader and that closing any of them has the same effect
|
||||
if (random().nextBoolean()) {
|
||||
reader.close();
|
||||
} else {
|
||||
leafReader.close();
|
||||
}
|
||||
assertEquals(removed, counter.get());
|
||||
w.w.getDirectory().close();
|
||||
}
|
||||
|
||||
private static final class CountCoreListener implements LeafReader.CoreClosedListener {
|
||||
|
||||
private final AtomicInteger count;
|
||||
private final Object coreCacheKey;
|
||||
|
||||
public CountCoreListener(AtomicInteger count, Object coreCacheKey) {
|
||||
this.count = count;
|
||||
this.coreCacheKey = coreCacheKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onClose(Object coreCacheKey) {
|
||||
assertSame(this.coreCacheKey, coreCacheKey);
|
||||
count.decrementAndGet();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
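The pattern this test exercises, keying per-core data on getCoreCacheKey() and registering a core-closed listener to evict it, looks roughly like the sketch below. It is an assumption-laden illustration, not code from this commit: the class name, the cache map, and the stand-in "expensive" computation are hypothetical.

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.solr.index.SlowCompositeReaderWrapper;

public class CoreClosedCacheSketch {
  private final Map<Object, String> perCoreCache = new ConcurrentHashMap<>();

  // Cache a value per reader core and evict it automatically when that core closes.
  public String getOrCompute(IndexReader reader) throws IOException {
    final LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);
    final Object coreKey = leaf.getCoreCacheKey();
    String value = perCoreCache.get(coreKey);
    if (value == null) {
      value = "expensive-to-compute value for " + leaf.maxDoc(); // stand-in computation
      perCoreCache.put(coreKey, value);
      leaf.addCoreClosedListener(new LeafReader.CoreClosedListener() {
        @Override
        public void onClose(Object ownerCoreCacheKey) {
          perCoreCache.remove(ownerCoreCacheKey); // dropped once the core is closed
        }
      });
    }
    return value;
  }
}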
@@ -25,12 +25,12 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.uninverting.DocTermOrds;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.uninverting.DocTermOrds;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.junit.After;
|
||||
import org.junit.BeforeClass;
|
||||
|
|
|
@@ -17,6 +17,8 @@
|
|||
package org.apache.solr.response.transform;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
@@ -26,27 +28,58 @@ import java.util.Random;
|
|||
|
||||
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
|
||||
import org.apache.solr.cloud.AbstractDistribZkTestBase;
|
||||
import org.apache.solr.cloud.SolrCloudTestCase;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
@SuppressSSL
|
||||
public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBase {
|
||||
public class TestSubQueryTransformerDistrib extends SolrCloudTestCase {
|
||||
|
||||
@Override
|
||||
protected String getCloudSchemaFile() {
|
||||
return "schema-docValuesJoin.xml";
|
||||
final static String people = "people";
|
||||
final static String depts = "departments";
|
||||
|
||||
@BeforeClass
|
||||
public static void setupCluster() throws Exception {
|
||||
|
||||
final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
|
||||
|
||||
String configName = "solrCloudCollectionConfig";
|
||||
int nodeCount = 5;
|
||||
configureCluster(nodeCount)
|
||||
.addConfig(configName, configDir)
|
||||
.configure();
|
||||
|
||||
Map<String, String> collectionProperties = new HashMap<>();
|
||||
collectionProperties.put("config", "solrconfig-doctransformers.xml" );
|
||||
collectionProperties.put("schema", "schema-docValuesJoin.xml");
|
||||
|
||||
int shards = 2;
|
||||
int replicas = 2 ;
|
||||
assertNotNull(cluster.createCollection(people, shards, replicas,
|
||||
configName,
|
||||
collectionProperties));
|
||||
|
||||
assertNotNull(cluster.createCollection(depts, shards, replicas,
|
||||
configName, collectionProperties));
|
||||
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
client.setDefaultCollection(people);
|
||||
|
||||
ZkStateReader zkStateReader = client.getZkStateReader();
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish(people, zkStateReader, true, true, 30);
|
||||
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish(depts, zkStateReader, false, true, 30);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getCloudSolrConfig() {
|
||||
return "solrconfig-basic.xml";
|
||||
}
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
@Test
|
||||
|
@@ -54,14 +87,6 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
|
|||
int peopleMultiplier = atLeast(1);
|
||||
int deptMultiplier = atLeast(1);
|
||||
|
||||
final String people = "people";
|
||||
createCollection(people, 2, 1, 10);
|
||||
|
||||
|
||||
final String depts = "departments";
|
||||
createCollection(depts, 2, 1, 10);
|
||||
|
||||
|
||||
createIndex(people, peopleMultiplier, depts, deptMultiplier);
|
||||
|
||||
Random random1 = random();
|
||||
|
@@ -79,7 +104,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
|
|||
"depts.rows",""+(deptMultiplier*2),
|
||||
"depts.logParamsList","q,fl,rows,row.dept_ss_dv"}));
|
||||
final QueryResponse rsp = new QueryResponse();
|
||||
rsp.setResponse(cloudClient.request(qr, people));
|
||||
rsp.setResponse(cluster.getSolrClient().request(qr, people));
|
||||
final SolrDocumentList hits = rsp.getResults();
|
||||
|
||||
assertEquals(peopleMultiplier, hits.getNumFound());
|
||||
|
@@ -116,6 +141,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
|
|||
|
||||
private void createIndex(String people, int peopleMultiplier, String depts, int deptMultiplier)
|
||||
throws SolrServerException, IOException {
|
||||
|
||||
int id=0;
|
||||
List<String> peopleDocs = new ArrayList<>();
|
||||
for (int p=0; p < peopleMultiplier; p++){
|
||||
|
@@ -161,13 +187,16 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
|
|||
|
||||
private void addDocs(String collection, List<String> docs) throws SolrServerException, IOException {
|
||||
StringBuilder upd = new StringBuilder("<update>");
|
||||
|
||||
upd.append("<delete><query>*:*</query></delete>");
|
||||
|
||||
for (Iterator<String> iterator = docs.iterator(); iterator.hasNext();) {
|
||||
String add = iterator.next();
|
||||
upd.append(add);
|
||||
if (rarely()) {
|
||||
upd.append(commit("softCommit", "true"));
|
||||
}
|
||||
if (!rarely() || !iterator.hasNext()) {
|
||||
if (rarely() || !iterator.hasNext()) {
|
||||
if (!iterator.hasNext()) {
|
||||
upd.append(commit("softCommit", "false"));
|
||||
}
|
||||
|
@@ -176,7 +205,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas
|
|||
ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update");
|
||||
req.addContentStream(new ContentStreamBase.StringStream(upd.toString(),"text/xml"));
|
||||
|
||||
cloudClient.request(req, collection);
|
||||
cluster.getSolrClient().request(req, collection);
|
||||
upd.setLength("<update>".length());
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -42,13 +42,12 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortField.Type;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
@ -56,6 +55,7 @@ import org.apache.lucene.util.TestUtil;
|
|||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.junit.BeforeClass;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
|
|
@@ -16,7 +16,12 @@
*/
package org.apache.solr.search.join;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;

@@ -24,34 +29,56 @@ import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.AbstractDistribZkTestBase;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass;
import org.junit.Test;

@Slow
public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
public class BlockJoinFacetDistribTest extends SolrCloudTestCase{

private static final String collection = "facetcollection";

@BeforeClass
public static void beforeSuperClass() throws Exception {
schemaString = "schema-blockjoinfacetcomponent.xml";
configString = "solrconfig-blockjoinfacetcomponent.xml";
}
public static void setupCluster() throws Exception {
final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");

@ShardsFixed(num = 3)
public void test() throws Exception {
testBJQFacetComponent();
String configName = "solrCloudCollectionConfig";
int nodeCount = 6;
configureCluster(nodeCount)
.addConfig(configName, configDir)
.configure();


Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put("config", "solrconfig-blockjoinfacetcomponent.xml" );
collectionProperties.put("schema", "schema-blockjoinfacetcomponent.xml");

// create a collection holding data for the "to" side of the JOIN

int shards = 3;
int replicas = 2 ;
assertNotNull(cluster.createCollection(collection, shards, replicas,
configName,
collectionProperties));

AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection,
cluster.getSolrClient().getZkStateReader(), false, true, 30);

}

final static List<String> colors = Arrays.asList("red","blue","brown","white","black","yellow","cyan","magenta","blur",
"fuchsia", "light","dark","green","grey","don't","know","any","more" );
final static List<String> sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi");

private void testBJQFacetComponent() throws Exception {
@Test
public void testBJQFacetComponent() throws Exception {

assert ! colors.removeAll(sizes): "there is no colors in sizes";
Collections.shuffle(colors,random());

@@ -64,8 +91,11 @@ public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
}
};

cluster.getSolrClient().deleteByQuery(collection, "*:*");

final int parents = atLeast(10);
boolean aggregationOccurs = false;
List<SolrInputDocument> parentDocs = new ArrayList<>();
for(int parent=0; parent<parents || !aggregationOccurs;parent++){
assert parent < 2000000 : "parent num "+parent+
" aggregationOccurs:"+aggregationOccurs+". Sorry! too tricky loop condition.";

@@ -89,22 +119,18 @@ public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
}
pdoc.addChildDocument(childDoc);
}
indexDoc(pdoc);
parentDocs.add(pdoc);
if (!parentDocs.isEmpty() && rarely()) {
indexDocs(parentDocs);
parentDocs.clear();
cluster.getSolrClient().commit(collection, false, false, true);
}
}
commit();

//handle.clear();
handle.put("timestamp", SKIPVAL);
handle.put("_version_", SKIPVAL); // not a cloud test, but may use updateLog
handle.put("maxScore", SKIP);// see org.apache.solr.TestDistributedSearch.test()
handle.put("shards", SKIP);
handle.put("distrib", SKIP);
handle.put("rid", SKIP);
handle.put("track", SKIP);
handle.put("facet_fields", UNORDERED);
handle.put("SIZE_s", UNORDERED);
handle.put("COLOR_s", UNORDERED);

if (!parentDocs.isEmpty()) {
indexDocs(parentDocs);
}
cluster.getSolrClient().commit(collection);

// to parent query
final String childQueryClause = "COLOR_s:("+(matchingColors.toString().replaceAll("[,\\[\\]]", " "))+")";
QueryResponse results = query("q", "{!parent which=\"type_s:parent\"}"+childQueryClause,

@@ -122,15 +148,24 @@ public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase {
String msg = ""+parentIdsByAttrValue+" "+color_s+" "+size_s;
for (FacetField facet: new FacetField[]{color_s, size_s}) {
for (Count c : facet.getValues()) {
assertEquals(c.getName()+"("+msg+")", parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
assertEquals(c.getName()+"("+msg+")",
parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
}
}

assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount());
// }
//System.out.println(parentIdsByAttrValue);
}

protected String getCloudSolrConfig() {
return configString;
private QueryResponse query(String ... arg) throws SolrServerException, IOException {
ModifiableSolrParams solrParams = new ModifiableSolrParams();
for(int i=0; i<arg.length; i+=2) {
solrParams.add(arg[i], arg[i+1]);
}
return cluster.getSolrClient().query(collection, solrParams);
}

private void indexDocs(Collection<SolrInputDocument> pdocs) throws SolrServerException, IOException {
cluster.getSolrClient().add(collection, pdocs);
}
}

@@ -25,6 +25,7 @@ import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SpellingParams;

@@ -594,5 +595,34 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
List<?> collations = (List<?>) collationList.getAll("collation");
assertTrue(collations.size() == 2);
}
@Test
public void testWithCursorMark() throws Exception
{
SolrCore core = h.getCore();
SearchComponent speller = core.getSearchComponent("spellcheck");
assertTrue("speller is null and it shouldn't be", speller != null);

ModifiableSolrParams params = new ModifiableSolrParams();
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "2");
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1");
params.add(CommonParams.Q, "lowerfilt:(+fauth)");
params.add(CommonParams.SORT, "id asc");
params.add(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START);
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
SolrQueryResponse rsp = new SolrQueryResponse();
rsp.addResponseHeader(new SimpleOrderedMap());
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
handler.handleRequest(req, rsp);
req.close();
NamedList values = rsp.getValues();
NamedList spellCheck = (NamedList) values.get("spellcheck");
NamedList collationList = (NamedList) spellCheck.get("collations");
List<?> collations = (List<?>) collationList.getAll("collation");
assertTrue(collations.size() == 1);
}

}

@@ -0,0 +1,681 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
|
||||
// TODO:
|
||||
// - test w/ del docs
|
||||
// - test prefix
|
||||
// - test w/ cutoff
|
||||
// - crank docs way up so we get some merging sometimes
|
||||
|
||||
public class TestDocTermOrds extends LuceneTestCase {
|
||||
|
||||
public void testEmptyIndex() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
iw.close();
|
||||
|
||||
final DirectoryReader ir = DirectoryReader.open(dir);
|
||||
TestUtil.checkReader(ir);
|
||||
|
||||
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
|
||||
TestUtil.checkReader(composite);
|
||||
|
||||
// check the leaves
|
||||
// (normally there are none for an empty index, so this is really just future
|
||||
// proofing in case that changes for some reason)
|
||||
for (LeafReaderContext rc : ir.leaves()) {
|
||||
final LeafReader r = rc.reader();
|
||||
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
|
||||
assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
|
||||
assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
|
||||
}
|
||||
|
||||
// check the composite
|
||||
final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
|
||||
assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
|
||||
assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSimple() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||
Document doc = new Document();
|
||||
Field field = newTextField("field", "", Field.Store.NO);
|
||||
doc.add(field);
|
||||
field.setStringValue("a b c");
|
||||
w.addDocument(doc);
|
||||
|
||||
field.setStringValue("d e f");
|
||||
w.addDocument(doc);
|
||||
|
||||
field.setStringValue("a f");
|
||||
w.addDocument(doc);
|
||||
|
||||
final IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(ar);
|
||||
final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
|
||||
SortedSetDocValues iter = dto.iterator(ar);
|
||||
|
||||
iter.setDocument(0);
|
||||
assertEquals(0, iter.nextOrd());
|
||||
assertEquals(1, iter.nextOrd());
|
||||
assertEquals(2, iter.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
|
||||
|
||||
iter.setDocument(1);
|
||||
assertEquals(3, iter.nextOrd());
|
||||
assertEquals(4, iter.nextOrd());
|
||||
assertEquals(5, iter.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
|
||||
|
||||
iter.setDocument(2);
|
||||
assertEquals(0, iter.nextOrd());
|
||||
assertEquals(5, iter.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
||||
final int NUM_TERMS = atLeast(20);
|
||||
final Set<BytesRef> terms = new HashSet<>();
|
||||
while(terms.size() < NUM_TERMS) {
|
||||
final String s = TestUtil.randomRealisticUnicodeString(random());
|
||||
//final String s = _TestUtil.randomSimpleString(random);
|
||||
if (s.length() > 0) {
|
||||
terms.add(new BytesRef(s));
|
||||
}
|
||||
}
|
||||
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
|
||||
Arrays.sort(termsArray);
|
||||
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
|
||||
// Sometimes swap in codec that impls ord():
|
||||
if (random().nextInt(10) == 7) {
|
||||
// Make sure terms index has ords:
|
||||
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
|
||||
conf.setCodec(codec);
|
||||
}
|
||||
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
|
||||
|
||||
final int[][] idToOrds = new int[NUM_DOCS][];
|
||||
final Set<Integer> ordsForDocSet = new HashSet<>();
|
||||
|
||||
for(int id=0;id<NUM_DOCS;id++) {
|
||||
Document doc = new Document();
|
||||
|
||||
doc.add(new LegacyIntField("id", id, Field.Store.YES));
|
||||
|
||||
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
|
||||
while(ordsForDocSet.size() < termCount) {
|
||||
ordsForDocSet.add(random().nextInt(termsArray.length));
|
||||
}
|
||||
final int[] ordsForDoc = new int[termCount];
|
||||
int upto = 0;
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: doc id=" + id);
|
||||
}
|
||||
for(int ord : ordsForDocSet) {
|
||||
ordsForDoc[upto++] = ord;
|
||||
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" f=" + termsArray[ord].utf8ToString());
|
||||
}
|
||||
doc.add(field);
|
||||
}
|
||||
ordsForDocSet.clear();
|
||||
Arrays.sort(ordsForDoc);
|
||||
idToOrds[id] = ordsForDoc;
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
final DirectoryReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: reader=" + r);
|
||||
}
|
||||
|
||||
for(LeafReaderContext ctx : r.leaves()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: sub=" + ctx.reader());
|
||||
}
|
||||
verify(ctx.reader(), idToOrds, termsArray, null);
|
||||
}
|
||||
|
||||
// Also test top-level reader: its enum does not support
|
||||
// ord, so this forces the OrdWrapper to run:
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: top reader");
|
||||
}
|
||||
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(slowR);
|
||||
verify(slowR, idToOrds, termsArray, null);
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandomWithPrefix() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
||||
final Set<String> prefixes = new HashSet<>();
|
||||
final int numPrefix = TestUtil.nextInt(random(), 2, 7);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: use " + numPrefix + " prefixes");
|
||||
}
|
||||
while(prefixes.size() < numPrefix) {
|
||||
prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
|
||||
//prefixes.add(_TestUtil.randomSimpleString(random));
|
||||
}
|
||||
final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);
|
||||
|
||||
final int NUM_TERMS = atLeast(20);
|
||||
final Set<BytesRef> terms = new HashSet<>();
|
||||
while(terms.size() < NUM_TERMS) {
|
||||
final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random());
|
||||
//final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
|
||||
if (s.length() > 0) {
|
||||
terms.add(new BytesRef(s));
|
||||
}
|
||||
}
|
||||
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
|
||||
Arrays.sort(termsArray);
|
||||
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
|
||||
// Sometimes swap in codec that impls ord():
|
||||
if (random().nextInt(10) == 7) {
|
||||
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
|
||||
conf.setCodec(codec);
|
||||
}
|
||||
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
|
||||
|
||||
final int[][] idToOrds = new int[NUM_DOCS][];
|
||||
final Set<Integer> ordsForDocSet = new HashSet<>();
|
||||
|
||||
for(int id=0;id<NUM_DOCS;id++) {
|
||||
Document doc = new Document();
|
||||
|
||||
doc.add(new LegacyIntField("id", id, Field.Store.YES));
|
||||
|
||||
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
|
||||
while(ordsForDocSet.size() < termCount) {
|
||||
ordsForDocSet.add(random().nextInt(termsArray.length));
|
||||
}
|
||||
final int[] ordsForDoc = new int[termCount];
|
||||
int upto = 0;
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: doc id=" + id);
|
||||
}
|
||||
for(int ord : ordsForDocSet) {
|
||||
ordsForDoc[upto++] = ord;
|
||||
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" f=" + termsArray[ord].utf8ToString());
|
||||
}
|
||||
doc.add(field);
|
||||
}
|
||||
ordsForDocSet.clear();
|
||||
Arrays.sort(ordsForDoc);
|
||||
idToOrds[id] = ordsForDoc;
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
final DirectoryReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: reader=" + r);
|
||||
}
|
||||
|
||||
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(slowR);
|
||||
for(String prefix : prefixesArray) {
|
||||
|
||||
final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
|
||||
|
||||
final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
|
||||
for(int id=0;id<NUM_DOCS;id++) {
|
||||
final int[] docOrds = idToOrds[id];
|
||||
final List<Integer> newOrds = new ArrayList<>();
|
||||
for(int ord : idToOrds[id]) {
|
||||
if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
|
||||
newOrds.add(ord);
|
||||
}
|
||||
}
|
||||
final int[] newOrdsArray = new int[newOrds.size()];
|
||||
int upto = 0;
|
||||
for(int ord : newOrds) {
|
||||
newOrdsArray[upto++] = ord;
|
||||
}
|
||||
idToOrdsPrefix[id] = newOrdsArray;
|
||||
}
|
||||
|
||||
for(LeafReaderContext ctx : r.leaves()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: sub=" + ctx.reader());
|
||||
}
|
||||
verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
|
||||
}
|
||||
|
||||
// Also test top-level reader: its enum does not support
|
||||
// ord, so this forces the OrdWrapper to run:
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: top reader");
|
||||
}
|
||||
verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
|
||||
}
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
|
||||
|
||||
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(),
|
||||
"field",
|
||||
prefixRef,
|
||||
Integer.MAX_VALUE,
|
||||
TestUtil.nextInt(random(), 2, 10));
|
||||
|
||||
|
||||
final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER, false);
|
||||
/*
|
||||
for(int docID=0;docID<subR.maxDoc();docID++) {
|
||||
System.out.println(" docID=" + docID + " id=" + docIDToID[docID]);
|
||||
}
|
||||
*/
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: verify prefix=" + (prefixRef==null ? "null" : prefixRef.utf8ToString()));
|
||||
System.out.println("TEST: all TERMS:");
|
||||
TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
|
||||
int ord = 0;
|
||||
while(allTE.next() != null) {
|
||||
System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
|
||||
}
|
||||
}
|
||||
|
||||
//final TermsEnum te = subR.fields().terms("field").iterator();
|
||||
final TermsEnum te = dto.getOrdTermsEnum(r);
|
||||
if (dto.numTerms() == 0) {
|
||||
if (prefixRef == null) {
|
||||
assertNull(MultiFields.getTerms(r, "field"));
|
||||
} else {
|
||||
Terms terms = MultiFields.getTerms(r, "field");
|
||||
if (terms != null) {
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
|
||||
if (result != TermsEnum.SeekStatus.END) {
|
||||
assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: TERMS:");
|
||||
te.seekExact(0);
|
||||
while(true) {
|
||||
System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString());
|
||||
if (te.next() == null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SortedSetDocValues iter = dto.iterator(r);
|
||||
for(int docID=0;docID<r.maxDoc();docID++) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
|
||||
}
|
||||
iter.setDocument(docID);
|
||||
final int[] answers = idToOrds[(int) docIDToID.get(docID)];
|
||||
int upto = 0;
|
||||
long ord;
|
||||
while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
te.seekExact(ord);
|
||||
final BytesRef expected = termsArray[answers[upto++]];
|
||||
if (VERBOSE) {
|
||||
System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
|
||||
}
|
||||
assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
|
||||
}
|
||||
assertEquals(answers.length, upto);
|
||||
}
|
||||
}
|
||||
|
||||
public void testBackToTheFuture() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(newStringField("foo", "bar", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newStringField("foo", "baz", Field.Store.NO));
|
||||
// we need a second value for a doc, or we don't actually test DocTermOrds!
|
||||
doc.add(newStringField("foo", "car", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
DirectoryReader r1 = DirectoryReader.open(iw);
|
||||
|
||||
iw.deleteDocuments(new Term("foo", "baz"));
|
||||
DirectoryReader r2 = DirectoryReader.open(iw);
|
||||
|
||||
FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
|
||||
assertEquals(3, v.getValueCount());
|
||||
v.setDocument(1);
|
||||
assertEquals(1, v.nextOrd());
|
||||
|
||||
iw.close();
|
||||
r1.close();
|
||||
r2.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNumericEncoded32() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNumericEncoded64() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSortedTermsEnum() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", "hello", Field.Store.NO));
|
||||
iwriter.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("field", "world", Field.Store.NO));
|
||||
// we need a second value for a doc, or we don't actually test DocTermOrds!
|
||||
doc.add(new StringField("field", "hello", Field.Store.NO));
|
||||
iwriter.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("field", "beer", Field.Store.NO));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.forceMerge(1);
|
||||
|
||||
DirectoryReader ireader = iwriter.getReader();
|
||||
iwriter.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ireader);
|
||||
SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
|
||||
assertEquals(3, dv.getValueCount());
|
||||
|
||||
TermsEnum termsEnum = dv.termsEnum();
|
||||
|
||||
// next()
|
||||
assertEquals("beer", termsEnum.next().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
assertEquals("hello", termsEnum.next().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
assertEquals("world", termsEnum.next().utf8ToString());
|
||||
assertEquals(2, termsEnum.ord());
|
||||
|
||||
// seekCeil()
|
||||
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
|
||||
assertEquals("hello", termsEnum.term().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
|
||||
assertEquals("beer", termsEnum.term().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
|
||||
|
||||
// seekExact()
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("beer")));
|
||||
assertEquals("beer", termsEnum.term().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("hello")));
|
||||
assertEquals("hello", termsEnum.term().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("world")));
|
||||
assertEquals("world", termsEnum.term().utf8ToString());
|
||||
assertEquals(2, termsEnum.ord());
|
||||
assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
|
||||
|
||||
// seek(ord)
|
||||
termsEnum.seekExact(0);
|
||||
assertEquals("beer", termsEnum.term().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
termsEnum.seekExact(1);
|
||||
assertEquals("hello", termsEnum.term().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
termsEnum.seekExact(2);
|
||||
assertEquals("world", termsEnum.term().utf8ToString());
|
||||
assertEquals(2, termsEnum.ord());
|
||||
|
||||
// lookupTerm(BytesRef)
|
||||
assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
|
||||
assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
|
||||
assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
|
||||
assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
|
||||
assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
|
||||
assertEquals(2, dv.lookupTerm(new BytesRef("world")));
|
||||
assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testActuallySingleValued() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(null);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
IndexWriter iw = new IndexWriter(dir, iwconfig);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "baz", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "baz", Field.Store.NO));
|
||||
doc.add(new StringField("foo", "baz", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
|
||||
assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(2);
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(3);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals("bar", value.utf8ToString());
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals("baz", value.utf8ToString());
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,731 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CyclicBarrier;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LogDocMergePolicy;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestFieldCache extends LuceneTestCase {
|
||||
private static LeafReader reader;
|
||||
private static int NUM_DOCS;
|
||||
private static int NUM_ORDS;
|
||||
private static String[] unicodeStrings;
|
||||
private static BytesRef[][] multiValued;
|
||||
private static Directory directory;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
NUM_DOCS = atLeast(500);
|
||||
NUM_ORDS = atLeast(2);
|
||||
directory = newDirectory();
|
||||
IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
|
||||
long theLong = Long.MAX_VALUE;
|
||||
double theDouble = Double.MAX_VALUE;
|
||||
int theInt = Integer.MAX_VALUE;
|
||||
float theFloat = Float.MAX_VALUE;
|
||||
unicodeStrings = new String[NUM_DOCS];
|
||||
multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: setUp");
|
||||
}
|
||||
for (int i = 0; i < NUM_DOCS; i++){
|
||||
Document doc = new Document();
|
||||
doc.add(new LongPoint("theLong", theLong--));
|
||||
doc.add(new DoublePoint("theDouble", theDouble--));
|
||||
doc.add(new IntPoint("theInt", theInt--));
|
||||
doc.add(new FloatPoint("theFloat", theFloat--));
|
||||
if (i%2 == 0) {
|
||||
doc.add(new IntPoint("sparse", i));
|
||||
}
|
||||
|
||||
if (i%2 == 0) {
|
||||
doc.add(new IntPoint("numInt", i));
|
||||
}
|
||||
|
||||
// sometimes skip the field:
|
||||
if (random().nextInt(40) != 17) {
|
||||
unicodeStrings[i] = generateString(i);
|
||||
doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
|
||||
}
|
||||
|
||||
// sometimes skip the field:
|
||||
if (random().nextInt(10) != 8) {
|
||||
for (int j = 0; j < NUM_ORDS; j++) {
|
||||
String newValue = generateString(i);
|
||||
multiValued[i][j] = new BytesRef(newValue);
|
||||
doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
|
||||
}
|
||||
Arrays.sort(multiValued[i]);
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.forceMerge(1); // this test relies on one segment and docid order
|
||||
IndexReader r = DirectoryReader.open(writer);
|
||||
assertEquals(1, r.leaves().size());
|
||||
reader = r.leaves().get(0).reader();
|
||||
TestUtil.checkReader(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
unicodeStrings = null;
|
||||
multiValued = null;
|
||||
}
|
||||
|
||||
public void test() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Long.MAX_VALUE - i, longs.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
|
||||
}
|
||||
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
|
||||
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
}
|
||||
|
||||
docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
|
||||
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
|
||||
// getTermsIndex
|
||||
SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
final String s;
|
||||
final int ord = termsIndex.getOrd(i);
|
||||
if (ord == -1) {
|
||||
s = null;
|
||||
} else {
|
||||
s = termsIndex.lookupOrd(ord).utf8ToString();
|
||||
}
|
||||
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
|
||||
}
|
||||
|
||||
int nTerms = termsIndex.getValueCount();
|
||||
|
||||
TermsEnum tenum = termsIndex.termsEnum();
|
||||
for (int i=0; i<nTerms; i++) {
|
||||
BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
|
||||
final BytesRef val = termsIndex.lookupOrd(i);
|
||||
// System.out.println("i="+i);
|
||||
assertEquals(val, val1);
|
||||
}
|
||||
|
||||
// seek the enum around (note this isn't a great test here)
|
||||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
int k = random().nextInt(nTerms);
|
||||
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
|
||||
assertEquals(val, tenum.term());
|
||||
}
|
||||
|
||||
for(int i=0;i<nTerms;i++) {
|
||||
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
|
||||
assertEquals(val, tenum.term());
|
||||
}
|
||||
|
||||
// test bad field
|
||||
termsIndex = cache.getTermsIndex(reader, "bogusfield");
|
||||
|
||||
// getTerms
|
||||
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true);
|
||||
Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString", null);
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
final String s;
|
||||
if (!bits.get(i)) {
|
||||
s = null;
|
||||
} else {
|
||||
s = terms.get(i).utf8ToString();
|
||||
}
|
||||
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
|
||||
}
|
||||
|
||||
// test bad field
|
||||
terms = cache.getTerms(reader, "bogusfield", false);
|
||||
|
||||
// getDocTermOrds
|
||||
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
|
||||
int numEntries = cache.getCacheEntries().length;
|
||||
// ask for it again, and check that we didnt create any additional entries:
|
||||
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
|
||||
assertEquals(numEntries, cache.getCacheEntries().length);
|
||||
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
termOrds.setDocument(i);
|
||||
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
|
||||
List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
|
||||
for (BytesRef v : values) {
|
||||
if (v == null) {
|
||||
// why does this test use null values... instead of an empty list: confusing
|
||||
break;
|
||||
}
|
||||
long ord = termOrds.nextOrd();
|
||||
assert ord != SortedSetDocValues.NO_MORE_ORDS;
|
||||
BytesRef scratch = termOrds.lookupOrd(ord);
|
||||
assertEquals(v, scratch);
|
||||
}
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
|
||||
}
|
||||
|
||||
// test bad field
|
||||
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
|
||||
assertTrue(termOrds.getValueCount() == 0);
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
}
|
||||
|
||||
public void testEmptyIndex() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
|
||||
writer.close();
|
||||
IndexReader r = DirectoryReader.open(dir);
|
||||
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(reader);
|
||||
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
|
||||
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static String generateString(int i) {
|
||||
String s = null;
|
||||
if (i > 0 && random().nextInt(3) == 1) {
|
||||
// reuse past string -- try to find one that's not null
|
||||
for(int iter = 0; iter < 10 && s == null;iter++) {
|
||||
s = unicodeStrings[random().nextInt(i)];
|
||||
}
|
||||
if (s == null) {
|
||||
s = TestUtil.randomUnicodeString(random());
|
||||
}
|
||||
} else {
|
||||
s = TestUtil.randomUnicodeString(random());
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
public void testDocsWithField() throws Exception {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
|
||||
// The double[] takes one slots, and docsWithField should also
|
||||
// have been populated:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
|
||||
|
||||
// No new entries should appear:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
assertTrue(bits instanceof Bits.MatchAllBits);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
|
||||
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.INT_POINT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, numInts.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetDocsWithFieldThreadSafety() throws Exception {
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
int NUM_THREADS = 3;
|
||||
Thread[] threads = new Thread[NUM_THREADS];
|
||||
final AtomicBoolean failed = new AtomicBoolean();
|
||||
final AtomicInteger iters = new AtomicInteger();
|
||||
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
|
||||
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
|
||||
new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
cache.purgeAllCaches();
|
||||
iters.incrementAndGet();
|
||||
}
|
||||
});
|
||||
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
try {
|
||||
while(!failed.get()) {
|
||||
final int op = random().nextInt(3);
|
||||
if (op == 0) {
|
||||
// Purge all caches & resume, once all
|
||||
// threads get here:
|
||||
restart.await();
|
||||
if (iters.get() >= NUM_ITER) {
|
||||
break;
|
||||
}
|
||||
} else if (op == 1) {
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
} else {
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
failed.set(true);
|
||||
restart.reset();
|
||||
throw new RuntimeException(t);
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[threadIDX].start();
|
||||
}
|
||||
|
||||
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX].join();
|
||||
}
|
||||
assertFalse(failed.get());
|
||||
}
|
||||
|
||||
public void testDocValuesIntegration() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(null);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
|
||||
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
|
||||
doc.add(new NumericDocValuesField("numeric", 42));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
// Binary type: can be retrieved via getTerms()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER, false);
|
||||
});
|
||||
|
||||
BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true);
|
||||
final BytesRef term = binary.get(0);
|
||||
assertEquals("binary value", term.utf8ToString());
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTermsIndex(ar, "binary");
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "binary");
|
||||
});
|
||||
|
||||
Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER, false);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "sorted");
|
||||
});
|
||||
|
||||
binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true);
|
||||
BytesRef scratch = binary.get(0);
|
||||
assertEquals("sorted value", scratch.utf8ToString());
|
||||
|
||||
SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
|
||||
assertEquals(0, sorted.getOrd(0));
|
||||
assertEquals(1, sorted.getValueCount());
|
||||
scratch = sorted.get(0);
|
||||
assertEquals("sorted value", scratch.utf8ToString());
|
||||
|
||||
SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(0, sortedSet.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
assertEquals(1, sortedSet.getValueCount());
|
||||
|
||||
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
// Numeric type: can be retrieved via getInts() and so on
|
||||
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER, false);
|
||||
assertEquals(42, numeric.get(0));
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTerms(ar, "numeric", true);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "numeric");
|
||||
});
|
||||
|
||||
bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
// SortedSet type: can be retrieved via getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER, false);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "sortedset");
|
||||
});
|
||||
|
||||
sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(0, sortedSet.nextOrd());
|
||||
assertEquals(1, sortedSet.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
assertEquals(2, sortedSet.getValueCount());
|
||||
|
||||
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonexistantFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
|
||||
BytesRef scratch = binaries.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
|
||||
assertEquals(-1, sorted.getOrd(0));
|
||||
scratch = sorted.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
|
||||
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
|
||||
assertFalse(bits.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonIndexedFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("bogusbytes", "bogus"));
|
||||
doc.add(new StoredField("bogusshorts", "bogus"));
|
||||
doc.add(new StoredField("bogusints", "bogus"));
|
||||
doc.add(new StoredField("boguslongs", "bogus"));
|
||||
doc.add(new StoredField("bogusfloats", "bogus"));
|
||||
doc.add(new StoredField("bogusdoubles", "bogus"));
|
||||
doc.add(new StoredField("bogusterms", "bogus"));
|
||||
doc.add(new StoredField("bogustermsindex", "bogus"));
|
||||
doc.add(new StoredField("bogusmultivalued", "bogus"));
|
||||
doc.add(new StoredField("bogusbits", "bogus"));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
|
||||
BytesRef scratch = binaries.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
|
||||
assertEquals(-1, sorted.getOrd(0));
|
||||
scratch = sorted.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
|
||||
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
|
||||
assertFalse(bits.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent using the full long range
|
||||
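// The values below mix Long.MIN_VALUE, 0 and Long.MAX_VALUE with small random longs, so the
// cache has to hold the full 64-bit range and still return every value exactly.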
public void testLongFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
LongPoint field = new LongPoint("f", 0L);
|
||||
StoredField field2 = new StoredField("f", 0L);
|
||||
doc.add(field);
|
||||
doc.add(field2);
|
||||
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final long v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Long.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Long.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextLong(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setLongValue(v);
|
||||
field2.setLongValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], longs.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent using the full int range
|
||||
public void testIntFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
IntPoint field = new IntPoint("f", 0);
|
||||
doc.add(field);
|
||||
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final int v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Integer.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Integer.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextInt(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setIntValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], ints.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
|
@@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestFieldCacheReopen extends LuceneTestCase {
|
||||
|
||||
// TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
|
||||
|
||||
// LUCENE-1579: Ensure that on a reopened reader, that any
|
||||
// shared segments reuse the doc values arrays in
|
||||
// FieldCache
|
||||
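// The identity check "ints == ints2" below relies on the unchanged first segment keeping the
// same reader core across the reopen, so FieldCache should hand back the same cached instance.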
public void testFieldCacheReuseAfterReopen() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(
|
||||
dir,
|
||||
newIndexWriterConfig(new MockAnalyzer(random())).
|
||||
setMergePolicy(newLogMergePolicy(10))
|
||||
);
|
||||
Document doc = new Document();
|
||||
doc.add(new IntPoint("number", 17));
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
||||
// Open reader1
|
||||
DirectoryReader r = DirectoryReader.open(dir);
|
||||
LeafReader r1 = getOnlyLeafReader(r);
|
||||
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER, false);
|
||||
assertEquals(17, ints.get(0));
|
||||
|
||||
// Add new segment
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
||||
// Reopen reader1 --> reader2
|
||||
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
|
||||
assertNotNull(r2);
|
||||
r.close();
|
||||
LeafReader sub0 = r2.leaves().get(0).reader();
|
||||
final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER, false);
|
||||
r2.close();
|
||||
assertTrue(ints == ints2);
|
||||
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;
|
||||
import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType;
|
||||
|
||||
public class TestFieldCacheSanityChecker extends LuceneTestCase {
|
||||
|
||||
protected LeafReader readerA;
|
||||
protected LeafReader readerB;
|
||||
protected LeafReader readerX;
|
||||
protected LeafReader readerAclone;
|
||||
protected Directory dirA, dirB;
|
||||
private static final int NUM_DOCS = 1000;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
dirA = newDirectory();
|
||||
dirB = newDirectory();
|
||||
|
||||
IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
long theLong = Long.MAX_VALUE;
|
||||
double theDouble = Double.MAX_VALUE;
|
||||
int theInt = Integer.MAX_VALUE;
|
||||
float theFloat = Float.MAX_VALUE;
|
||||
for (int i = 0; i < NUM_DOCS; i++){
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
|
||||
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
|
||||
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
|
||||
if (0 == i % 3) {
|
||||
wA.addDocument(doc);
|
||||
} else {
|
||||
wB.addDocument(doc);
|
||||
}
|
||||
}
|
||||
wA.close();
|
||||
wB.close();
|
||||
DirectoryReader rA = DirectoryReader.open(dirA);
|
||||
readerA = SlowCompositeReaderWrapper.wrap(rA);
|
||||
readerAclone = SlowCompositeReaderWrapper.wrap(rA);
|
||||
readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA));
|
||||
readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB));
|
||||
readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
readerA.close();
|
||||
readerAclone.close();
|
||||
readerB.close();
|
||||
readerX.close();
|
||||
dirA.close();
|
||||
dirB.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testSanity() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
cache.getNumerics(readerA, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
cache.getNumerics(readerAclone, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
cache.getNumerics(readerB, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
|
||||
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
|
||||
|
||||
// // //
|
||||
|
||||
Insanity[] insanity =
|
||||
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
|
||||
|
||||
if (0 < insanity.length)
|
||||
dumpArray(getTestClass().getName() + "#" + getTestName()
|
||||
+ " INSANITY", insanity, System.err);
|
||||
|
||||
assertEquals("shouldn't be any cache insanity", 0, insanity.length);
|
||||
cache.purgeAllCaches();
|
||||
}
|
||||
|
||||
public void testInsanity1() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
|
||||
cache.getTerms(readerX, "theInt", false);
|
||||
|
||||
// // //
|
||||
|
||||
Insanity[] insanity =
|
||||
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
|
||||
|
||||
assertEquals("wrong number of cache errors", 1, insanity.length);
|
||||
assertEquals("wrong type of cache error",
|
||||
InsanityType.VALUEMISMATCH,
|
||||
insanity[0].getType());
|
||||
assertEquals("wrong number of entries in cache error", 2,
|
||||
insanity[0].getCacheEntries().length);
|
||||
|
||||
// we expect bad things, don't let tearDown complain about them
|
||||
cache.purgeAllCaches();
|
||||
}
|
||||
|
||||
public void testInsanity2() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
cache.getTerms(readerA, "theInt", false);
|
||||
cache.getTerms(readerB, "theInt", false);
|
||||
cache.getTerms(readerX, "theInt", false);
|
||||
|
||||
|
||||
// // //
|
||||
|
||||
Insanity[] insanity =
|
||||
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
|
||||
|
||||
assertEquals("wrong number of cache errors", 1, insanity.length);
|
||||
assertEquals("wrong type of cache error",
|
||||
InsanityType.SUBREADER,
|
||||
insanity[0].getType());
|
||||
assertEquals("wrong number of entries in cache error", 3,
|
||||
insanity[0].getCacheEntries().length);
|
||||
|
||||
// we expect bad things, don't let tearDown complain about them
|
||||
cache.purgeAllCaches();
|
||||
}
|
||||
|
||||
}
|
File diff suppressed because it is too large
|
@@ -0,0 +1,318 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/** random sorting tests with uninversion */
|
||||
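// Indexes random strings (about 10% of docs deliberately missing the value), uninverts the
// reader via UninvertingReader.wrap, then checks randomized sorted searches against an
// expected ordering built from RandomQuery.matchValues.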
public class TestFieldCacheSortRandom extends LuceneTestCase {
|
||||
|
||||
public void testRandomStringSort() throws Exception {
|
||||
testRandomStringSort(SortField.Type.STRING);
|
||||
}
|
||||
|
||||
public void testRandomStringValSort() throws Exception {
|
||||
testRandomStringSort(SortField.Type.STRING_VAL);
|
||||
}
|
||||
|
||||
private void testRandomStringSort(SortField.Type type) throws Exception {
|
||||
Random random = new Random(random().nextLong());
|
||||
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
final Directory dir = newDirectory();
|
||||
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||
final boolean allowDups = random.nextBoolean();
|
||||
final Set<String> seen = new HashSet<>();
|
||||
final int maxLength = TestUtil.nextInt(random, 5, 100);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
|
||||
}
|
||||
|
||||
int numDocs = 0;
|
||||
final List<BytesRef> docValues = new ArrayList<>();
|
||||
// TODO: deletions
|
||||
while (numDocs < NUM_DOCS) {
|
||||
final Document doc = new Document();
|
||||
|
||||
// 10% of the time, the document is missing the value:
|
||||
final BytesRef br;
|
||||
if (random().nextInt(10) != 7) {
|
||||
final String s;
|
||||
if (random.nextBoolean()) {
|
||||
s = TestUtil.randomSimpleString(random, maxLength);
|
||||
} else {
|
||||
s = TestUtil.randomUnicodeString(random, maxLength);
|
||||
}
|
||||
|
||||
if (!allowDups) {
|
||||
if (seen.contains(s)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(s);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": s=" + s);
|
||||
}
|
||||
|
||||
doc.add(new StringField("stringdv", s, Field.Store.NO));
|
||||
docValues.add(new BytesRef(s));
|
||||
|
||||
} else {
|
||||
br = null;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": <missing>");
|
||||
}
|
||||
docValues.add(null);
|
||||
}
|
||||
|
||||
doc.add(new IntPoint("id", numDocs));
|
||||
doc.add(new StoredField("id", numDocs));
|
||||
writer.addDocument(doc);
|
||||
numDocs++;
|
||||
|
||||
if (random.nextInt(40) == 17) {
|
||||
// force flush
|
||||
writer.getReader().close();
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,UninvertingReader.Type> mapping = new HashMap<>();
|
||||
mapping.put("stringdv", Type.SORTED);
|
||||
mapping.put("id", Type.INTEGER_POINT);
|
||||
final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
|
||||
writer.close();
|
||||
if (VERBOSE) {
|
||||
System.out.println(" reader=" + r);
|
||||
}
|
||||
|
||||
final IndexSearcher s = newSearcher(r, false);
|
||||
final int ITERS = atLeast(100);
|
||||
for(int iter=0;iter<ITERS;iter++) {
|
||||
final boolean reverse = random.nextBoolean();
|
||||
|
||||
final TopFieldDocs hits;
|
||||
final SortField sf;
|
||||
final boolean sortMissingLast;
|
||||
final boolean missingIsNull;
|
||||
sf = new SortField("stringdv", type, reverse);
|
||||
sortMissingLast = random().nextBoolean();
|
||||
missingIsNull = true;
|
||||
|
||||
if (sortMissingLast) {
|
||||
sf.setMissingValue(SortField.STRING_LAST);
|
||||
}
|
||||
|
||||
final Sort sort;
|
||||
if (random.nextBoolean()) {
|
||||
sort = new Sort(sf);
|
||||
} else {
|
||||
sort = new Sort(sf, SortField.FIELD_DOC);
|
||||
}
|
||||
final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
|
||||
final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
|
||||
int queryType = random.nextInt(2);
|
||||
if (queryType == 0) {
|
||||
hits = s.search(new ConstantScoreQuery(f),
|
||||
hitCount, sort, random.nextBoolean(), random.nextBoolean());
|
||||
} else {
|
||||
hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
|
||||
}
|
||||
|
||||
// Compute expected results:
|
||||
Collections.sort(f.matchValues, new Comparator<BytesRef>() {
|
||||
@Override
|
||||
public int compare(BytesRef a, BytesRef b) {
|
||||
if (a == null) {
|
||||
if (b == null) {
|
||||
return 0;
|
||||
}
|
||||
if (sortMissingLast) {
|
||||
return 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if (b == null) {
|
||||
if (sortMissingLast) {
|
||||
return -1;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
return a.compareTo(b);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (reverse) {
|
||||
Collections.reverse(f.matchValues);
|
||||
}
|
||||
final List<BytesRef> expected = f.matchValues;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" expected:");
|
||||
for(int idx=0;idx<expected.size();idx++) {
|
||||
BytesRef br = expected.get(idx);
|
||||
if (br == null && missingIsNull == false) {
|
||||
br = new BytesRef();
|
||||
}
|
||||
System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
|
||||
if (idx == hitCount-1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" actual:");
|
||||
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||
BytesRef br = (BytesRef) fd.fields[0];
|
||||
|
||||
System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
|
||||
}
|
||||
}
|
||||
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||
BytesRef br = expected.get(hitIDX);
|
||||
if (br == null && missingIsNull == false) {
|
||||
br = new BytesRef();
|
||||
}
|
||||
|
||||
// Normally, the old codecs (that don't support
|
||||
// docsWithField via doc values) will always return
|
||||
// an empty BytesRef for the missing case; however,
|
||||
// if all docs in a given segment were missing, in
|
||||
// that case it will return null! So we must map
|
||||
// null here, too:
|
||||
BytesRef br2 = (BytesRef) fd.fields[0];
|
||||
if (br2 == null && missingIsNull == false) {
|
||||
br2 = new BytesRef();
|
||||
}
|
||||
|
||||
assertEquals(br, br2);
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static class RandomQuery extends Query {
|
||||
private final long seed;
|
||||
private float density;
|
||||
private final List<BytesRef> docValues;
|
||||
public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
|
||||
|
||||
// density should be 0.0 ... 1.0
|
||||
public RandomQuery(long seed, float density, List<BytesRef> docValues) {
|
||||
this.seed = seed;
|
||||
this.density = density;
|
||||
this.docValues = docValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new ConstantScoreWeight(this) {
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
Random random = new Random(seed ^ context.docBase);
|
||||
final int maxDoc = context.reader().maxDoc();
|
||||
final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
|
||||
assertNotNull(idSource);
|
||||
final FixedBitSet bits = new FixedBitSet(maxDoc);
|
||||
for(int docID=0;docID<maxDoc;docID++) {
|
||||
if (random.nextFloat() <= density) {
|
||||
bits.set(docID);
|
||||
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
|
||||
matchValues.add(docValues.get((int) idSource.get(docID)));
|
||||
}
|
||||
}
|
||||
|
||||
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "RandomFilter(density=" + density + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return sameClassAs(other) &&
|
||||
equalsTo(getClass().cast(other));
|
||||
}
|
||||
|
||||
private boolean equalsTo(RandomQuery other) {
|
||||
return seed == other.seed &&
|
||||
docValues == other.docValues &&
|
||||
density == other.density;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = classHash();
|
||||
h = 31 * h + Objects.hash(seed, density);
|
||||
h = 31 * h + System.identityHashCode(docValues);
|
||||
return h;
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,592 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
|
||||
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||
|
||||
public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
assumeFalse("test unsupported on J9 temporarily, see https://issues.apache.org/jira/browse/LUCENE-6522",
|
||||
Constants.JAVA_VENDOR.startsWith("IBM"));
|
||||
}
|
||||
|
||||
public void testByteMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Byte.MIN_VALUE, Byte.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testShortMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Short.MIN_VALUE, Short.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testIntMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Integer.MIN_VALUE, Integer.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testLongMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Long.MIN_VALUE, Long.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedFixedLengthVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||
doTestSortedVsFieldCache(fixedLength, fixedLength);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedVariableLengthVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestSortedVsFieldCache(1, 10);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedSetFixedLengthVsUninvertedField() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||
doTestSortedSetVsUninvertedField(fixedLength, fixedLength);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedSetVariableLengthVsUninvertedField() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestSortedSetVsUninvertedField(1, 10);
|
||||
}
|
||||
}
|
||||
|
||||
// LUCENE-4853
|
||||
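// Writes binary doc values, some well above 64KB; codecs that cap binary doc values (around
// 32KB) are expected to fail with an "is too large" IllegalArgumentException, which the test
// accepts as long as codecAcceptsHugeBinaryValues() is false.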
public void testHugeBinaryValues() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
// FSDirectory because SimpleText will consume gobs of
|
||||
// space when storing big binary values:
|
||||
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
|
||||
boolean doFixed = random().nextBoolean();
|
||||
int numDocs;
|
||||
int fixedLength = 0;
|
||||
if (doFixed) {
|
||||
// Sometimes make all values fixed length since some
|
||||
// codecs have different code paths for this:
|
||||
numDocs = TestUtil.nextInt(random(), 10, 20);
|
||||
fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
|
||||
} else {
|
||||
numDocs = TestUtil.nextInt(random(), 100, 200);
|
||||
}
|
||||
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
|
||||
List<byte[]> docBytes = new ArrayList<>();
|
||||
long totalBytes = 0;
|
||||
for(int docID=0;docID<numDocs;docID++) {
|
||||
// we don't use RandomIndexWriter because it might add
|
||||
// more docvalues than we expect !!!!
|
||||
|
||||
// Must be > 64KB in size to ensure more than 2 pages in
|
||||
// PagedBytes would be needed:
|
||||
int numBytes;
|
||||
if (doFixed) {
|
||||
numBytes = fixedLength;
|
||||
} else if (docID == 0 || random().nextInt(5) == 3) {
|
||||
numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
|
||||
} else {
|
||||
numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024);
|
||||
}
|
||||
totalBytes += numBytes;
|
||||
if (totalBytes > 5 * 1024*1024) {
|
||||
break;
|
||||
}
|
||||
byte[] bytes = new byte[numBytes];
|
||||
random().nextBytes(bytes);
|
||||
docBytes.add(bytes);
|
||||
Document doc = new Document();
|
||||
BytesRef b = new BytesRef(bytes);
|
||||
b.length = bytes.length;
|
||||
doc.add(new BinaryDocValuesField("field", b));
|
||||
doc.add(new StringField("id", ""+docID, Field.Store.YES));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
} catch (IllegalArgumentException iae) {
|
||||
if (iae.getMessage().indexOf("is too large") == -1) {
|
||||
throw iae;
|
||||
} else {
|
||||
// OK: some codecs can't handle binary DV > 32K
|
||||
assertFalse(codecAcceptsHugeBinaryValues("field"));
|
||||
w.rollback();
|
||||
d.close();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DirectoryReader r;
|
||||
try {
|
||||
r = DirectoryReader.open(w);
|
||||
} catch (IllegalArgumentException iae) {
|
||||
if (iae.getMessage().indexOf("is too large") == -1) {
|
||||
throw iae;
|
||||
} else {
|
||||
assertFalse(codecAcceptsHugeBinaryValues("field"));
|
||||
|
||||
// OK: some codecs can't handle binary DV > 32K
|
||||
w.rollback();
|
||||
d.close();
|
||||
return;
|
||||
}
|
||||
}
|
||||
w.close();
|
||||
|
||||
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(ar);
|
||||
|
||||
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||
for(int docID=0;docID<docBytes.size();docID++) {
|
||||
Document doc = ar.document(docID);
|
||||
BytesRef bytes = s.get(docID);
|
||||
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
|
||||
assertEquals(expected.length, bytes.length);
|
||||
assertEquals(new BytesRef(expected), bytes);
|
||||
}
|
||||
|
||||
assertTrue(codecAcceptsHugeBinaryValues("field"));
|
||||
|
||||
ar.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
private static final int LARGE_BINARY_FIELD_LENGTH = (1 << 15) - 2;
|
||||
|
||||
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
|
||||
public void testHugeBinaryValueLimit() throws Exception {
|
||||
// We only test DVFormats that have a limit
|
||||
assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
// FSDirectory because SimpleText will consume gobs of
|
||||
// space when storing big binary values:
|
||||
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
|
||||
boolean doFixed = random().nextBoolean();
|
||||
int numDocs;
|
||||
int fixedLength = 0;
|
||||
if (doFixed) {
|
||||
// Sometimes make all values fixed length since some
|
||||
// codecs have different code paths for this:
|
||||
numDocs = TestUtil.nextInt(random(), 10, 20);
|
||||
fixedLength = LARGE_BINARY_FIELD_LENGTH;
|
||||
} else {
|
||||
numDocs = TestUtil.nextInt(random(), 100, 200);
|
||||
}
|
||||
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
|
||||
List<byte[]> docBytes = new ArrayList<>();
|
||||
long totalBytes = 0;
|
||||
for(int docID=0;docID<numDocs;docID++) {
|
||||
// we don't use RandomIndexWriter because it might add
|
||||
// more docvalues than we expect !!!!
|
||||
|
||||
// Must be > 64KB in size to ensure more than 2 pages in
|
||||
// PagedBytes would be needed:
|
||||
int numBytes;
|
||||
if (doFixed) {
|
||||
numBytes = fixedLength;
|
||||
} else if (docID == 0 || random().nextInt(5) == 3) {
|
||||
numBytes = LARGE_BINARY_FIELD_LENGTH;
|
||||
} else {
|
||||
numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
|
||||
}
|
||||
totalBytes += numBytes;
|
||||
if (totalBytes > 5 * 1024*1024) {
|
||||
break;
|
||||
}
|
||||
byte[] bytes = new byte[numBytes];
|
||||
random().nextBytes(bytes);
|
||||
docBytes.add(bytes);
|
||||
Document doc = new Document();
|
||||
BytesRef b = new BytesRef(bytes);
|
||||
b.length = bytes.length;
|
||||
doc.add(new BinaryDocValuesField("field", b));
|
||||
doc.add(new StringField("id", ""+docID, Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
DirectoryReader r = DirectoryReader.open(w);
|
||||
w.close();
|
||||
|
||||
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(ar);
|
||||
|
||||
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||
for(int docID=0;docID<docBytes.size();docID++) {
|
||||
Document doc = ar.document(docID);
|
||||
BytesRef bytes = s.get(docID);
|
||||
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
|
||||
assertEquals(expected.length, bytes.length);
|
||||
assertEquals(new BytesRef(expected), bytes);
|
||||
}
|
||||
|
||||
ar.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||
Document doc = new Document();
|
||||
Field idField = new StringField("id", "", Field.Store.NO);
|
||||
Field indexedField = new StringField("indexed", "", Field.Store.NO);
|
||||
Field dvField = new SortedDocValuesField("dv", new BytesRef());
|
||||
doc.add(idField);
|
||||
doc.add(indexedField);
|
||||
doc.add(dvField);
|
||||
|
||||
// index some docs
|
||||
int numDocs = atLeast(300);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
idField.setStringValue(Integer.toString(i));
|
||||
final int length;
|
||||
if (minLength == maxLength) {
|
||||
length = minLength; // fixed length
|
||||
} else {
|
||||
length = TestUtil.nextInt(random(), minLength, maxLength);
|
||||
}
|
||||
String value = TestUtil.randomSimpleString(random(), length);
|
||||
indexedField.setStringValue(value);
|
||||
dvField.setBytesValue(new BytesRef(value));
|
||||
writer.addDocument(doc);
|
||||
if (random().nextInt(31) == 0) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
|
||||
// delete some docs
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
writer.close();
|
||||
|
||||
// compare
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
|
||||
SortedDocValues actual = r.getSortedDocValues("dv");
|
||||
assertEquals(r.maxDoc(), expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||
|
||||
// index some docs
|
||||
int numDocs = atLeast(300);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
|
||||
doc.add(idField);
|
||||
final int length = TestUtil.nextInt(random(), minLength, maxLength);
|
||||
int numValues = random().nextInt(17);
|
||||
// create a random list of strings
|
||||
List<String> values = new ArrayList<>();
|
||||
for (int v = 0; v < numValues; v++) {
|
||||
values.add(TestUtil.randomSimpleString(random(), minLength, length));
|
||||
}
|
||||
|
||||
// add in any order to the indexed field
|
||||
ArrayList<String> unordered = new ArrayList<>(values);
|
||||
Collections.shuffle(unordered, random());
|
||||
for (String v : values) {
|
||||
doc.add(newStringField("indexed", v, Field.Store.NO));
|
||||
}
|
||||
|
||||
// add in any order to the dv field
|
||||
ArrayList<String> unordered2 = new ArrayList<>(values);
|
||||
Collections.shuffle(unordered2, random());
|
||||
for (String v : unordered2) {
|
||||
doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
|
||||
}
|
||||
|
||||
writer.addDocument(doc);
|
||||
if (random().nextInt(31) == 0) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
|
||||
// delete some docs
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
|
||||
// compare per-segment
|
||||
DirectoryReader ir = writer.getReader();
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
|
||||
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
|
||||
assertEquals(r.maxDoc(), expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
|
||||
writer.forceMerge(1);
|
||||
|
||||
// now compare again after the merge
|
||||
ir = writer.getReader();
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
|
||||
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
|
||||
assertEquals(ir.maxDoc(), expected, actual);
|
||||
ir.close();
|
||||
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
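// Indexes parallel "indexed" (postings) and "dv" (NumericDocValues) fields, leaving both out
// on roughly a quarter of the docs, then checks that getDocsWithField reports identical bits
// for the uninverted field and the doc values field.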
private void doTestMissingVsFieldCache(LongProducer longs) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||
Field idField = new StringField("id", "", Field.Store.NO);
|
||||
Field indexedField = newStringField("indexed", "", Field.Store.NO);
|
||||
Field dvField = new NumericDocValuesField("dv", 0);
|
||||
|
||||
|
||||
// index some docs
|
||||
int numDocs = atLeast(300);
|
||||
// numDocs should always be > 256 so that in case of a codec that optimizes
|
||||
// for numbers of values <= 256, all storage layouts are tested
|
||||
assert numDocs > 256;
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
idField.setStringValue(Integer.toString(i));
|
||||
long value = longs.next();
|
||||
indexedField.setStringValue(Long.toString(value));
|
||||
dvField.setLongValue(value);
|
||||
Document doc = new Document();
|
||||
doc.add(idField);
|
||||
// 1/4 of the time we neglect to add the fields
|
||||
if (random().nextInt(4) > 0) {
|
||||
doc.add(indexedField);
|
||||
doc.add(dvField);
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
if (random().nextInt(31) == 0) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
|
||||
// delete some docs
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
|
||||
// merge some segments and ensure that at least one of them has more than
|
||||
// 256 values
|
||||
writer.forceMerge(numDocs / 256);
|
||||
|
||||
writer.close();
|
||||
|
||||
// compare
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null);
|
||||
Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void doTestMissingVsFieldCache(final long minValue, final long maxValue) throws Exception {
|
||||
doTestMissingVsFieldCache(new LongProducer() {
|
||||
@Override
|
||||
long next() {
|
||||
return TestUtil.nextLong(random(), minValue, maxValue);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static abstract class LongProducer {
|
||||
abstract long next();
|
||||
}
|
||||
|
||||
private void assertEquals(Bits expected, Bits actual) throws Exception {
|
||||
assertEquals(expected.length(), actual.length());
|
||||
for (int i = 0; i < expected.length(); i++) {
|
||||
assertEquals(expected.get(i), actual.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
|
||||
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
|
||||
}
|
||||
|
||||
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
|
||||
// can be null for the segment if no docs actually had any SortedDocValues
|
||||
// in this case FC.getDocTermsOrds returns EMPTY
|
||||
if (actual == null) {
|
||||
assertEquals(expected.getValueCount(), 0);
|
||||
return;
|
||||
}
|
||||
assertEquals(expected.getValueCount(), actual.getValueCount());
|
||||
// compare ord lists
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
expected.setDocument(i);
|
||||
actual.setDocument(i);
|
||||
long expectedOrd;
|
||||
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
|
||||
assertEquals(expectedOrd, actual.nextOrd());
|
||||
}
|
||||
assertEquals(NO_MORE_ORDS, actual.nextOrd());
|
||||
}
|
||||
|
||||
// compare ord dictionary
|
||||
for (long i = 0; i < expected.getValueCount(); i++) {
|
||||
final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
|
||||
final BytesRef actualBytes = actual.lookupOrd(i);
|
||||
assertEquals(expectedBytes, actualBytes);
|
||||
}
|
||||
|
||||
// compare termsenum
|
||||
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
|
||||
}
|
||||
|
||||
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
|
||||
BytesRef ref;
|
||||
|
||||
// sequential next() through all terms
|
||||
while ((ref = expected.next()) != null) {
|
||||
assertEquals(ref, actual.next());
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
assertNull(actual.next());
|
||||
|
||||
// sequential seekExact(ord) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
expected.seekExact(i);
|
||||
actual.seekExact(i);
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// sequential seekExact(BytesRef) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
expected.seekExact(i);
|
||||
assertTrue(actual.seekExact(expected.term()));
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// sequential seekCeil(BytesRef) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
expected.seekExact(i);
|
||||
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekExact(ord)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||
expected.seekExact(randomOrd);
|
||||
actual.seekExact(randomOrd);
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekExact(BytesRef)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||
expected.seekExact(randomOrd);
|
||||
actual.seekExact(expected.term());
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekCeil(BytesRef)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
|
||||
SeekStatus expectedStatus = expected.seekCeil(target);
|
||||
assertEquals(expectedStatus, actual.seekCeil(target));
|
||||
if (expectedStatus != SeekStatus.END) {
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean codecAcceptsHugeBinaryValues(String field) {
|
||||
String name = TestUtil.getDocValuesFormat(field);
|
||||
return !(name.equals("Memory")); // Direct has a different type of limit
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
// TODO: what happened to this test... it's not actually uninverting?
|
||||
public class TestFieldCacheWithThreads extends LuceneTestCase {
|
||||
|
||||
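// Several threads read the same cached numeric, binary and sorted entries concurrently and
// verify each looked-up value against the lists captured at indexing time.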
public void test() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||
|
||||
final List<Long> numbers = new ArrayList<>();
|
||||
final List<BytesRef> binary = new ArrayList<>();
|
||||
final List<BytesRef> sorted = new ArrayList<>();
|
||||
final int numDocs = atLeast(100);
|
||||
for(int i=0;i<numDocs;i++) {
|
||||
Document d = new Document();
|
||||
long number = random().nextLong();
|
||||
d.add(new NumericDocValuesField("number", number));
|
||||
BytesRef bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
|
||||
d.add(new BinaryDocValuesField("bytes", bytes));
|
||||
binary.add(bytes);
|
||||
bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
|
||||
d.add(new SortedDocValuesField("sorted", bytes));
|
||||
sorted.add(bytes);
|
||||
w.addDocument(d);
|
||||
numbers.add(number);
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
final IndexReader r = DirectoryReader.open(w);
|
||||
w.close();
|
||||
|
||||
assertEquals(1, r.leaves().size());
|
||||
final LeafReader ar = r.leaves().get(0).reader();
|
||||
|
||||
int numThreads = TestUtil.nextInt(random(), 2, 5);
|
||||
List<Thread> threads = new ArrayList<>();
|
||||
final CountDownLatch startingGun = new CountDownLatch(1);
|
||||
for(int t=0;t<numThreads;t++) {
|
||||
final Random threadRandom = new Random(random().nextLong());
|
||||
Thread thread = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
//NumericDocValues ndv = ar.getNumericDocValues("number");
|
||||
NumericDocValues ndv = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false);
|
||||
//BinaryDocValues bdv = ar.getBinaryDocValues("bytes");
|
||||
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false);
|
||||
SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
|
||||
startingGun.await();
|
||||
int iters = atLeast(1000);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
int docID = threadRandom.nextInt(numDocs);
|
||||
switch(threadRandom.nextInt(4)) {
|
||||
case 0:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.INT_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
case 1:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
case 2:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.FLOAT_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
case 3:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.DOUBLE_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
}
|
||||
BytesRef term = bdv.get(docID);
|
||||
assertEquals(binary.get(docID), term);
|
||||
term = sdv.get(docID);
|
||||
assertEquals(sorted.get(docID), term);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
thread.start();
|
||||
threads.add(thread);
|
||||
}
|
||||
|
||||
startingGun.countDown();
|
||||
|
||||
for(Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
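/**
 * Indexes random strings as sorted doc values, then has multiple threads read the
 * sorted values for random docIDs until a deadline, verifying each lookup against the
 * originally indexed BytesRef via the per-document "id" field.
 */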
public void test2() throws Exception {
|
||||
Random random = random();
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
final Directory dir = newDirectory();
|
||||
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||
final boolean allowDups = random.nextBoolean();
|
||||
final Set<String> seen = new HashSet<>();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
|
||||
}
|
||||
int numDocs = 0;
|
||||
final List<BytesRef> docValues = new ArrayList<>();
|
||||
|
||||
// TODO: deletions
|
||||
while (numDocs < NUM_DOCS) {
|
||||
final String s;
|
||||
if (random.nextBoolean()) {
|
||||
s = TestUtil.randomSimpleString(random);
|
||||
} else {
|
||||
s = TestUtil.randomUnicodeString(random);
|
||||
}
|
||||
final BytesRef br = new BytesRef(s);
|
||||
|
||||
if (!allowDups) {
|
||||
if (seen.contains(s)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(s);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": s=" + s);
|
||||
}
|
||||
|
||||
final Document doc = new Document();
|
||||
doc.add(new SortedDocValuesField("stringdv", br));
|
||||
doc.add(new NumericDocValuesField("id", numDocs));
|
||||
docValues.add(br);
|
||||
writer.addDocument(doc);
|
||||
numDocs++;
|
||||
|
||||
if (random.nextInt(40) == 17) {
|
||||
// force flush
|
||||
writer.getReader().close();
|
||||
}
|
||||
}
|
||||
|
||||
writer.forceMerge(1);
|
||||
final DirectoryReader r = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
final LeafReader sr = getOnlyLeafReader(r);
|
||||
|
||||
final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);
|
||||
|
||||
final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
|
||||
Thread[] threads = new Thread[NUM_THREADS];
|
||||
for(int thread=0;thread<NUM_THREADS;thread++) {
|
||||
threads[thread] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
Random random = random();
|
||||
final SortedDocValues stringDVDirect;
|
||||
final NumericDocValues docIDToID;
|
||||
try {
|
||||
stringDVDirect = sr.getSortedDocValues("stringdv");
|
||||
docIDToID = sr.getNumericDocValues("id");
|
||||
assertNotNull(stringDVDirect);
|
||||
} catch (IOException ioe) {
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
while(System.nanoTime() < END_TIME) {
|
||||
final SortedDocValues source;
|
||||
source = stringDVDirect;
|
||||
|
||||
for(int iter=0;iter<100;iter++) {
|
||||
final int docID = random.nextInt(sr.maxDoc());
|
||||
BytesRef term = source.get(docID);
|
||||
assertEquals(docValues.get((int) docIDToID.get(docID)), term);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[thread].start();
|
||||
}
|
||||
|
||||
for(Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
|
@@ -0,0 +1,497 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.concurrent.CyclicBarrier;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/** random assortment of tests against legacy numerics */
|
||||
public class TestLegacyFieldCache extends LuceneTestCase {
|
||||
private static LeafReader reader;
|
||||
private static int NUM_DOCS;
|
||||
private static Directory directory;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
NUM_DOCS = atLeast(500);
|
||||
directory = newDirectory();
|
||||
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||
long theLong = Long.MAX_VALUE;
|
||||
double theDouble = Double.MAX_VALUE;
|
||||
int theInt = Integer.MAX_VALUE;
|
||||
float theFloat = Float.MAX_VALUE;
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: setUp");
|
||||
}
|
||||
for (int i = 0; i < NUM_DOCS; i++){
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
|
||||
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
|
||||
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
|
||||
if (i%2 == 0) {
|
||||
doc.add(new LegacyIntField("sparse", i, Field.Store.NO));
|
||||
}
|
||||
|
||||
if (i%2 == 0) {
|
||||
doc.add(new LegacyIntField("numInt", i, Field.Store.NO));
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
IndexReader r = writer.getReader();
|
||||
reader = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
}
|
||||
|
||||
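// Uninverting the same field twice with two different parsers should cause the cache to
// print a WARNING about the inconsistent usage to the configured info stream.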
public void testInfoStream() throws Exception {
|
||||
try {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
|
||||
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
cache.getNumerics(reader, "theDouble", new FieldCache.Parser() {
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||
}
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
int val = (int) LegacyNumericUtils.prefixCodedToLong(term);
|
||||
if (val<0) val ^= 0x7fffffff;
|
||||
return val;
|
||||
}
|
||||
}, false);
|
||||
assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1);
|
||||
} finally {
|
||||
FieldCache.DEFAULT.setInfoStream(null);
|
||||
FieldCache.DEFAULT.purgeAllCaches();
|
||||
}
|
||||
}
|
||||
|
||||
public void test() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Long.MAX_VALUE - i, longs.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
|
||||
}
|
||||
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "theLong", null);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", null));
|
||||
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
}
|
||||
|
||||
docsWithField = cache.getDocsWithField(reader, "sparse", null);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", null));
|
||||
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
}
|
||||
|
||||
public void testEmptyIndex() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
|
||||
writer.close();
|
||||
IndexReader r = DirectoryReader.open(dir);
|
||||
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(reader);
|
||||
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
|
||||
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
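// Requesting numerics with setDocsWithField=true should populate both the values and the
// docsWithField bits in one pass (two cache entries per field); the fully populated field
// yields Bits.MatchAllBits, while the sparse field gets real per-document bits.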
public void testDocsWithField() throws Exception {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
|
||||
// The double[] takes one slot, and docsWithField should also
|
||||
// have been populated:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER);
|
||||
|
||||
// No new entries should appear:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
assertTrue(bits instanceof Bits.MatchAllBits);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.LEGACY_INT_PARSER);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
|
||||
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.LEGACY_INT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, numInts.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
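// Several threads race between purging all caches (coordinated via a CyclicBarrier) and
// re-reading the sparse field's values and docsWithField bits, checking that concurrent
// purge/populate never produces inconsistent results.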
public void testGetDocsWithFieldThreadSafety() throws Exception {
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
int NUM_THREADS = 3;
|
||||
Thread[] threads = new Thread[NUM_THREADS];
|
||||
final AtomicBoolean failed = new AtomicBoolean();
|
||||
final AtomicInteger iters = new AtomicInteger();
|
||||
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
|
||||
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
|
||||
new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
cache.purgeAllCaches();
|
||||
iters.incrementAndGet();
|
||||
}
|
||||
});
|
||||
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
try {
|
||||
while(!failed.get()) {
|
||||
final int op = random().nextInt(3);
|
||||
if (op == 0) {
|
||||
// Purge all caches & resume, once all
|
||||
// threads get here:
|
||||
restart.await();
|
||||
if (iters.get() >= NUM_ITER) {
|
||||
break;
|
||||
}
|
||||
} else if (op == 1) {
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
} else {
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
failed.set(true);
|
||||
restart.reset();
|
||||
throw new RuntimeException(t);
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[threadIDX].start();
|
||||
}
|
||||
|
||||
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX].join();
|
||||
}
|
||||
assertFalse(failed.get());
|
||||
}
|
||||
|
||||
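// Fields that already carry doc values of an incompatible type must be rejected with an
// IllegalStateException rather than silently uninverted; only the NUMERIC doc values
// field can be read back through getNumerics().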
public void testDocValuesIntegration() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(null);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
|
||||
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
|
||||
doc.add(new NumericDocValuesField("numeric", 42));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
// Binary type: can be retrieved via getTerms()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.LEGACY_INT_PARSER, false);
|
||||
});
|
||||
|
||||
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.LEGACY_INT_PARSER, false);
|
||||
});
|
||||
|
||||
// Numeric type: can be retrieved via getInts() and so on
|
||||
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.LEGACY_INT_PARSER, false);
|
||||
assertEquals(42, numeric.get(0));
|
||||
|
||||
// SortedSet type: can be retrieved via getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.LEGACY_INT_PARSER, false);
|
||||
});
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonexistantFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonIndexedFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("bogusbytes", "bogus"));
|
||||
doc.add(new StoredField("bogusshorts", "bogus"));
|
||||
doc.add(new StoredField("bogusints", "bogus"));
|
||||
doc.add(new StoredField("boguslongs", "bogus"));
|
||||
doc.add(new StoredField("bogusfloats", "bogus"));
|
||||
doc.add(new StoredField("bogusdoubles", "bogus"));
|
||||
doc.add(new StoredField("bogusbits", "bogus"));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent using the full long range
|
||||
public void testLongFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
|
||||
doc.add(field);
|
||||
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final long v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Long.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Long.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextLong(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setLongValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], longs.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent using the full int range
|
||||
public void testIntFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
|
||||
doc.add(field);
|
||||
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final int v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Integer.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Integer.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextInt(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setIntValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], ints.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
|
@@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestNumericTerms32 extends LuceneTestCase {
|
||||
// distance of entries
|
||||
private static int distance;
|
||||
// shift the starting of the values to the left, to also have negative values:
|
||||
private static final int startOffset = - 1 << 15;
|
||||
// number of docs to generate for testing
|
||||
private static int noDocs;
|
||||
|
||||
private static Directory directory = null;
|
||||
private static IndexReader reader = null;
|
||||
private static IndexSearcher searcher = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
noDocs = atLeast(4096);
|
||||
distance = (1 << 30) / noDocs;
|
||||
directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
|
||||
.setMergePolicy(newLogMergePolicy()));
|
||||
|
||||
final FieldType storedInt = new FieldType(LegacyIntField.TYPE_NOT_STORED);
|
||||
storedInt.setStored(true);
|
||||
storedInt.freeze();
|
||||
|
||||
final FieldType storedInt8 = new FieldType(storedInt);
|
||||
storedInt8.setNumericPrecisionStep(8);
|
||||
|
||||
final FieldType storedInt4 = new FieldType(storedInt);
|
||||
storedInt4.setNumericPrecisionStep(4);
|
||||
|
||||
final FieldType storedInt2 = new FieldType(storedInt);
|
||||
storedInt2.setNumericPrecisionStep(2);
|
||||
|
||||
LegacyIntField
|
||||
field8 = new LegacyIntField("field8", 0, storedInt8),
|
||||
field4 = new LegacyIntField("field4", 0, storedInt4),
|
||||
field2 = new LegacyIntField("field2", 0, storedInt2);
|
||||
|
||||
Document doc = new Document();
|
||||
// add fields that have a distance, to test general functionality
|
||||
doc.add(field8); doc.add(field4); doc.add(field2);
|
||||
|
||||
// Add a series of noDocs docs with increasing int values
|
||||
for (int l=0; l<noDocs; l++) {
|
||||
int val=distance*l+startOffset;
|
||||
field8.setIntValue(val);
|
||||
field4.setIntValue(val);
|
||||
field2.setIntValue(val);
|
||||
|
||||
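// note: this assignment is unused by the sorting tests below; the value is overwritten
// on the next loop iteration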
val=l-(noDocs/2);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
Map<String,Type> map = new HashMap<>();
|
||||
map.put("field2", Type.LEGACY_INTEGER);
|
||||
map.put("field4", Type.LEGACY_INTEGER);
|
||||
map.put("field8", Type.LEGACY_INTEGER);
|
||||
reader = UninvertingReader.wrap(writer.getReader(), map);
|
||||
searcher=newSearcher(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
searcher = null;
|
||||
TestUtil.checkReader(reader);
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
}
|
||||
|
||||
private void testSorting(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should return descending documents
|
||||
int num = TestUtil.nextInt(random(), 10, 20);
|
||||
for (int i = 0; i < num; i++) {
|
||||
int lower=(int)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
int upper=(int)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
int a=lower; lower=upper; upper=a;
|
||||
}
|
||||
Query tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
|
||||
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.INT, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
int last = searcher.doc(sd[0].doc).getField(field).numericValue().intValue();
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
int act = searcher.doc(sd[j].doc).getField(field).numericValue().intValue();
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(8);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(2);
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestNumericTerms64 extends LuceneTestCase {
|
||||
// distance of entries
|
||||
private static long distance;
|
||||
// shift the starting of the values to the left, to also have negative values:
|
||||
private static final long startOffset = - 1L << 31;
|
||||
// number of docs to generate for testing
|
||||
private static int noDocs;
|
||||
|
||||
private static Directory directory = null;
|
||||
private static IndexReader reader = null;
|
||||
private static IndexSearcher searcher = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
noDocs = atLeast(4096);
|
||||
distance = (1L << 60) / noDocs;
|
||||
directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
|
||||
.setMergePolicy(newLogMergePolicy()));
|
||||
|
||||
final FieldType storedLong = new FieldType(LegacyLongField.TYPE_NOT_STORED);
|
||||
storedLong.setStored(true);
|
||||
storedLong.freeze();
|
||||
|
||||
final FieldType storedLong8 = new FieldType(storedLong);
|
||||
storedLong8.setNumericPrecisionStep(8);
|
||||
|
||||
final FieldType storedLong4 = new FieldType(storedLong);
|
||||
storedLong4.setNumericPrecisionStep(4);
|
||||
|
||||
final FieldType storedLong6 = new FieldType(storedLong);
|
||||
storedLong6.setNumericPrecisionStep(6);
|
||||
|
||||
final FieldType storedLong2 = new FieldType(storedLong);
|
||||
storedLong2.setNumericPrecisionStep(2);
|
||||
|
||||
LegacyLongField
|
||||
field8 = new LegacyLongField("field8", 0L, storedLong8),
|
||||
field6 = new LegacyLongField("field6", 0L, storedLong6),
|
||||
field4 = new LegacyLongField("field4", 0L, storedLong4),
|
||||
field2 = new LegacyLongField("field2", 0L, storedLong2);
|
||||
|
||||
Document doc = new Document();
|
||||
// add fields that have a distance, to test general functionality
|
||||
doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2);
|
||||
|
||||
// Add a series of noDocs docs with increasing long values by updating the fields
|
||||
for (int l=0; l<noDocs; l++) {
|
||||
long val=distance*l+startOffset;
|
||||
field8.setLongValue(val);
|
||||
field6.setLongValue(val);
|
||||
field4.setLongValue(val);
|
||||
field2.setLongValue(val);
|
||||
|
||||
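// note: this assignment is unused by the sorting tests below; the value is overwritten
// on the next loop iteration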
val=l-(noDocs/2);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
Map<String,Type> map = new HashMap<>();
|
||||
map.put("field2", Type.LEGACY_LONG);
|
||||
map.put("field4", Type.LEGACY_LONG);
|
||||
map.put("field6", Type.LEGACY_LONG);
|
||||
map.put("field8", Type.LEGACY_LONG);
|
||||
reader = UninvertingReader.wrap(writer.getReader(), map);
|
||||
searcher=newSearcher(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
searcher = null;
|
||||
TestUtil.checkReader(reader);
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
}
|
||||
|
||||
private void testSorting(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should return descending documents
|
||||
int num = TestUtil.nextInt(random(), 10, 20);
|
||||
for (int i = 0; i < num; i++) {
|
||||
long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
|
||||
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(8);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_6bit() throws Exception {
|
||||
testSorting(6);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(2);
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,395 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
public class TestUninvertingReader extends LuceneTestCase {
|
||||
|
||||
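// Wrapping with Type.SORTED_SET_INTEGER exposes an indexed LegacyIntField as
// SortedSetDocValues; ordinals follow the numeric term order, so -3 maps to ord 0 and
// 5 to ord 1.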
public void testSortedSetInteger() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||
Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
|
||||
LeafReader ar = ir.leaves().get(0).reader();
|
||||
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
|
||||
TestUtil.checkReader(ir);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSortedSetFloat() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
|
||||
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||
Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
|
||||
LeafReader ar = ir.leaves().get(0).reader();
|
||||
|
||||
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value));
|
||||
TestUtil.checkReader(ir);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSortedSetLong() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||
Collections.singletonMap("foo", Type.SORTED_SET_LONG));
|
||||
LeafReader ar = ir.leaves().get(0).reader();
|
||||
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
|
||||
TestUtil.checkReader(ir);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSortedSetDouble() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
|
||||
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||
Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
|
||||
LeafReader ar = ir.leaves().get(0).reader();
|
||||
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
|
||||
TestUtil.checkReader(ir);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
/** Tests {@link Type#SORTED_SET_INTEGER} using Integer-based fields, with and without precision steps */
|
||||
public void testSortedSetIntegerManyValues() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
final FieldType NO_TRIE_TYPE = new FieldType(LegacyIntField.TYPE_NOT_STORED);
|
||||
NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);
|
||||
|
||||
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
|
||||
UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
|
||||
UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
|
||||
UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
|
||||
UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
|
||||
final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
|
||||
MULTI_VALUES.add("trie_multi");
|
||||
MULTI_VALUES.add("notrie_multi");
|
||||
|
||||
|
||||
final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
|
||||
final int MIN = TestUtil.nextInt(random(), 10, 100);
|
||||
final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
|
||||
final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;
|
||||
|
||||
{ // (at least) one doc should have every value, so that at least one segment has every value
|
||||
final Document doc = new Document();
|
||||
for (int i = MIN; i <= MAX; i++) {
|
||||
doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
|
||||
}
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
|
||||
// now add some more random docs (note: starting at i=1 because of previously added doc)
|
||||
for (int i = 1; i < NUM_DOCS; i++) {
|
||||
final Document doc = new Document();
|
||||
if (0 != TestUtil.nextInt(random(), 0, 9)) {
|
||||
int val = TestUtil.nextInt(random(), MIN, MAX);
|
||||
doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
|
||||
}
|
||||
if (0 != TestUtil.nextInt(random(), 0, 9)) {
|
||||
int numMulti = atLeast(1);
|
||||
while (0 < numMulti--) {
|
||||
int val = TestUtil.nextInt(random(), MIN, MAX);
|
||||
doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
|
||||
}
|
||||
}
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
|
||||
iw.close();
|
||||
|
||||
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
|
||||
TestUtil.checkReader(ir);
|
||||
|
||||
final int NUM_LEAVES = ir.leaves().size();
|
||||
|
||||
// check the leaves: no more than the total set size
|
||||
for (LeafReaderContext rc : ir.leaves()) {
|
||||
final LeafReader ar = rc.reader();
|
||||
for (String f : UNINVERT_MAP.keySet()) {
|
||||
final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
|
||||
final long valSetSize = v.getValueCount();
|
||||
assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got " +
|
||||
valSetSize + " from: " + ar.toString(),
|
||||
valSetSize <= EXPECTED_VALSET_SIZE);
|
||||
|
||||
if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
|
||||
// tighter check on multi fields in single segment index since we know one doc has all of them
|
||||
assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size",
|
||||
EXPECTED_VALSET_SIZE, valSetSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check the composite of all leaves: exact expectation of set size
|
||||
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
|
||||
TestUtil.checkReader(composite);
|
||||
|
||||
for (String f : MULTI_VALUES) {
|
||||
final SortedSetDocValues v = composite.getSortedSetDocValues(f);
|
||||
final long valSetSize = v.getValueCount();
|
||||
assertEquals(f + ": Composite reader value set should have had exactly expected size",
|
||||
EXPECTED_VALSET_SIZE, valSetSize);
|
||||
}
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSortedSetEmptyIndex() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
iw.close();
|
||||
|
||||
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
|
||||
for (Type t : EnumSet.allOf(Type.class)) {
|
||||
UNINVERT_MAP.put(t.name(), t);
|
||||
}
|
||||
|
||||
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
|
||||
TestUtil.checkReader(ir);
|
||||
|
||||
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
|
||||
TestUtil.checkReader(composite);
|
||||
|
||||
for (String f : UNINVERT_MAP.keySet()) {
|
||||
// check the leaves
|
||||
// (normally there are none for an empty index, so this is really just future
|
||||
// proofing in case that changes for some reason)
|
||||
for (LeafReaderContext rc : ir.leaves()) {
|
||||
final LeafReader ar = rc.reader();
|
||||
assertNull(f + ": Expected no doc values from empty index (leaf)",
|
||||
ar.getSortedSetDocValues(f));
|
||||
}
|
||||
|
||||
// check the composite
|
||||
assertNull(f + ": Expected no doc values from empty index (composite)",
|
||||
composite.getSortedSetDocValues(f));
|
||||
|
||||
}
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
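// The wrapped reader should advertise synthetic NUMERIC doc values in its FieldInfos for
// the uninverted fields, preserve point dimension/byte counts for the point field, and
// leave the purely stored (non-indexed) field without any doc values type.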
public void testFieldInfos() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
BytesRef idBytes = new BytesRef("id");
|
||||
doc.add(new StringField("id", idBytes, Store.YES));
|
||||
doc.add(new LegacyIntField("int", 5, Store.YES));
|
||||
doc.add(new NumericDocValuesField("dv", 5));
|
||||
doc.add(new IntPoint("dint", 5));
|
||||
doc.add(new StoredField("stored", 5)); // not indexed
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
Map<String, Type> uninvertingMap = new HashMap<>();
|
||||
uninvertingMap.put("int", Type.LEGACY_INTEGER);
|
||||
uninvertingMap.put("dv", Type.LEGACY_INTEGER);
|
||||
uninvertingMap.put("dint", Type.INTEGER_POINT);
|
||||
|
||||
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||
uninvertingMap);
|
||||
LeafReader leafReader = ir.leaves().get(0).reader();
|
||||
|
||||
FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int");
|
||||
assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
|
||||
assertEquals(0, intFInfo.getPointDimensionCount());
|
||||
assertEquals(0, intFInfo.getPointNumBytes());
|
||||
|
||||
FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint");
|
||||
assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
|
||||
assertEquals(1, dintFInfo.getPointDimensionCount());
|
||||
assertEquals(4, dintFInfo.getPointNumBytes());
|
||||
|
||||
FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv");
|
||||
assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());
|
||||
|
||||
FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored");
|
||||
assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());
|
||||
|
||||
TestUtil.checkReader(ir);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
|
@@ -22,25 +22,24 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.ResultContext;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.CopyField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
|
|
@@ -503,7 +503,7 @@ public class FacetStream extends TupleStream implements Expressible {
|
|||
t.put(identifier, d);
|
||||
++m;
|
||||
} else {
|
||||
long l = (long)bucket.get("count");
|
||||
long l = ((Number)bucket.get("count")).longValue();
|
||||
t.put("count(*)", l);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -46,6 +46,7 @@ public interface HighlightParams {
|
|||
public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary";
|
||||
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
|
||||
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
|
||||
public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate";
|
||||
public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
|
||||
public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";
|
||||
|
||||
|
|
|
@@ -73,14 +73,19 @@ public class StreamingTest extends SolrCloudTestCase {
      .withFunctionName("parallel", ParallelStream.class);

  private static String zkHost;

  private static int numShards;
  private static int numWorkers;

  @BeforeClass
  public static void configureCluster() throws Exception {
    configureCluster(2)
    numShards = random().nextInt(2) + 1; //1 - 3
    numWorkers = numShards > 2 ? random().nextInt(numShards - 1) + 1 : numShards;
    configureCluster(numShards)
        .addConfig("conf", getFile("solrj").toPath().resolve("solr").resolve("configsets").resolve("streaming").resolve("conf"))
        .configure();

    CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1).process(cluster.getSolrClient());
    CollectionAdminRequest.createCollection(COLLECTION, "conf", numShards, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);

    zkHost = cluster.getZkServer().getZkAddress();

@@ -147,12 +152,11 @@ public class StreamingTest extends SolrCloudTestCase {

    SolrParams sParamsA = StreamingTest.mapParams("q", "*:*", "fl", "id,a_s,a_i,a_f", "sort", "a_s asc,a_f asc", "partitionKeys", "none");
    CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParamsA);
    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, stream, 2, new FieldComparator("a_s",ComparatorOrder.ASCENDING));

    ParallelStream pstream = parallelStream(stream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
    attachStreamFactory(pstream);
    List<Tuple> tuples = getTuples(pstream);

    assert(tuples.size() == 20); // Each tuple will be double counted.
    assert(tuples.size() == (10 * numWorkers)); // Each tuple will be double counted.

  }

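Editor's note on the updated assertion (an illustrative sketch, not part of the patch): because this test sets partitionKeys to "none", results are not hash-partitioned across workers, so every worker streams the full result set; that is why the original comment describes the tuples as double counted with two workers, and why the expected size now scales with the randomized worker count:

    // Illustration only; 10 is the matching-document count implied by the old "== 20 with 2 workers" assertion.
    int docs = 10;
    int expectedTuples = docs * numWorkers;  // each worker re-reads all matching documents
    assert tuples.size() == expectedTuples;
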
@@ -174,7 +178,7 @@ public class StreamingTest extends SolrCloudTestCase {
    SolrParams sParams = mapParams("q", "*:*", "fl", "id,a_s,a_i,a_f", "sort", "a_f asc,a_i asc", "partitionKeys", "a_f");
    CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParams);
    UniqueStream ustream = new UniqueStream(stream, new FieldEqualitor("a_f"));
    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, ustream, 2, new FieldComparator("a_f",ComparatorOrder.ASCENDING));
    ParallelStream pstream = parallelStream(ustream, new FieldComparator("a_f", ComparatorOrder.ASCENDING));
    attachStreamFactory(pstream);
    List<Tuple> tuples = getTuples(pstream);
    assert(tuples.size() == 5);

@@ -183,7 +187,7 @@ public class StreamingTest extends SolrCloudTestCase {
    //Test the eofTuples

    Map<String,Tuple> eofTuples = pstream.getEofTuples();
    assert(eofTuples.size() == 2); //There should be an EOF tuple for each worker.
    assert(eofTuples.size() == numWorkers); //There should be an EOF tuple for each worker.

  }

@@ -253,7 +257,7 @@ public class StreamingTest extends SolrCloudTestCase {
    SolrParams sParams = mapParams("q", "*:*", "fl", "id,a_s,a_i", "sort", "a_i asc", "partitionKeys", "a_i");
    CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParams);
    RankStream rstream = new RankStream(stream, 11, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
    ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_i", ComparatorOrder.DESCENDING));
    attachStreamFactory(pstream);
    List<Tuple> tuples = getTuples(pstream);

@@ -405,9 +409,7 @@ public class StreamingTest extends SolrCloudTestCase {
    ReducerStream rstream = new ReducerStream(stream,
                                              new FieldEqualitor("a_s"),
                                              new GroupOperation(new FieldComparator("a_f", ComparatorOrder.DESCENDING), 5));

    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s",ComparatorOrder.ASCENDING));

    ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
    attachStreamFactory(pstream);
    List<Tuple> tuples = getTuples(pstream);

@@ -433,9 +435,7 @@ public class StreamingTest extends SolrCloudTestCase {
    rstream = new ReducerStream(stream,
                                new FieldEqualitor("a_s"),
                                new GroupOperation(new FieldComparator("a_f", ComparatorOrder.ASCENDING), 3));

    pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s",ComparatorOrder.DESCENDING));

    pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.DESCENDING));
    attachStreamFactory(pstream);
    tuples = getTuples(pstream);

@@ -1401,7 +1401,7 @@ public class StreamingTest extends SolrCloudTestCase {
                        new CountMetric()};

    RollupStream rollupStream = new RollupStream(stream, buckets, metrics);
    ParallelStream parallelStream = new ParallelStream(zkHost, COLLECTION, rollupStream, 2, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
    ParallelStream parallelStream = parallelStream(rollupStream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
    attachStreamFactory(parallelStream);
    List<Tuple> tuples = getTuples(parallelStream);

@@ -1501,9 +1501,7 @@ public class StreamingTest extends SolrCloudTestCase {
    ReducerStream rstream = new ReducerStream(stream,
                                              new FieldEqualitor("a_s"),
                                              new GroupOperation(new FieldComparator("a_s", ComparatorOrder.ASCENDING), 2));

    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s", ComparatorOrder.ASCENDING));

    ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.ASCENDING));
    attachStreamFactory(pstream);
    List<Tuple> tuples = getTuples(pstream);
    assert(tuples.size() == 0);

@@ -1636,7 +1634,7 @@ public class StreamingTest extends SolrCloudTestCase {
    CloudSolrStream streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB);

    MergeStream mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.ASCENDING));
    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.ASCENDING));
    ParallelStream pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.ASCENDING));
    attachStreamFactory(pstream);
    List<Tuple> tuples = getTuples(pstream);

@@ -1651,7 +1649,7 @@ public class StreamingTest extends SolrCloudTestCase {
    streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB);

    mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
    pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.DESCENDING));
    pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.DESCENDING));
    attachStreamFactory(pstream);
    tuples = getTuples(pstream);

@@ -1684,14 +1682,13 @@ public class StreamingTest extends SolrCloudTestCase {
    CloudSolrStream streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB);

    MergeStream mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.ASCENDING));
    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.ASCENDING));

    ParallelStream pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.ASCENDING));
    attachStreamFactory(pstream);
    List<Tuple> tuples = getTuples(pstream);

    assert(tuples.size() == 9);
    Map<String, Tuple> eofTuples = pstream.getEofTuples();
    assert(eofTuples.size() == 2); // There should be an EOF Tuple for each worker.
    assert(eofTuples.size() == numWorkers); // There should be an EOF Tuple for each worker.

  }

@@ -1834,5 +1831,10 @@ public class StreamingTest extends SolrCloudTestCase {

    return params;
  }

  private ParallelStream parallelStream(TupleStream stream, FieldComparator comparator) throws IOException {
    ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, stream, numWorkers, comparator);
    return pstream;
  }

}