From 94c7ccd589d9d298a01a5b01bffda80080086ab0 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Wed, 25 May 2016 16:09:22 -0400 Subject: [PATCH 01/19] - Modernize example usage - Fail if script version is incompatible with release version --- dev-tools/scripts/smokeTestRelease.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index fdb92889247..4e19a3381e6 100644 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -1192,7 +1192,7 @@ revision_re = re.compile(r'rev([a-f\d]+)') def parse_config(): epilogue = textwrap.dedent(''' Example usage: - python3.2 -u dev-tools/scripts/smokeTestRelease.py http://people.apache.org/~whoever/staging_area/lucene-solr-4.3.0-RC1-rev1469340 + python3 -u dev-tools/scripts/smokeTestRelease.py https://dist.apache.org/repos/dist/dev/lucene/lucene-solr-6.0.1-RC2-revc7510a0... ''') description = 'Utility to test a release.' parser = argparse.ArgumentParser(description=description, epilog=epilogue, @@ -1350,8 +1350,25 @@ def confirmAllReleasesAreTestedForBackCompat(smokeVersion, unpackPath): else: print(' success!') +def getScriptVersion(): + topLevelDir = '../..' # Assumption: this script is in dev-tools/scripts/ of a checkout + m = re.compile(r'(.*)/').match(sys.argv[0]) # Get this script's directory + if m is not None and m.group(1) != '.': + origCwd = os.getcwd() + os.chdir(m.group(1)) + os.chdir('../..') + topLevelDir = os.getcwd() + os.chdir(origCwd) + reBaseVersion = re.compile(r'version\.base\s*=\s*(\d+\.\d+)') + return reBaseVersion.search(open('%s/lucene/version.properties' % topLevelDir).read()).group(1) + def main(): c = parse_config() + + scriptVersion = getScriptVersion() + if not c.version.startswith(scriptVersion + '.'): + raise RuntimeError('smokeTestRelease.py for %s.X is incompatible with a %s release.' % (scriptVersion, c.version)) + print('NOTE: output encoding is %s' % sys.stdout.encoding) smokeTest(c.java, c.url, c.revision, c.version, c.tmp_dir, c.is_signed, ' '.join(c.test_args)) From 2554b106922d2bbd354b020416859b5c846128ce Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 26 May 2016 09:49:27 +0200 Subject: [PATCH 02/19] Add a comment about the recursion threshold of StringMSBRadixSorter. 
--- .../src/java/org/apache/lucene/util/InPlaceMergeSorter.java | 2 +- lucene/core/src/java/org/apache/lucene/util/IntroSorter.java | 2 +- lucene/core/src/java/org/apache/lucene/util/Sorter.java | 2 +- .../java/org/apache/lucene/util/StringMSBRadixSorter.java | 5 ++++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java b/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java index dbf7d72ca13..1afba99f670 100644 --- a/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java @@ -33,7 +33,7 @@ public abstract class InPlaceMergeSorter extends Sorter { } void mergeSort(int from, int to) { - if (to - from < THRESHOLD) { + if (to - from < INSERTION_SORT_THRESHOLD) { insertionSort(from, to); } else { final int mid = (from + to) >>> 1; diff --git a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java index 498c06aa6ff..26f7e37dc9f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java @@ -38,7 +38,7 @@ public abstract class IntroSorter extends Sorter { } void quicksort(int from, int to, int maxDepth) { - if (to - from < THRESHOLD) { + if (to - from < INSERTION_SORT_THRESHOLD) { insertionSort(from, to); return; } else if (--maxDepth < 0) { diff --git a/lucene/core/src/java/org/apache/lucene/util/Sorter.java b/lucene/core/src/java/org/apache/lucene/util/Sorter.java index 451d271072c..0ac954b0d3b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Sorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/Sorter.java @@ -23,7 +23,7 @@ import java.util.Comparator; * @lucene.internal */ public abstract class Sorter { - static final int THRESHOLD = 20; + static final int INSERTION_SORT_THRESHOLD = 20; /** Sole constructor, used for inheritance. */ protected Sorter() {} diff --git a/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java index 0112d7aae83..84bd0749b5c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java @@ -21,10 +21,13 @@ import java.util.Arrays; /** Radix sorter for variable-length strings. This class sorts based on the most * significant byte first and falls back to {@link IntroSorter} when the size * of the buckets to sort becomes small. It is NOT stable. - * Worst-case memory usage is about {@code 2.3 KB} */ + * Worst-case memory usage is about {@code 2.3 KB}. 
*/ abstract class StringMSBRadixSorter extends Sorter { // after that many levels of recursion we fall back to introsort anyway + // this is used as a protection against the fact that radix sort performs + // worse when there are long common prefixes (probably because of cache + // locality) private static final int LEVEL_THRESHOLD = 8; // size of histograms: 256 + 1 to indicate that the string is finished private static final int HISTOGRAM_SIZE = 257; From a6839beb87a73bff6139df44a7b9168a498dd426 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 25 May 2016 11:16:18 +0200 Subject: [PATCH 03/19] LUCENE-7300: Add HardLinkCopyDirectoryWrapper to speed up file copying if hardlinks are applicable --- lucene/CHANGES.txt | 4 + .../apache/lucene/misc/IndexMergeTool.java | 4 +- .../store/HardlinkCopyDirectoryWrapper.java | 95 +++++++++++++++++++ .../TestHardLinkCopyDirectoryWrapper.java | 76 +++++++++++++++ lucene/tools/junit4/tests.policy | 4 +- 5 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java create mode 100644 lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a6e3b7163ec..a29ca4479be 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -23,6 +23,10 @@ New Features e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to DateTimeFormatter.ISO_INSTANT. (David Smiley) +* LUCENE-7300: The misc module now has a directory wrapper that uses hard-links if + applicable and supported when copying files from another FSDirectory in + Directory#copyFrom. (Simon Willnauer) + API Changes * LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo diff --git a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java index a63c16b1a11..cbb11cdd749 100644 --- a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java +++ b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.HardlinkCopyDirectoryWrapper; import org.apache.lucene.util.SuppressForbidden; import java.io.IOException; @@ -45,7 +46,8 @@ public class IndexMergeTool { Directory[] indexes = new Directory[args.length - 1]; for (int i = 1; i < args.length; i++) { - indexes[i - 1] = FSDirectory.open(Paths.get(args[i])); + // try to use hardlinks if possible + indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i]))); } System.out.println("Merging..."); diff --git a/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java b/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java new file mode 100644 index 00000000000..374178ec84a --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java @@ -0,0 +1,95 @@ +package org.apache.lucene.store; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.security.AccessController; +import java.security.PrivilegedAction; + +/** + * This directory wrapper overrides {@link Directory#copyFrom(Directory, String, String, IOContext)} in order + * to optionally use a hard-link instead of a full byte by byte file copy if applicable. Hard-links are only used if the + * underlying filesystem supports it and if the {@link java.nio.file.LinkPermission} "hard" is granted. + * + *
NOTE: Using hard-links changes the copy semantics of + * {@link Directory#copyFrom(Directory, String, String, IOContext)}. When hard-links are used changes to the source file + * will be reflected in the target file and vice-versa. Within Lucene, files are write once and should not be modified + * after they have been written. This directory should not be used in situations where files change after they have + * been written. + *
+ */ +public final class HardlinkCopyDirectoryWrapper extends FilterDirectory { + /** + * Creates a new HardlinkCopyDirectoryWrapper delegating to the given directory + */ + public HardlinkCopyDirectoryWrapper(Directory in) { + super(in); + } + + @Override + public void copyFrom(Directory from, String srcFile, String destFile, IOContext context) throws IOException { + final Directory fromUnwrapped = FilterDirectory.unwrap(from); + final Directory toUnwrapped = FilterDirectory.unwrap(this); + // try to unwrap to FSDirectory - we might be able to just create hard-links of these files and save copying + // the entire file. + Exception suppressedException = null; + boolean tryCopy = true; + if (fromUnwrapped instanceof FSDirectory + && toUnwrapped instanceof FSDirectory) { + final Path fromPath = ((FSDirectory) fromUnwrapped).getDirectory(); + final Path toPath = ((FSDirectory) toUnwrapped).getDirectory(); + + if (Files.isReadable(fromPath.resolve(srcFile)) && Files.isWritable(toPath)) { + // only try hardlinks if we have permission to access the files + // if not super.copyFrom() will give us the right exceptions + suppressedException = AccessController.doPrivileged((PrivilegedAction) () -> { + try { + Files.createLink(toPath.resolve(destFile), fromPath.resolve(srcFile)); + } catch (FileNotFoundException | NoSuchFileException | FileAlreadyExistsException ex) { + return ex; // in these cases we bubble up since it's a true error condition. + } catch (IOException + | UnsupportedOperationException // if the FS doesn't support hard-links + | SecurityException ex // we don't have permission to use hard-links just fall back to byte copy + ) { + // hard-links are not supported or the files are on different filesystems + // we could go deeper and check if their filesstores are the same and opt + // out earlier but for now we just fall back to normal file-copy + return ex; + } + return null; + }); + tryCopy = suppressedException != null; + } + } + if (tryCopy) { + try { + super.copyFrom(from, srcFile, destFile, context); + } catch (Exception ex) { + if (suppressedException != null) { + ex.addSuppressed(suppressedException); + } + throw ex; + } + } + } +} diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java new file mode 100644 index 00000000000..b084c925490 --- /dev/null +++ b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java @@ -0,0 +1,76 @@ +package org.apache.lucene.store; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.util.IOUtils; + +public class TestHardLinkCopyDirectoryWrapper extends BaseDirectoryTestCase { + + @Override + protected Directory getDirectory(Path file) throws IOException { + Directory open = random().nextBoolean() ? newFSDirectory(file) : newDirectory(); + return new HardlinkCopyDirectoryWrapper(open); + } + + /** + * Tests that we use hardlinks if possible on Directory#copyFrom + */ + public void testCopyHardLinks() throws IOException { + Path tempDir = createTempDir(); + Path dir_1 = tempDir.resolve("dir_1"); + Path dir_2 = tempDir.resolve("dir_2"); + Files.createDirectories(dir_1); + Files.createDirectories(dir_2); + + Directory luceneDir_1 = newFSDirectory(dir_1); + Directory luceneDir_2 = newFSDirectory(dir_2); + try(IndexOutput output = luceneDir_1.createOutput("foo.bar", IOContext.DEFAULT)) { + CodecUtil.writeHeader(output, "foo", 0); + output.writeString("hey man, nice shot!"); + CodecUtil.writeFooter(output); + } + try { + Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar")); + BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class); + BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class); + assumeTrue("hardlinks are not supported", destAttr.fileKey() != null + && destAttr.fileKey().equals(sourceAttr.fileKey())); + } catch (UnsupportedOperationException ex) { + assumeFalse("hardlinks are not supported", false); + } + + HardlinkCopyDirectoryWrapper wrapper = new HardlinkCopyDirectoryWrapper(luceneDir_2); + wrapper.copyFrom(luceneDir_1, "foo.bar", "bar.foo", IOContext.DEFAULT); + assertTrue(Files.exists(dir_2.resolve("bar.foo"))); + BasicFileAttributes destAttr = Files.readAttributes(dir_2.resolve("bar.foo"), BasicFileAttributes.class); + BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class); + assertEquals(destAttr.fileKey(), sourceAttr.fileKey()); + try(ChecksumIndexInput indexInput = wrapper.openChecksumInput("bar.foo", IOContext.DEFAULT)) { + CodecUtil.checkHeader(indexInput, "foo", 0, 0); + assertEquals("hey man, nice shot!", indexInput.readString()); + CodecUtil.checkFooter(indexInput); + } + IOUtils.close(luceneDir_1, luceneDir_2); + } +} diff --git a/lucene/tools/junit4/tests.policy b/lucene/tools/junit4/tests.policy index a579fe251d3..f1d8f106dc2 100644 --- a/lucene/tools/junit4/tests.policy +++ b/lucene/tools/junit4/tests.policy @@ -38,7 +38,9 @@ grant { permission java.io.FilePermission "${junit4.childvm.cwd}${/}jacoco.db", "write"; permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,write,delete"; permission java.io.FilePermission "${clover.db.dir}${/}-", "read,write,delete"; - + + // misc HardlinkCopyDirectoryWrapper needs this to test if hardlinks can be created + permission java.nio.file.LinkPermission "hard"; // needed by SSD detection tests in TestIOUtils (creates symlinks) permission java.nio.file.LinkPermission "symbolic"; From b29eac852b0c8853d68d0f34e6a4d6eb45ebef3a Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 26 May 2016 12:20:49 +0200 Subject: [PATCH 04/19] Fix TestHardLinkCopyDirectoryWrapper if an assume is hit all directories must be closed --- .../TestHardLinkCopyDirectoryWrapper.java | 51 ++++++++++--------- 1 file changed, 28 insertions(+), 23 
deletions(-) diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java index b084c925490..819511b9c68 100644 --- a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java +++ b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java @@ -45,32 +45,37 @@ public class TestHardLinkCopyDirectoryWrapper extends BaseDirectoryTestCase { Directory luceneDir_1 = newFSDirectory(dir_1); Directory luceneDir_2 = newFSDirectory(dir_2); - try(IndexOutput output = luceneDir_1.createOutput("foo.bar", IOContext.DEFAULT)) { - CodecUtil.writeHeader(output, "foo", 0); - output.writeString("hey man, nice shot!"); - CodecUtil.writeFooter(output); - } try { - Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar")); - BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class); + try (IndexOutput output = luceneDir_1.createOutput("foo.bar", IOContext.DEFAULT)) { + CodecUtil.writeHeader(output, "foo", 0); + output.writeString("hey man, nice shot!"); + CodecUtil.writeFooter(output); + } + try { + Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar")); + BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class); + BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class); + assumeTrue("hardlinks are not supported", destAttr.fileKey() != null + && destAttr.fileKey().equals(sourceAttr.fileKey())); + } catch (UnsupportedOperationException ex) { + assumeFalse("hardlinks are not supported", true); + } + + HardlinkCopyDirectoryWrapper wrapper = new HardlinkCopyDirectoryWrapper(luceneDir_2); + wrapper.copyFrom(luceneDir_1, "foo.bar", "bar.foo", IOContext.DEFAULT); + assertTrue(Files.exists(dir_2.resolve("bar.foo"))); + BasicFileAttributes destAttr = Files.readAttributes(dir_2.resolve("bar.foo"), BasicFileAttributes.class); BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class); - assumeTrue("hardlinks are not supported", destAttr.fileKey() != null - && destAttr.fileKey().equals(sourceAttr.fileKey())); - } catch (UnsupportedOperationException ex) { - assumeFalse("hardlinks are not supported", false); + assertEquals(destAttr.fileKey(), sourceAttr.fileKey()); + try (ChecksumIndexInput indexInput = wrapper.openChecksumInput("bar.foo", IOContext.DEFAULT)) { + CodecUtil.checkHeader(indexInput, "foo", 0, 0); + assertEquals("hey man, nice shot!", indexInput.readString()); + CodecUtil.checkFooter(indexInput); + } + } finally { + // close them in a finally block we might run into an assume here + IOUtils.close(luceneDir_1, luceneDir_2); } - HardlinkCopyDirectoryWrapper wrapper = new HardlinkCopyDirectoryWrapper(luceneDir_2); - wrapper.copyFrom(luceneDir_1, "foo.bar", "bar.foo", IOContext.DEFAULT); - assertTrue(Files.exists(dir_2.resolve("bar.foo"))); - BasicFileAttributes destAttr = Files.readAttributes(dir_2.resolve("bar.foo"), BasicFileAttributes.class); - BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class); - assertEquals(destAttr.fileKey(), sourceAttr.fileKey()); - try(ChecksumIndexInput indexInput = wrapper.openChecksumInput("bar.foo", IOContext.DEFAULT)) { - CodecUtil.checkHeader(indexInput, "foo", 0, 0); - assertEquals("hey man, nice shot!", indexInput.readString()); - 
CodecUtil.checkFooter(indexInput); - } - IOUtils.close(luceneDir_1, luceneDir_2); } } From 8c6493151738314420ce5ffb678dbb9170c64d9a Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Thu, 26 May 2016 15:59:12 +0200 Subject: [PATCH 05/19] LUCENE-7303 - avoid NPE in MultiFields.getTerms(leafReader, classFieldName), removed duplicated code in DocumentSNBC (cherry picked from commit 8808cf5) --- .../SimpleNaiveBayesClassifier.java | 28 +++++++++++-------- .../SimpleNaiveBayesDocumentClassifier.java | 22 --------------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java index 73c90de5b1f..2514ae1e644 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java @@ -145,18 +145,19 @@ public class SimpleNaiveBayesClassifier implements Classifier { List> assignedClasses = new ArrayList<>(); Terms classes = MultiFields.getTerms(leafReader, classFieldName); - TermsEnum classesEnum = classes.iterator(); - BytesRef next; - String[] tokenizedText = tokenize(inputDocument); - int docsWithClassSize = countDocsWithClass(); - while ((next = classesEnum.next()) != null) { - if (next.length > 0) { - Term term = new Term(this.classFieldName, next); - double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize); - assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal)); + if (classes != null) { + TermsEnum classesEnum = classes.iterator(); + BytesRef next; + String[] tokenizedText = tokenize(inputDocument); + int docsWithClassSize = countDocsWithClass(); + while ((next = classesEnum.next()) != null) { + if (next.length > 0) { + Term term = new Term(this.classFieldName, next); + double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize); + assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal)); + } } } - // normalization; the values transforms to a 0-1 range return normClassificationResults(assignedClasses); } @@ -168,8 +169,9 @@ public class SimpleNaiveBayesClassifier implements Classifier { * @throws IOException if accessing to term vectors or search fails */ protected int countDocsWithClass() throws IOException { - int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount(); - if (docCount == -1) { // in case codec doesn't support getDocCount + Terms terms = MultiFields.getTerms(this.leafReader, this.classFieldName); + int docCount; + if (terms == null || terms.getDocCount() == -1) { // in case codec doesn't support getDocCount TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector(); BooleanQuery.Builder q = new BooleanQuery.Builder(); q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST)); @@ -179,6 +181,8 @@ public class SimpleNaiveBayesClassifier implements Classifier { indexSearcher.search(q.build(), classQueryCountCollector); docCount = classQueryCountCollector.getTotalHits(); + } else { + docCount = terms.getDocCount(); } return docCount; } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java 
b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java index 3dc01bb49b2..2c5a493ed77 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java @@ -168,28 +168,6 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi } } - /** - * Counts the number of documents in the index having at least a value for the 'class' field - * - * @return the no. of documents having a value for the 'class' field - * @throws java.io.IOException If accessing to term vectors or search fails - */ - protected int countDocsWithClass() throws IOException { - int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount(); - if (docCount == -1) { // in case codec doesn't support getDocCount - TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector(); - BooleanQuery.Builder q = new BooleanQuery.Builder(); - q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST)); - if (query != null) { - q.add(query, BooleanClause.Occur.MUST); - } - indexSearcher.search(q.build(), - classQueryCountCollector); - docCount = classQueryCountCollector.getTotalHits(); - } - return docCount; - } - /** * Returns a token array from the {@link org.apache.lucene.analysis.TokenStream} in input * From 55d854566e1e3c14cd91d91f414469104c935103 Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Thu, 26 May 2016 16:13:46 +0200 Subject: [PATCH 06/19] LUCENE-7305 - use macro average in confusion matrix metrics, removed unused import in datasplitter (cherry picked from commit dc50b79) --- .../utils/ConfusionMatrixGenerator.java | 49 ++++++------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/lucene/classification/src/java/org/apache/lucene/classification/utils/ConfusionMatrixGenerator.java b/lucene/classification/src/java/org/apache/lucene/classification/utils/ConfusionMatrixGenerator.java index 3dd8ba83d04..65de8015f20 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/utils/ConfusionMatrixGenerator.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/ConfusionMatrixGenerator.java @@ -175,7 +175,7 @@ public class ConfusionMatrixGenerator { public double getPrecision(String klass) { Map classifications = linearizedMatrix.get(klass); double tp = 0; - double fp = 0; + double den = 0; // tp + fp if (classifications != null) { for (Map.Entry entry : classifications.entrySet()) { if (klass.equals(entry.getKey())) { @@ -184,11 +184,11 @@ public class ConfusionMatrixGenerator { } for (Map values : linearizedMatrix.values()) { if (values.containsKey(klass)) { - fp += values.get(klass); + den += values.get(klass); } } } - return tp > 0 ? tp / (tp + fp) : 0; + return tp > 0 ? 
tp / den : 0; } /** @@ -246,7 +246,7 @@ public class ConfusionMatrixGenerator { if (this.accuracy == -1) { double tp = 0d; double tn = 0d; - double fp = 0d; + double tfp = 0d; // tp + fp double fn = 0d; for (Map.Entry> classification : linearizedMatrix.entrySet()) { String klass = classification.getKey(); @@ -259,63 +259,46 @@ public class ConfusionMatrixGenerator { } for (Map values : linearizedMatrix.values()) { if (values.containsKey(klass)) { - fp += values.get(klass); + tfp += values.get(klass); } else { tn++; } } } - this.accuracy = (tp + tn) / (fp + fn + tp + tn); + this.accuracy = (tp + tn) / (tfp + fn + tn); } return this.accuracy; } /** - * get the precision (see {@link #getPrecision(String)}) over all the classes. + * get the macro averaged precision (see {@link #getPrecision(String)}) over all the classes. * - * @return the precision as computed from the whole confusion matrix + * @return the macro averaged precision as computed from the confusion matrix */ public double getPrecision() { - double tp = 0; - double fp = 0; + double p = 0; for (Map.Entry> classification : linearizedMatrix.entrySet()) { String klass = classification.getKey(); - for (Map.Entry entry : classification.getValue().entrySet()) { - if (klass.equals(entry.getKey())) { - tp += entry.getValue(); - } - } - for (Map values : linearizedMatrix.values()) { - if (values.containsKey(klass)) { - fp += values.get(klass); - } - } + p += getPrecision(klass); } - return tp > 0 ? tp / (tp + fp) : 0; + return p / linearizedMatrix.size(); } /** - * get the recall (see {@link #getRecall(String)}) over all the classes + * get the macro averaged recall (see {@link #getRecall(String)}) over all the classes * - * @return the recall as computed from the whole confusion matrix + * @return the recall as computed from the confusion matrix */ public double getRecall() { - double tp = 0; - double fn = 0; + double r = 0; for (Map.Entry> classification : linearizedMatrix.entrySet()) { String klass = classification.getKey(); - for (Map.Entry entry : classification.getValue().entrySet()) { - if (klass.equals(entry.getKey())) { - tp += entry.getValue(); - } else { - fn += entry.getValue(); - } - } + r += getRecall(klass); } - return tp + fn > 0 ? tp / (tp + fn) : 0; + return r / linearizedMatrix.size(); } @Override From 1609428786b17135f0d8ba413c4203b88977304b Mon Sep 17 00:00:00 2001 From: jdyer1 Date: Thu, 26 May 2016 13:39:37 -0500 Subject: [PATCH 07/19] SOLR-9141: Fix ClassCastException when using the /sql handler count() function with single-shard collections --- solr/CHANGES.txt | 3 ++ .../client/solrj/io/stream/FacetStream.java | 2 +- .../client/solrj/io/stream/StreamingTest.java | 48 ++++++++++--------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index f56bfe47ec3..2f3cc5f002c 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -205,6 +205,9 @@ Bug Fixes * SOLR-9151: Fix SolrCLI so that bin/solr -e cloud example can be run from any CWD (janhoy) +* SOLR-9141: Fix ClassCastException when using the /sql handler count() function with + single-shard collections (Minoru Osuka via James Dyer) + Optimizations ---------------------- * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation. 
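
The FacetStream hunk below is the crux of SOLR-9141: bucket.get("count") returns a plain Object, and on single-shard collections the value is apparently not the Long the old unboxing cast assumed, so `long l = (long)bucket.get("count")` failed with a ClassCastException at runtime. A minimal, self-contained sketch of that failure mode and of the Number-based pattern the patch adopts (the class name and the literal value are illustrative assumptions, not taken from Solr):

    public class CountCastDemo {
      public static void main(String[] args) {
        Object count = Integer.valueOf(5);        // stands in for bucket.get("count") holding a non-Long Number
        // long bad = (long) count;               // compiles, but throws ClassCastException at runtime
        //                                        // (java.lang.Integer cannot be cast to java.lang.Long)
        long ok = ((Number) count).longValue();   // the pattern used in the fix: handles Integer and Long alike
        System.out.println(ok);                   // prints 5
      }
    }
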
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java index 86124dedf3c..6802d0e62d2 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java @@ -503,7 +503,7 @@ public class FacetStream extends TupleStream implements Expressible { t.put(identifier, d); ++m; } else { - long l = (long)bucket.get("count"); + long l = ((Number)bucket.get("count")).longValue(); t.put("count(*)", l); } } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamingTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamingTest.java index 17897598b96..1cea3112621 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamingTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamingTest.java @@ -73,14 +73,19 @@ public class StreamingTest extends SolrCloudTestCase { .withFunctionName("parallel", ParallelStream.class); private static String zkHost; + + private static int numShards; + private static int numWorkers; @BeforeClass public static void configureCluster() throws Exception { - configureCluster(2) + numShards = random().nextInt(2) + 1; //1 - 3 + numWorkers = numShards > 2 ? random().nextInt(numShards - 1) + 1 : numShards; + configureCluster(numShards) .addConfig("conf", getFile("solrj").toPath().resolve("solr").resolve("configsets").resolve("streaming").resolve("conf")) .configure(); - CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1).process(cluster.getSolrClient()); + CollectionAdminRequest.createCollection(COLLECTION, "conf", numShards, 1).process(cluster.getSolrClient()); AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT); zkHost = cluster.getZkServer().getZkAddress(); @@ -147,12 +152,11 @@ public class StreamingTest extends SolrCloudTestCase { SolrParams sParamsA = StreamingTest.mapParams("q", "*:*", "fl", "id,a_s,a_i,a_f", "sort", "a_s asc,a_f asc", "partitionKeys", "none"); CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParamsA); - ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, stream, 2, new FieldComparator("a_s",ComparatorOrder.ASCENDING)); - + ParallelStream pstream = parallelStream(stream, new FieldComparator("a_s", ComparatorOrder.ASCENDING)); attachStreamFactory(pstream); List tuples = getTuples(pstream); - assert(tuples.size() == 20); // Each tuple will be double counted. + assert(tuples.size() == (10 * numWorkers)); // Each tuple will be double counted. 
} @@ -174,7 +178,7 @@ public class StreamingTest extends SolrCloudTestCase { SolrParams sParams = mapParams("q", "*:*", "fl", "id,a_s,a_i,a_f", "sort", "a_f asc,a_i asc", "partitionKeys", "a_f"); CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParams); UniqueStream ustream = new UniqueStream(stream, new FieldEqualitor("a_f")); - ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, ustream, 2, new FieldComparator("a_f",ComparatorOrder.ASCENDING)); + ParallelStream pstream = parallelStream(ustream, new FieldComparator("a_f", ComparatorOrder.ASCENDING)); attachStreamFactory(pstream); List tuples = getTuples(pstream); assert(tuples.size() == 5); @@ -183,7 +187,7 @@ public class StreamingTest extends SolrCloudTestCase { //Test the eofTuples Map eofTuples = pstream.getEofTuples(); - assert(eofTuples.size() == 2); //There should be an EOF tuple for each worker. + assert(eofTuples.size() == numWorkers); //There should be an EOF tuple for each worker. } @@ -253,7 +257,7 @@ public class StreamingTest extends SolrCloudTestCase { SolrParams sParams = mapParams("q", "*:*", "fl", "id,a_s,a_i", "sort", "a_i asc", "partitionKeys", "a_i"); CloudSolrStream stream = new CloudSolrStream(zkHost, COLLECTION, sParams); RankStream rstream = new RankStream(stream, 11, new FieldComparator("a_i",ComparatorOrder.DESCENDING)); - ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_i",ComparatorOrder.DESCENDING)); + ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_i", ComparatorOrder.DESCENDING)); attachStreamFactory(pstream); List tuples = getTuples(pstream); @@ -405,9 +409,7 @@ public class StreamingTest extends SolrCloudTestCase { ReducerStream rstream = new ReducerStream(stream, new FieldEqualitor("a_s"), new GroupOperation(new FieldComparator("a_f", ComparatorOrder.DESCENDING), 5)); - - ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s",ComparatorOrder.ASCENDING)); - + ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.ASCENDING)); attachStreamFactory(pstream); List tuples = getTuples(pstream); @@ -433,9 +435,7 @@ public class StreamingTest extends SolrCloudTestCase { rstream = new ReducerStream(stream, new FieldEqualitor("a_s"), new GroupOperation(new FieldComparator("a_f", ComparatorOrder.ASCENDING), 3)); - - pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s",ComparatorOrder.DESCENDING)); - + pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.DESCENDING)); attachStreamFactory(pstream); tuples = getTuples(pstream); @@ -1401,7 +1401,7 @@ public class StreamingTest extends SolrCloudTestCase { new CountMetric()}; RollupStream rollupStream = new RollupStream(stream, buckets, metrics); - ParallelStream parallelStream = new ParallelStream(zkHost, COLLECTION, rollupStream, 2, new FieldComparator("a_s", ComparatorOrder.ASCENDING)); + ParallelStream parallelStream = parallelStream(rollupStream, new FieldComparator("a_s", ComparatorOrder.ASCENDING)); attachStreamFactory(parallelStream); List tuples = getTuples(parallelStream); @@ -1501,9 +1501,7 @@ public class StreamingTest extends SolrCloudTestCase { ReducerStream rstream = new ReducerStream(stream, new FieldEqualitor("a_s"), new GroupOperation(new FieldComparator("a_s", ComparatorOrder.ASCENDING), 2)); - - ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, rstream, 2, new FieldComparator("a_s", 
ComparatorOrder.ASCENDING)); - + ParallelStream pstream = parallelStream(rstream, new FieldComparator("a_s", ComparatorOrder.ASCENDING)); attachStreamFactory(pstream); List tuples = getTuples(pstream); assert(tuples.size() == 0); @@ -1636,7 +1634,7 @@ public class StreamingTest extends SolrCloudTestCase { CloudSolrStream streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB); MergeStream mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.ASCENDING)); - ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.ASCENDING)); + ParallelStream pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.ASCENDING)); attachStreamFactory(pstream); List tuples = getTuples(pstream); @@ -1651,7 +1649,7 @@ public class StreamingTest extends SolrCloudTestCase { streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB); mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.DESCENDING)); - pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.DESCENDING)); + pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.DESCENDING)); attachStreamFactory(pstream); tuples = getTuples(pstream); @@ -1684,14 +1682,13 @@ public class StreamingTest extends SolrCloudTestCase { CloudSolrStream streamB = new CloudSolrStream(zkHost, COLLECTION, sParamsB); MergeStream mstream = new MergeStream(streamA, streamB, new FieldComparator("a_i",ComparatorOrder.ASCENDING)); - ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, mstream, 2, new FieldComparator("a_i",ComparatorOrder.ASCENDING)); - + ParallelStream pstream = parallelStream(mstream, new FieldComparator("a_i", ComparatorOrder.ASCENDING)); attachStreamFactory(pstream); List tuples = getTuples(pstream); assert(tuples.size() == 9); Map eofTuples = pstream.getEofTuples(); - assert(eofTuples.size() == 2); // There should be an EOF Tuple for each worker. + assert(eofTuples.size() == numWorkers); // There should be an EOF Tuple for each worker. } @@ -1834,5 +1831,10 @@ public class StreamingTest extends SolrCloudTestCase { return params; } + + private ParallelStream parallelStream(TupleStream stream, FieldComparator comparator) throws IOException { + ParallelStream pstream = new ParallelStream(zkHost, COLLECTION, stream, numWorkers, comparator); + return pstream; + } } From f1f85e560f54371800a368aff801b7c24413ece6 Mon Sep 17 00:00:00 2001 From: jdyer1 Date: Thu, 26 May 2016 14:46:43 -0500 Subject: [PATCH 08/19] SOLR-9165: disable "cursorMark" when testing for valid SpellCheck Collations --- solr/CHANGES.txt | 3 ++ .../solr/spelling/SpellCheckCollator.java | 3 ++ .../solr/spelling/SpellCheckCollatorTest.java | 30 +++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2f3cc5f002c..a647d1fa417 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -207,6 +207,9 @@ Bug Fixes * SOLR-9141: Fix ClassCastException when using the /sql handler count() function with single-shard collections (Minoru Osuka via James Dyer) + +* SOLR-9165: Spellcheck does not return collations if "maxCollationTries" is used with "cursorMark". 
+ (James Dyer) Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java index 528cdc33bf8..0738081aba6 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CursorMarkParams; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.GroupParams; import org.apache.solr.common.params.ModifiableSolrParams; @@ -118,6 +119,8 @@ public class SpellCheckCollator { params.set(CommonParams.FL, "id"); // we'll sort by doc id to ensure no scoring is done. params.set(CommonParams.SORT, "_docid_ asc"); + // CursorMark does not like _docid_ sorting, and we don't need it. + params.remove(CursorMarkParams.CURSOR_MARK_PARAM); // If a dismax query, don't add unnecessary clauses for scoring params.remove(DisMaxParams.TIE); params.remove(DisMaxParams.PF); diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java index 97111498928..a31a19d624b 100644 --- a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java @@ -25,6 +25,7 @@ import org.apache.lucene.util.TestUtil; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CursorMarkParams; import org.apache.solr.common.params.GroupParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SpellingParams; @@ -594,5 +595,34 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 { List collations = (List) collationList.getAll("collation"); assertTrue(collations.size() == 2); } + @Test + public void testWithCursorMark() throws Exception + { + SolrCore core = h.getCore(); + SearchComponent speller = core.getSearchComponent("spellcheck"); + assertTrue("speller is null and it shouldn't be", speller != null); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(SpellCheckComponent.COMPONENT_NAME, "true"); + params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true"); + params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10"); + params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true"); + params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "2"); + params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1"); + params.add(CommonParams.Q, "lowerfilt:(+fauth)"); + params.add(CommonParams.SORT, "id asc"); + params.add(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START); + SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH"); + SolrQueryResponse rsp = new SolrQueryResponse(); + rsp.addResponseHeader(new SimpleOrderedMap()); + SolrQueryRequest req = new LocalSolrQueryRequest(core, params); + handler.handleRequest(req, rsp); + req.close(); + NamedList values = rsp.getValues(); + NamedList spellCheck = (NamedList) values.get("spellcheck"); + NamedList collationList = (NamedList) spellCheck.get("collations"); + List collations = (List) collationList.getAll("collation"); + 
assertTrue(collations.size() == 1); + } } From aec3654fb8e6530494d909d57dada19642db1128 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Sat, 21 May 2016 14:36:37 +0300 Subject: [PATCH 09/19] SOLR-9110: moving JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase --- solr/CHANGES.txt | 2 + .../cloud/DistribJoinFromCollectionTest.java | 136 ++++++++++-------- .../TestSubQueryTransformerDistrib.java | 68 ++++++--- .../join/BlockJoinFacetDistribTest.java | 97 +++++++++---- 4 files changed, 197 insertions(+), 106 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a647d1fa417..59029e5df2b 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -301,6 +301,8 @@ Other Changes * SOLR-9119: several static methods in ValueSourceParser have been made private (hossman) +* SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail Khludnev) + ================== 6.0.1 ================== (No Changes) diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java index ef00df3e482..d8e1b153d59 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java @@ -16,102 +16,128 @@ */ package org.apache.solr.cloud; +import static org.hamcrest.CoreMatchers.not; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; -import org.junit.After; -import org.junit.Before; -import org.apache.commons.lang.StringUtils; +import org.apache.solr.common.cloud.ZkStateReader; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.hamcrest.CoreMatchers.*; - -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.HashSet; -import java.util.Set; - /** * Tests using fromIndex that points to a collection in SolrCloud mode. 
*/ -public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase { +public class DistribJoinFromCollectionTest extends SolrCloudTestCase{ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); final private static String[] scoreModes = {"avg","max","min","total"}; - public DistribJoinFromCollectionTest() { - super(); - } - - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - System.setProperty("numShards", Integer.toString(sliceCount)); - } - - @Override - @After - public void tearDown() throws Exception { - try { - super.tearDown(); - } catch (Exception exc) {} - resetExceptionIgnores(); - } +// resetExceptionIgnores(); + private static String toColl = "to_2x2"; + private static String fromColl = "from_1x4"; - @Test - public void test() throws Exception { + private static Integer toDocId; + + private static CloudSolrClient cloudClient; + + + @BeforeClass + public static void setupCluster() throws Exception { + final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf"); + + String configName = "solrCloudCollectionConfig"; + int nodeCount = 5; + configureCluster(nodeCount) + .addConfig(configName, configDir) + .configure(); + + + Map collectionProperties = new HashMap<>(); + collectionProperties.put("config", "solrconfig-tlog.xml" ); + collectionProperties.put("schema", "schema.xml"); + // create a collection holding data for the "to" side of the JOIN - String toColl = "to_2x2"; - createCollection(toColl, 2, 2, 2); - ensureAllReplicasAreActive(toColl, "shard1", 2, 2, 30); - ensureAllReplicasAreActive(toColl, "shard2", 2, 2, 30); + + int shards = 2; + int replicas = 2 ; + assertNotNull(cluster.createCollection(toColl, shards, replicas, + configName, + collectionProperties)); + // get the set of nodes where replicas for the "to" collection exist Set nodeSet = new HashSet<>(); - ClusterState cs = cloudClient.getZkStateReader().getClusterState(); - for (Slice slice : cs.getActiveSlices(toColl)) + cloudClient = cluster.getSolrClient(); + ZkStateReader zkStateReader = cloudClient.getZkStateReader(); + ClusterState cs = zkStateReader.getClusterState(); + for (Slice slice : cs.getCollection(toColl).getActiveSlices()) for (Replica replica : slice.getReplicas()) nodeSet.add(replica.getNodeName()); assertTrue(nodeSet.size() > 0); // deploy the "from" collection to all nodes where the "to" collection exists - String fromColl = "from_1x2"; - createCollection(null, fromColl, 1, nodeSet.size(), 1, null, StringUtils.join(nodeSet,",")); - ensureAllReplicasAreActive(fromColl, "shard1", 1, nodeSet.size(), 30); - - // both to and from collections are up and active, index some docs ... 
- Integer toDocId = indexDoc(toColl, 1001, "a", null, "b"); + + assertNotNull(cluster.createCollection(fromColl, 1, 4, + configName, StringUtils.join(nodeSet,","), null, + collectionProperties)); + + AbstractDistribZkTestBase.waitForRecoveriesToFinish(toColl, zkStateReader, false, true, 30); + AbstractDistribZkTestBase.waitForRecoveriesToFinish(fromColl, zkStateReader, false, true, 30); + + toDocId = indexDoc(toColl, 1001, "a", null, "b"); indexDoc(fromColl, 2001, "a", "c", null); Thread.sleep(1000); // so the commits fire + } + + @Test + public void testScore() throws Exception { //without score testJoins(toColl, fromColl, toDocId, false); - + } + + @Test + public void testNoScore() throws Exception { //with score testJoins(toColl, fromColl, toDocId, true); - + + } + + @AfterClass + public static void shutdown() { log.info("DistribJoinFromCollectionTest logic complete ... deleting the " + toColl + " and " + fromColl + " collections"); // try to clean up for (String c : new String[]{ toColl, fromColl }) { try { - CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete() - .setCollectionName(c); + CollectionAdminRequest.Delete req = CollectionAdminRequest.deleteCollection(c); req.process(cloudClient); } catch (Exception e) { // don't fail the test @@ -145,9 +171,7 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase // create an alias for the fromIndex and then query through the alias String alias = fromColl+"Alias"; - CollectionAdminRequest.CreateAlias request = new CollectionAdminRequest.CreateAlias(); - request.setAliasName(alias); - request.setAliasedCollections(fromColl); + CollectionAdminRequest.CreateAlias request = CollectionAdminRequest.createAlias(alias,fromColl); request.process(cloudClient); { @@ -195,14 +219,14 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase + "from=join_s fromIndex=" + wrongName + " to=join_s}match_s:c"; final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score")); try { - cloudClient.request(qr); + cluster.getSolrClient().request(qr); } catch (HttpSolrClient.RemoteSolrException ex) { assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, ex.code()); assertTrue(ex.getMessage().contains(wrongName)); } } - protected Integer indexDoc(String collection, int id, String joinField, String matchField, String getField) throws Exception { + protected static Integer indexDoc(String collection, int id, String joinField, String matchField, String getField) throws Exception { UpdateRequest up = new UpdateRequest(); up.setCommitWithin(50); up.setParam("collection", collection); @@ -215,7 +239,7 @@ public class DistribJoinFromCollectionTest extends AbstractFullDistribZkTestBase if (getField != null) doc.addField("get_s", getField); up.add(doc); - cloudClient.request(up); + cluster.getSolrClient().request(up); return docId; } } diff --git a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java index 87cfeb36b3b..631c82e140e 100644 --- a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java +++ b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java @@ -17,6 +17,8 @@ package org.apache.solr.response.transform; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; import 
java.util.Iterator; @@ -26,27 +28,59 @@ import java.util.Random; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.cloud.AbstractFullDistribZkTestBase; +import org.apache.solr.cloud.AbstractDistribZkTestBase; +import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.util.ContentStreamBase; +import org.junit.BeforeClass; import org.junit.Test; @SuppressSSL -public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBase { +public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { - @Override - protected String getCloudSchemaFile() { - return "schema-docValuesJoin.xml"; + final static String people = "people"; + final static String depts = "departments"; + private static CloudSolrClient client; + + @BeforeClass + public static void setupCluster() throws Exception { + + final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf"); + + String configName = "solrCloudCollectionConfig"; + int nodeCount = 5; + configureCluster(nodeCount) + .addConfig(configName, configDir) + .configure(); + + Map collectionProperties = new HashMap<>(); + collectionProperties.put("config", "solrconfig-doctransformers.xml" ); + collectionProperties.put("schema", "schema-docValuesJoin.xml"); + + int shards = 2; + int replicas = 2 ; + assertNotNull(cluster.createCollection(people, shards, replicas, + configName, + collectionProperties)); + + assertNotNull(cluster.createCollection(depts, shards, replicas, + configName, collectionProperties)); + + client = cluster.getSolrClient(); + client.setDefaultCollection(people); + + ZkStateReader zkStateReader = client.getZkStateReader(); + AbstractDistribZkTestBase.waitForRecoveriesToFinish(people, zkStateReader, true, true, 30); + + AbstractDistribZkTestBase.waitForRecoveriesToFinish(depts, zkStateReader, false, true, 30); } - @Override - protected String getCloudSolrConfig() { - return "solrconfig-basic.xml"; - } @SuppressWarnings("serial") @Test @@ -54,14 +88,6 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas int peopleMultiplier = atLeast(1); int deptMultiplier = atLeast(1); - final String people = "people"; - createCollection(people, 2, 1, 10); - - - final String depts = "departments"; - createCollection(depts, 2, 1, 10); - - createIndex(people, peopleMultiplier, depts, deptMultiplier); Random random1 = random(); @@ -79,7 +105,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas "depts.rows",""+(deptMultiplier*2), "depts.logParamsList","q,fl,rows,row.dept_ss_dv"})); final QueryResponse rsp = new QueryResponse(); - rsp.setResponse(cloudClient.request(qr, people)); + rsp.setResponse(client.request(qr, people)); final SolrDocumentList hits = rsp.getResults(); assertEquals(peopleMultiplier, hits.getNumFound()); @@ -116,6 +142,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas private void createIndex(String people, int peopleMultiplier, String depts, int deptMultiplier) throws SolrServerException, IOException { + int id=0; List peopleDocs = new 
ArrayList<>(); for (int p=0; p < peopleMultiplier; p++){ @@ -161,6 +188,9 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas private void addDocs(String collection, List docs) throws SolrServerException, IOException { StringBuilder upd = new StringBuilder(""); + + upd.append("*:*"); + for (Iterator iterator = docs.iterator(); iterator.hasNext();) { String add = iterator.next(); upd.append(add); @@ -176,7 +206,7 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update"); req.addContentStream(new ContentStreamBase.StringStream(upd.toString(),"text/xml")); - cloudClient.request(req, collection); + client.request(req, collection); upd.setLength("".length()); } } diff --git a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java index 1650f9cf7b4..70f95a79b55 100644 --- a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java +++ b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java @@ -16,7 +16,12 @@ */ package org.apache.solr.search.join; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -24,34 +29,56 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.FacetField.Count; import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.AbstractDistribZkTestBase; +import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; import org.junit.BeforeClass; +import org.junit.Test; -@Slow -public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase { +public class BlockJoinFacetDistribTest extends SolrCloudTestCase{ + + private static final String collection = "facetcollection"; @BeforeClass - public static void beforeSuperClass() throws Exception { - schemaString = "schema-blockjoinfacetcomponent.xml"; - configString = "solrconfig-blockjoinfacetcomponent.xml"; - } + public static void setupCluster() throws Exception { + final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf"); - @ShardsFixed(num = 3) - public void test() throws Exception { - testBJQFacetComponent(); + String configName = "solrCloudCollectionConfig"; + int nodeCount = 6; + configureCluster(nodeCount) + .addConfig(configName, configDir) + .configure(); + + + Map collectionProperties = new HashMap<>(); + collectionProperties.put("config", "solrconfig-blockjoinfacetcomponent.xml" ); + collectionProperties.put("schema", "schema-blockjoinfacetcomponent.xml"); + + // create a collection holding data for the "to" side of the JOIN + + int shards = 3; + int replicas = 2 ; + assertNotNull(cluster.createCollection(collection, shards, replicas, + configName, + collectionProperties)); + + AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, + 
cluster.getSolrClient().getZkStateReader(), false, true, 30); + } final static List colors = Arrays.asList("red","blue","brown","white","black","yellow","cyan","magenta","blur", "fuchsia", "light","dark","green","grey","don't","know","any","more" ); final static List sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi"); - private void testBJQFacetComponent() throws Exception { + @Test + public void testBJQFacetComponent() throws Exception { assert ! colors.removeAll(sizes): "there is no colors in sizes"; Collections.shuffle(colors,random()); @@ -64,8 +91,11 @@ public class BlockJoinFacetDistribTest extends BaseDistributedSearchTestCase { } }; + cluster.getSolrClient().deleteByQuery(collection, "*:*"); + final int parents = atLeast(10); boolean aggregationOccurs = false; + List parentDocs = new ArrayList<>(); for(int parent=0; parent pdocs) throws SolrServerException, IOException { + cluster.getSolrClient().add(collection, pdocs); } } From 5525f429288cf8480ae7b6dc1438918e809a242c Mon Sep 17 00:00:00 2001 From: yonik Date: Wed, 25 May 2016 05:35:18 -0400 Subject: [PATCH 10/19] SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283) --- .../apache/lucene/index/MultiDocValues.java | 2 +- solr/CHANGES.txt | 3 + .../apache/solr/schema/ICUCollationField.java | 4 +- .../handler/component/ExpandComponent.java | 24 +- .../index/SlowCompositeReaderWrapper.java | 296 +++ .../apache/solr/index/SortingMergePolicy.java | 65 + .../solr/schema/AbstractSpatialFieldType.java | 17 +- .../org/apache/solr/schema/BinaryField.java | 2 +- .../org/apache/solr/schema/BoolField.java | 6 +- .../apache/solr/schema/CollationField.java | 4 +- .../org/apache/solr/schema/CurrencyField.java | 2 +- .../org/apache/solr/schema/EnumField.java | 14 +- .../apache/solr/schema/ExternalFileField.java | 8 +- .../org/apache/solr/schema/FieldType.java | 2 +- .../org/apache/solr/schema/GeoHashField.java | 10 +- .../org/apache/solr/schema/IndexSchema.java | 2 +- .../org/apache/solr/schema/LatLonType.java | 5 +- .../org/apache/solr/schema/PointType.java | 4 +- .../apache/solr/schema/PreAnalyzedField.java | 4 +- .../apache/solr/schema/RandomSortField.java | 4 +- .../java/org/apache/solr/schema/StrField.java | 2 +- .../org/apache/solr/schema/TextField.java | 14 +- .../org/apache/solr/schema/TrieField.java | 4 +- .../solr/search/CollapsingQParserPlugin.java | 21 +- .../java/org/apache/solr/search/Insanity.java | 6 +- .../solr/search/SolrFieldCacheMBean.java | 5 +- .../apache/solr/search/SolrIndexSearcher.java | 4 +- .../solr/search/facet/UnInvertedField.java | 4 +- .../solr/search/function/OrdFieldSource.java | 6 +- .../function/ReverseOrdFieldSource.java | 6 +- .../search/join/ScoreJoinQParserPlugin.java | 2 +- .../apache/solr/uninverting/DocTermOrds.java | 887 ++++++++ .../apache/solr/uninverting/FieldCache.java | 466 +++++ .../solr/uninverting/FieldCacheImpl.java | 1085 ++++++++++ .../uninverting/FieldCacheSanityChecker.java | 425 ++++ .../solr/uninverting/UninvertingReader.java | 391 ++++ .../apache/solr/uninverting/package-info.java | 21 + .../solr/update/DeleteByQueryWrapper.java | 3 +- .../org/apache/solr/update/VersionInfo.java | 2 +- .../index/TestSlowCompositeReaderWrapper.java | 95 + .../org/apache/solr/request/TestFaceting.java | 2 +- .../test/org/apache/solr/search/TestSort.java | 4 +- .../solr/uninverting/TestDocTermOrds.java | 681 +++++++ .../solr/uninverting/TestFieldCache.java | 731 +++++++ .../uninverting/TestFieldCacheReopen.java | 70 + 
.../TestFieldCacheSanityChecker.java | 164 ++ .../solr/uninverting/TestFieldCacheSort.java | 1814 +++++++++++++++++ .../uninverting/TestFieldCacheSortRandom.java | 318 +++ .../TestFieldCacheVsDocValues.java | 592 ++++++ .../TestFieldCacheWithThreads.java | 228 +++ .../uninverting/TestLegacyFieldCache.java | 497 +++++ .../solr/uninverting/TestNumericTerms32.java | 156 ++ .../solr/uninverting/TestNumericTerms64.java | 166 ++ .../uninverting/TestUninvertingReader.java | 395 ++++ .../solr/update/DocumentBuilderTest.java | 13 +- 55 files changed, 9651 insertions(+), 107 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java create mode 100644 solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java create mode 100644 solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java create mode 100644 solr/core/src/java/org/apache/solr/uninverting/FieldCache.java create mode 100644 solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java create mode 100644 solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java create mode 100644 solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java create mode 100644 solr/core/src/java/org/apache/solr/uninverting/package-info.java create mode 100644 solr/core/src/test/org/apache/solr/index/TestSlowCompositeReaderWrapper.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestFieldCache.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheVsDocValues.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheWithThreads.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestLegacyFieldCache.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java create mode 100644 solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index 33947974bce..af4dcfcd7d4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -661,7 +661,7 @@ public class MultiDocValues { public final OrdinalMap mapping; /** Creates a new MultiSortedDocValues over values */ - MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException { + public MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException { assert docStarts.length == values.length + 1; this.values = values; this.docStarts = docStarts; diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 59029e5df2b..ef3e46215d2 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -303,6 +303,9 @@ Other Changes * SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail 
Khludnev) +* SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283) + (yonik) + ================== 6.0.1 ================== (No Changes) diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java index 4cfb2874aa4..b1867546e38 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -36,12 +36,12 @@ import org.apache.lucene.search.DocValuesRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; -import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; +import org.apache.solr.uninverting.UninvertingReader.Type; import com.ibm.icu.text.Collator; import com.ibm.icu.text.RuleBasedCollator; diff --git a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java index c38d1b52844..76b38fa3ebe 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java @@ -26,16 +26,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.IntObjectHashMap; -import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongObjectHashMap; -import com.carrotsearch.hppc.LongObjectMap; -import com.carrotsearch.hppc.cursors.IntObjectCursor; -import com.carrotsearch.hppc.cursors.LongCursor; -import com.carrotsearch.hppc.cursors.LongObjectCursor; -import com.carrotsearch.hppc.cursors.ObjectCursor; - import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; @@ -53,7 +43,6 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Query; -import org.apache.solr.search.QueryWrapperFilter; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Sort; @@ -61,7 +50,6 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopScoreDocCollector; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -87,12 +75,24 @@ import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; import org.apache.solr.search.DocSlice; import org.apache.solr.search.QParser; +import org.apache.solr.search.QueryWrapperFilter; import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SortSpecParsing; +import org.apache.solr.uninverting.UninvertingReader; import 
org.apache.solr.util.plugin.PluginInfoInitialized; import org.apache.solr.util.plugin.SolrCoreAware; +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.IntObjectHashMap; +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.LongObjectMap; +import com.carrotsearch.hppc.cursors.IntObjectCursor; +import com.carrotsearch.hppc.cursors.LongCursor; +import com.carrotsearch.hppc.cursors.LongObjectCursor; +import com.carrotsearch.hppc.cursors.ObjectCursor; + /** * The ExpandComponent is designed to work with the CollapsingPostFilter. * The CollapsingPostFilter collapses a result set on a field. diff --git a/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java new file mode 100644 index 00000000000..9ab001173b3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.index; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CompositeReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader.CoreClosedListener; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; +import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; +import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.search.Sort; +import org.apache.lucene.util.Bits; + +/** + * This class forces a composite reader (eg a {@link + * MultiReader} or {@link DirectoryReader}) to emulate a + * {@link LeafReader}. 
This requires implementing the postings + * APIs on-the-fly, using the static methods in {@link + * MultiFields}, {@link MultiDocValues}, by stepping through + * the sub-readers to merge fields/terms, appending docs, etc. + * + *
NOTE: this class almost always results in a + * performance hit. If this is important to your use case, + * you'll get better performance by gathering the sub readers using + * {@link IndexReader#getContext()} to get the + * leaves and then operate per-LeafReader, + * instead of using this class. + */ + +public final class SlowCompositeReaderWrapper extends LeafReader { + + private final CompositeReader in; + private final Fields fields; + private final boolean merging; + + /** This method is sugar for getting an {@link LeafReader} from + * an {@link IndexReader} of any kind. If the reader is already atomic, + * it is returned unchanged, otherwise wrapped by this class. + */ + public static LeafReader wrap(IndexReader reader) throws IOException { + if (reader instanceof CompositeReader) { + return new SlowCompositeReaderWrapper((CompositeReader) reader, false); + } else { + assert reader instanceof LeafReader; + return (LeafReader) reader; + } + } + + SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException { + super(); + in = reader; + if (getFieldInfos().hasPointValues()) { + throw new IllegalArgumentException("cannot wrap points"); + } + fields = MultiFields.getFields(in); + in.registerParentReader(this); + this.merging = merging; + } + + @Override + public String toString() { + return "SlowCompositeReaderWrapper(" + in + ")"; + } + + @Override + public void addCoreClosedListener(CoreClosedListener listener) { + addCoreClosedListenerAsReaderClosedListener(in, listener); + } + + @Override + public void removeCoreClosedListener(CoreClosedListener listener) { + removeCoreClosedListenerAsReaderClosedListener(in, listener); + } + + @Override + public Fields fields() { + ensureOpen(); + return fields; + } + + @Override + public NumericDocValues getNumericDocValues(String field) throws IOException { + ensureOpen(); + return MultiDocValues.getNumericValues(in, field); + } + + @Override + public Bits getDocsWithField(String field) throws IOException { + ensureOpen(); + return MultiDocValues.getDocsWithField(in, field); + } + + @Override + public BinaryDocValues getBinaryDocValues(String field) throws IOException { + ensureOpen(); + return MultiDocValues.getBinaryValues(in, field); + } + + @Override + public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException { + ensureOpen(); + return MultiDocValues.getSortedNumericValues(in, field); + } + + @Override + public SortedDocValues getSortedDocValues(String field) throws IOException { + ensureOpen(); + OrdinalMap map = null; + synchronized (cachedOrdMaps) { + map = cachedOrdMaps.get(field); + if (map == null) { + // uncached, or not a multi dv + SortedDocValues dv = MultiDocValues.getSortedValues(in, field); + if (dv instanceof MultiSortedDocValues) { + map = ((MultiSortedDocValues)dv).mapping; + if (map.owner == getCoreCacheKey() && merging == false) { + cachedOrdMaps.put(field, map); + } + } + return dv; + } + } + int size = in.leaves().size(); + final SortedDocValues[] values = new SortedDocValues[size]; + final int[] starts = new int[size+1]; + for (int i = 0; i < size; i++) { + LeafReaderContext context = in.leaves().get(i); + final LeafReader reader = context.reader(); + final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); + if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) { + return null; + } + SortedDocValues v = reader.getSortedDocValues(field); + if (v == null) { + v = DocValues.emptySorted(); + } + values[i] = v; + starts[i] = 
context.docBase; + } + starts[size] = maxDoc(); + return new MultiSortedDocValues(values, starts, map); + } + + @Override + public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { + ensureOpen(); + OrdinalMap map = null; + synchronized (cachedOrdMaps) { + map = cachedOrdMaps.get(field); + if (map == null) { + // uncached, or not a multi dv + SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field); + if (dv instanceof MultiSortedSetDocValues) { + map = ((MultiSortedSetDocValues)dv).mapping; + if (map.owner == getCoreCacheKey() && merging == false) { + cachedOrdMaps.put(field, map); + } + } + return dv; + } + } + + assert map != null; + int size = in.leaves().size(); + final SortedSetDocValues[] values = new SortedSetDocValues[size]; + final int[] starts = new int[size+1]; + for (int i = 0; i < size; i++) { + LeafReaderContext context = in.leaves().get(i); + final LeafReader reader = context.reader(); + final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); + if(fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET){ + return null; + } + SortedSetDocValues v = reader.getSortedSetDocValues(field); + if (v == null) { + v = DocValues.emptySortedSet(); + } + values[i] = v; + starts[i] = context.docBase; + } + starts[size] = maxDoc(); + return new MultiSortedSetDocValues(values, starts, map); + } + + // TODO: this could really be a weak map somewhere else on the coreCacheKey, + // but do we really need to optimize slow-wrapper any more? + private final Map cachedOrdMaps = new HashMap<>(); + + @Override + public NumericDocValues getNormValues(String field) throws IOException { + ensureOpen(); + return MultiDocValues.getNormValues(in, field); + } + + @Override + public Fields getTermVectors(int docID) throws IOException { + ensureOpen(); + return in.getTermVectors(docID); + } + + @Override + public int numDocs() { + // Don't call ensureOpen() here (it could affect performance) + return in.numDocs(); + } + + @Override + public int maxDoc() { + // Don't call ensureOpen() here (it could affect performance) + return in.maxDoc(); + } + + @Override + public void document(int docID, StoredFieldVisitor visitor) throws IOException { + ensureOpen(); + in.document(docID, visitor); + } + + @Override + public Bits getLiveDocs() { + ensureOpen(); + return MultiFields.getLiveDocs(in); + } + + @Override + public PointValues getPointValues() { + ensureOpen(); + return null; + } + + @Override + public FieldInfos getFieldInfos() { + ensureOpen(); + return MultiFields.getMergedFieldInfos(in); + } + + @Override + public Object getCoreCacheKey() { + return in.getCoreCacheKey(); + } + + @Override + public Object getCombinedCoreAndDeletesKey() { + return in.getCombinedCoreAndDeletesKey(); + } + + @Override + protected void doClose() throws IOException { + // TODO: as this is a wrapper, should we really close the delegate? + in.close(); + } + + @Override + public void checkIntegrity() throws IOException { + ensureOpen(); + for (LeafReaderContext ctx : in.leaves()) { + ctx.reader().checkIntegrity(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java new file mode 100644 index 00000000000..7b334b15c88 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergePolicyWrapper; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.MergeTrigger; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.search.Sort; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedLongValues; +import org.apache.solr.index.SlowCompositeReaderWrapper; + +// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction? + +public final class SortingMergePolicy extends MergePolicyWrapper { + + private final Sort sort; + + /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */ + public SortingMergePolicy(MergePolicy in, Sort sort) { + super(in); + this.sort = sort; + } + + /** Return the {@link Sort} order that is used to sort segments when merging. 
*/ + public Sort getSort() { + return sort; + } + + @Override + public String toString() { + return "SortingMergePolicy(" + in + ", sort=" + sort + ")"; + } +} diff --git a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java index 39d45c22a08..3130004593c 100644 --- a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java @@ -28,6 +28,7 @@ import java.util.Set; import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; + import org.apache.lucene.document.Field; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexableField; @@ -41,22 +42,16 @@ import org.apache.lucene.spatial.SpatialStrategy; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialArgsParser; import org.apache.lucene.spatial.query.SpatialOperation; -import org.apache.lucene.uninverting.UninvertingReader.Type; -import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.search.SpatialOptions; +import org.apache.solr.uninverting.UninvertingReader.Type; import org.apache.solr.util.DistanceUnits; import org.apache.solr.util.MapListener; import org.apache.solr.util.SpatialUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.base.Throwables; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; import org.locationtech.spatial4j.context.SpatialContext; import org.locationtech.spatial4j.context.SpatialContextFactory; import org.locationtech.spatial4j.distance.DistanceUtils; @@ -66,6 +61,12 @@ import org.locationtech.spatial4j.io.SupportedFormats; import org.locationtech.spatial4j.shape.Point; import org.locationtech.spatial4j.shape.Rectangle; import org.locationtech.spatial4j.shape.Shape; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Throwables; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; /** * Abstract base class for Solr FieldTypes based on a Lucene 4 {@link SpatialStrategy}. 
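The import swaps in the surrounding hunks all point at the relocated org.apache.solr.uninverting.UninvertingReader. As a rough orientation, here is a minimal sketch of what that class is used for, assuming the moved copy keeps the wrap(DirectoryReader, Map<String,Type>) factory of the Lucene original; the index path and the field name "cat" are made up for illustration:

import java.nio.file.Paths;
import java.util.Collections;
import java.util.Map;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader.Type;

public class UninvertingReaderSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(Paths.get("/tmp/example-index")); // hypothetical path
    DirectoryReader plain = DirectoryReader.open(dir);

    // Ask for the indexed (non-docValues) string field "cat" to be exposed
    // as SORTED doc values, backed by the uninverting FieldCache.
    Map<String, Type> mapping = Collections.singletonMap("cat", Type.SORTED);
    DirectoryReader uninverting = UninvertingReader.wrap(plain, mapping);
    try {
      for (LeafReaderContext ctx : uninverting.leaves()) {
        // Each leaf now answers getSortedDocValues("cat") even though the
        // field was never indexed with doc values.
        System.out.println(ctx.reader().getSortedDocValues("cat"));
      }
    } finally {
      uninverting.close(); // also closes the wrapped reader
    }
  }
}

Fields not named in the mapping pass through untouched, which is why the schema FieldType classes touched by this patch each report their own UninvertingReader.Type.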
diff --git a/solr/core/src/java/org/apache/solr/schema/BinaryField.java b/solr/core/src/java/org/apache/solr/schema/BinaryField.java index 4e5bfee25c4..889bd3c4523 100644 --- a/solr/core/src/java/org/apache/solr/schema/BinaryField.java +++ b/solr/core/src/java/org/apache/solr/schema/BinaryField.java @@ -23,10 +23,10 @@ import java.nio.ByteBuffer; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.SortField; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.util.Base64; import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.uninverting.UninvertingReader.Type; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index f86a6a327ce..01161e72ada 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -22,15 +22,14 @@ import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.BoolDocValues; import org.apache.lucene.search.SortField; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRefBuilder; @@ -40,6 +39,7 @@ import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.search.function.OrdFieldSource; +import org.apache.solr.uninverting.UninvertingReader.Type; /** * */ diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index ad6d8723da3..998db2ac336 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -40,12 +40,12 @@ import org.apache.lucene.search.DocValuesRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; -import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; +import org.apache.solr.uninverting.UninvertingReader.Type; /** * Field for collated sort keys. 
diff --git a/solr/core/src/java/org/apache/solr/schema/CurrencyField.java b/solr/core/src/java/org/apache/solr/schema/CurrencyField.java index 5a0bb1a8a85..26ea1675e2d 100644 --- a/solr/core/src/java/org/apache/solr/schema/CurrencyField.java +++ b/solr/core/src/java/org/apache/solr/schema/CurrencyField.java @@ -44,7 +44,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldValueQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; -import org.apache.lucene.uninverting.UninvertingReader.Type; +import org.apache.solr.uninverting.UninvertingReader.Type; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.response.TextResponseWriter; diff --git a/solr/core/src/java/org/apache/solr/schema/EnumField.java b/solr/core/src/java/org/apache/solr/schema/EnumField.java index cbf1d4eb9e8..27f3a0a1ea8 100644 --- a/solr/core/src/java/org/apache/solr/schema/EnumField.java +++ b/solr/core/src/java/org/apache/solr/schema/EnumField.java @@ -16,12 +16,6 @@ */ package org.apache.solr.schema; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.xpath.XPath; -import javax.xml.xpath.XPathConstants; -import javax.xml.xpath.XPathExpressionException; -import javax.xml.xpath.XPathFactory; import java.io.IOException; import java.io.InputStream; import java.lang.invoke.MethodHandles; @@ -31,6 +25,12 @@ import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.LegacyIntField; @@ -45,7 +45,6 @@ import org.apache.lucene.search.DocValuesRangeQuery; import org.apache.lucene.search.LegacyNumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.CharsRef; @@ -55,6 +54,7 @@ import org.apache.solr.common.EnumFieldValue; import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; +import org.apache.solr.uninverting.UninvertingReader.Type; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; diff --git a/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java b/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java index da35650e94d..feba4e0fdff 100644 --- a/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java +++ b/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java @@ -16,17 +16,17 @@ */ package org.apache.solr.schema; +import java.io.IOException; +import java.util.Map; + import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.search.function.FileFloatSource; - -import 
java.io.IOException; -import java.util.Map; +import org.apache.solr.uninverting.UninvertingReader.Type; /** Get values from an external file instead of the index. * diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index ab89f94c575..6556ddb77f6 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -49,7 +49,6 @@ import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.CharsRef; @@ -66,6 +65,7 @@ import org.apache.solr.query.SolrRangeQuery; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.search.Sorting; +import org.apache.solr.uninverting.UninvertingReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/solr/core/src/java/org/apache/solr/schema/GeoHashField.java b/solr/core/src/java/org/apache/solr/schema/GeoHashField.java index 2baf72e192b..3f0d765a270 100644 --- a/solr/core/src/java/org/apache/solr/schema/GeoHashField.java +++ b/solr/core/src/java/org/apache/solr/schema/GeoHashField.java @@ -18,23 +18,23 @@ package org.apache.solr.schema; import java.io.IOException; -import org.locationtech.spatial4j.context.SpatialContext; -import org.locationtech.spatial4j.distance.DistanceUtils; -import org.locationtech.spatial4j.io.GeohashUtils; -import org.locationtech.spatial4j.shape.Point; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.LiteralValueSource; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SpatialOptions; import org.apache.solr.search.function.ValueSourceRangeFilter; import org.apache.solr.search.function.distance.GeohashHaversineFunction; +import org.apache.solr.uninverting.UninvertingReader.Type; import org.apache.solr.util.SpatialUtils; +import org.locationtech.spatial4j.context.SpatialContext; +import org.locationtech.spatial4j.distance.DistanceUtils; +import org.locationtech.spatial4j.io.GeohashUtils; +import org.locationtech.spatial4j.shape.Point; /** * This is a class that represents a TextField is the basic type for configurable text analysis. * Analyzers for field types using this implementation should be defined in the schema. 
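The SlowCompositeReaderWrapper javadoc earlier in this patch advises gathering the sub-readers and working per leaf instead of wrapping the whole index. A small sketch of both access patterns, relying only on the wrap(IndexReader) factory shown above; the field name is a placeholder:

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.solr.index.SlowCompositeReaderWrapper;

public class LeafAccessSketch {

  // Preferred: step through the leaves and read each segment's doc values directly.
  static void perLeaf(DirectoryReader reader, String field) throws Exception {
    for (LeafReaderContext ctx : reader.leaves()) {
      SortedDocValues dv = ctx.reader().getSortedDocValues(field);
      System.out.println("leaf " + ctx.ord + " -> " + dv);
    }
  }

  // Convenience, but slower: emulate one big LeafReader; ordinals are merged
  // through MultiDocValues/OrdinalMap as in the wrapper code above.
  static void viaSlowWrapper(DirectoryReader reader, String field) throws Exception {
    LeafReader merged = SlowCompositeReaderWrapper.wrap(reader);
    SortedDocValues dv = merged.getSortedDocValues(field);
    System.out.println("merged view -> " + dv);
  }
}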
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java index 3483909143f..b7bac1c5313 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java @@ -26,8 +26,8 @@ import java.util.List; import java.util.Locale; import java.util.Map; -import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType.LegacyNumericType; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.LegacyDoubleField; import org.apache.lucene.document.LegacyFloatField; import org.apache.lucene.document.LegacyIntField; @@ -47,7 +47,6 @@ import org.apache.lucene.search.LegacyNumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSelector; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.CharsRef; @@ -61,6 +60,7 @@ import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.FunctionRangeQuery; import org.apache.solr.search.QParser; import org.apache.solr.search.function.ValueSourceRangeFilter; +import org.apache.solr.uninverting.UninvertingReader.Type; import org.apache.solr.util.DateMathParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index 5686fe1143a..0cb6682b6db 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -25,15 +25,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import com.carrotsearch.hppc.FloatArrayList; -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.IntIntHashMap; -import com.carrotsearch.hppc.IntLongHashMap; -import com.carrotsearch.hppc.cursors.IntIntCursor; -import com.carrotsearch.hppc.cursors.IntLongCursor; - import org.apache.commons.lang.StringUtils; - import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; @@ -49,13 +41,12 @@ import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BytesRef; @@ -65,8 +56,8 @@ import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; -import org.apache.solr.handler.component.ResponseBuilder; import org.apache.solr.handler.component.QueryElevationComponent; +import org.apache.solr.handler.component.ResponseBuilder; import 
org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; @@ -75,6 +66,14 @@ import org.apache.solr.schema.StrField; import org.apache.solr.schema.TrieFloatField; import org.apache.solr.schema.TrieIntField; import org.apache.solr.schema.TrieLongField; +import org.apache.solr.uninverting.UninvertingReader; + +import com.carrotsearch.hppc.FloatArrayList; +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.IntLongHashMap; +import com.carrotsearch.hppc.cursors.IntIntCursor; +import com.carrotsearch.hppc.cursors.IntLongCursor; /** diff --git a/solr/core/src/java/org/apache/solr/search/Insanity.java b/solr/core/src/java/org/apache/solr/search/Insanity.java index 2a1edc71b16..7f16797b8f4 100644 --- a/solr/core/src/java/org/apache/solr/search/Insanity.java +++ b/solr/core/src/java/org/apache/solr/search/Insanity.java @@ -20,16 +20,16 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import org.apache.lucene.index.FilterLeafReader; -import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FilterLeafReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.uninverting.UninvertingReader; +import org.apache.solr.uninverting.UninvertingReader; /** * Lucene 5.0 removes "accidental" insanity, so you must explicitly diff --git a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java index da997081c2e..3f5d9e981a4 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java +++ b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java @@ -18,13 +18,12 @@ package org.apache.solr.search; import java.net.URL; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; - +import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrInfoMBean; -import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean; +import org.apache.solr.uninverting.UninvertingReader; /** * A SolrInfoMBean that provides introspection of the Solr FieldCache diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index bd08ee09cf5..54f7e303728 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -53,7 +53,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiPostingsEnum; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.StoredFieldVisitor; @@ -94,7 +93,6 @@ import org.apache.lucene.search.TopScoreDocCollector; import 
org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -109,6 +107,7 @@ import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrInfoMBean; +import org.apache.solr.index.SlowCompositeReaderWrapper; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; @@ -122,6 +121,7 @@ import org.apache.solr.schema.TrieFloatField; import org.apache.solr.schema.TrieIntField; import org.apache.solr.search.facet.UnInvertedField; import org.apache.solr.search.stats.StatsSource; +import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.update.IndexFingerprint; import org.apache.solr.update.SolrIndexConfig; import org.slf4j.Logger; diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java index c1613cde680..b52b3335cb9 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java @@ -27,17 +27,16 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.uninverting.DocTermOrds; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.FixedBitSet; import org.apache.solr.common.SolrException; import org.apache.solr.core.SolrCore; +import org.apache.solr.index.SlowCompositeReaderWrapper; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.TrieField; import org.apache.solr.search.BitDocSet; @@ -45,6 +44,7 @@ import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrCache; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.uninverting.DocTermOrds; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/solr/core/src/java/org/apache/solr/search/function/OrdFieldSource.java b/solr/core/src/java/org/apache/solr/search/function/OrdFieldSource.java index 8a9d6950138..3b63bd99cb9 100644 --- a/solr/core/src/java/org/apache/solr/search/function/OrdFieldSource.java +++ b/solr/core/src/java/org/apache/solr/search/function/OrdFieldSource.java @@ -20,13 +20,12 @@ import java.io.IOException; import java.util.List; import java.util.Map; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; import 
org.apache.lucene.queries.function.ValueSource; @@ -34,6 +33,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues; import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueInt; +import org.apache.solr.index.SlowCompositeReaderWrapper; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.Insanity; import org.apache.solr.search.SolrIndexSearcher; diff --git a/solr/core/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java b/solr/core/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java index 9375d969154..abc9f95426a 100644 --- a/solr/core/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java +++ b/solr/core/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java @@ -20,18 +20,18 @@ import java.io.IOException; import java.util.List; import java.util.Map; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.IntDocValues; import org.apache.lucene.search.SortedSetSelector; +import org.apache.solr.index.SlowCompositeReaderWrapper; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.Insanity; import org.apache.solr.search.SolrIndexSearcher; diff --git a/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java index 93a4b20d295..6bd7952094a 100644 --- a/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java @@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Query; import org.apache.lucene.search.join.JoinUtil; import org.apache.lucene.search.join.ScoreMode; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.Aliases; @@ -45,6 +44,7 @@ import org.apache.solr.search.QParser; import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SyntaxError; +import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.util.RefCounted; /** diff --git a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java new file mode 100644 index 00000000000..4b60dbad087 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java @@ -0,0 +1,887 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.codecs.PostingsFormat; // javadocs +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.StringHelper; + +/** + * This class enables fast access to multiple term ords for + * a specified field across all docIDs. + * + * Like FieldCache, it uninverts the index and holds a + * packed data structure in RAM to enable fast access. + * Unlike FieldCache, it can handle multi-valued fields, + * and, it does not hold the term bytes in RAM. Rather, you + * must obtain a TermsEnum from the {@link #getOrdTermsEnum} + * method, and then seek-by-ord to get the term's bytes. + * + * While normally term ords are type long, in this API they are + * int as the internal representation here cannot address + * more than MAX_INT unique terms. Also, typically this + * class is used on fields with relatively few unique terms + * vs the number of documents. In addition, there is an + * internal limit (16 MB) on how many bytes each chunk of + * documents may consume. If you trip this limit you'll hit + * an IllegalStateException. + * + * Deleted documents are skipped during uninversion, and if + * you look them up you'll get 0 ords. + * + * The returned per-document ords do not retain their + * original order in the document. Instead they are returned + * in sorted (by ord, ie term's BytesRef comparator) order. They + * are also de-dup'd (ie if doc has same term more than once + * in this field, you'll only get that ord back once). + * + * This class + * will create its own term index internally, allowing to + * create a wrapped TermsEnum that can handle ord. The + * {@link #getOrdTermsEnum} method then provides this + * wrapped enum. + * + * The RAM consumption of this class can be high! + * + * @lucene.experimental + */ + +/* + * Final form of the un-inverted field: + * Each document points to a list of term numbers that are contained in that document. + * + * Term numbers are in sorted order, and are encoded as variable-length deltas from the + * previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A + * term number of 0 signals the end of the termNumber list. 
+ * + * There is a single int[maxDoc()] which either contains a pointer into a byte[] for + * the termNumber lists, or directly contains the termNumber list if it fits in the 4 + * bytes of an integer. If the first byte in the integer is 1, the next 3 bytes + * are a pointer into a byte[] where the termNumber list starts. + * + * There are actually 256 byte arrays, to compensate for the fact that the pointers + * into the byte arrays are only 3 bytes long. The correct byte array for a document + * is a function of its id. + * + * To save space and speed up faceting, any term that matches enough documents will + * not be un-inverted... it will be skipped while building the un-inverted field structure, + * and will use a set intersection method during faceting. + * + * To further save memory, the terms (the actual string values) are not all stored in + * memory, but a TermIndex is used to convert term numbers to term values only + * for the terms needed after faceting has completed. Only every 128th term value + * is stored, along with its corresponding term number, and this is used as an + * index to find the closest term and iterate until the desired number is hit (very + * much like Lucene's own internal term index). + * + */ + +public class DocTermOrds implements Accountable { + + // Term ords are shifted by this, internally, to reserve + // values 0 (end term) and 1 (index is a pointer into byte array) + private final static int TNUM_OFFSET = 2; + + /** Every 128th term is indexed, by default. */ + public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing + + private int indexIntervalBits; + private int indexIntervalMask; + private int indexInterval; + + /** Don't uninvert terms that exceed this count. */ + protected final int maxTermDocFreq; + + /** Field we are uninverting. */ + protected final String field; + + /** Number of terms in the field. */ + protected int numTermsInField; + + /** Total number of references to term numbers. */ + protected long termInstances; + private long memsz; + + /** Total time to uninvert the field. */ + protected int total_time; + + /** Time for phase1 of the uninvert process. */ + protected int phase1_time; + + /** Holds the per-document ords or a pointer to the ords. */ + protected int[] index; + + /** Holds term ords for documents. */ + protected byte[][] tnums = new byte[256][]; + + /** Total bytes (sum of term lengths) for all indexed terms.*/ + protected long sizeOfIndexedStrings; + + /** Holds the indexed (by default every 128th) terms. */ + protected BytesRef[] indexedTermsArray = new BytesRef[0]; + + /** If non-null, only terms matching this prefix were + * indexed. */ + protected BytesRef prefix; + + /** Ordinal of the first term in the field, or 0 if the + * {@link PostingsFormat} does not implement {@link + * TermsEnum#ord}. */ + protected int ordBase; + + /** Used while uninverting. */ + protected PostingsEnum postingsEnum; + + /** If true, check and throw an exception if the field has docValues enabled. + * Normally, docValues should be used in preference to DocTermOrds. */ + protected boolean checkForDocValues = true; + + /** Returns total bytes used. 
*/ + public long ramBytesUsed() { + // can cache the mem size since it shouldn't change + if (memsz!=0) return memsz; + long sz = 8*8 + 32; // local fields + if (index != null) sz += index.length * 4; + if (tnums!=null) { + for (byte[] arr : tnums) + if (arr != null) sz += arr.length; + } + memsz = sz; + return sz; + } + + /** Inverts all terms */ + public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException { + this(reader, liveDocs, field, null, Integer.MAX_VALUE); + } + + // TODO: instead of all these ctors and options, take termsenum! + + /** Inverts only terms starting w/ prefix */ + public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException { + this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE); + } + + /** Inverts only terms starting w/ prefix, and only terms + * whose docFreq (not taking deletions into account) is + * <= maxTermDocFreq */ + public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException { + this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS); + } + + /** Inverts only terms starting w/ prefix, and only terms + * whose docFreq (not taking deletions into account) is + * <= maxTermDocFreq, with a custom indexing interval + * (default is every 128nd term). */ + public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException { + this(field, maxTermDocFreq, indexIntervalBits); + uninvert(reader, liveDocs, termPrefix); + } + + /** Subclass inits w/ this, but be sure you then call + * uninvert, only once */ + protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) { + //System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq); + this.field = field; + this.maxTermDocFreq = maxTermDocFreq; + this.indexIntervalBits = indexIntervalBits; + indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits); + indexInterval = 1 << indexIntervalBits; + } + + /** + * Returns a TermsEnum that implements ord, or null if no terms in field. + *
+ * We build a "private" terms
+ * index internally (WARNING: consumes RAM) and use that
+ * index to implement ord. This also enables ord on top
+ * of a composite reader. The returned TermsEnum is
+ * unpositioned. This returns null if there are no terms.
+ *
+ *
NOTE: you must pass the same reader that was + * used when creating this class + */ + public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException { + // NOTE: see LUCENE-6529 before attempting to optimize this method to + // return a TermsEnum directly from the reader if it already supports ord(). + + assert null != indexedTermsArray; + + if (0 == indexedTermsArray.length) { + return null; + } else { + return new OrdWrappedTermsEnum(reader); + } + } + + /** + * Returns the number of terms in this field + */ + public int numTerms() { + return numTermsInField; + } + + /** + * Returns {@code true} if no terms were indexed. + */ + public boolean isEmpty() { + return index == null; + } + + /** Subclass can override this */ + protected void visitTerm(TermsEnum te, int termNum) throws IOException { + } + + /** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)} + * to record the document frequency for each uninverted + * term. */ + protected void setActualDocFreq(int termNum, int df) throws IOException { + } + + /** Call this only once (if you subclass!) */ + protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } + //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); + final long startTime = System.nanoTime(); + prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix); + + final int maxDoc = reader.maxDoc(); + final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number + final int[] lastTerm = new int[maxDoc]; // last term we saw for this document + final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts) + + final Terms terms = reader.terms(field); + if (terms == null) { + // No terms + return; + } + + final TermsEnum te = terms.iterator(); + final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef(); + //System.out.println("seekStart=" + seekStart.utf8ToString()); + if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) { + // No terms match + return; + } + + // For our "term index wrapper" + final List indexedTerms = new ArrayList<>(); + final PagedBytes indexedTermsBytes = new PagedBytes(15); + + // we need a minimum of 9 bytes, but round up to 12 since the space would + // be wasted with most allocators anyway. + byte[] tempArr = new byte[12]; + + // + // enumerate all terms, and build an intermediate form of the un-inverted field. + // + // During this intermediate form, every document has a (potential) byte[] + // and the int[maxDoc()] array either contains the termNumber list directly + // or the *end* offset of the termNumber list in its byte array (for faster + // appending and faster creation of the final form). + // + // idea... if things are too large while building, we could do a range of docs + // at a time (but it would be a fair amount slower to build) + // could also do ranges in parallel to take advantage of multiple CPUs + + // OPTIONAL: remap the largest df terms to the lowest 128 (single byte) + // values. This requires going over the field first to find the most + // frequent terms ahead of time. 
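For illustration (this sketch is not part of the patch), the packed entries in the int[maxDoc()] array described in the comment above can be interpreted as follows once uninverting has completed; the names index, tnums and docID mirror the fields of this class, while the helper class itself is hypothetical:

class PackedEntrySketch {
  // Low byte == 1: the remaining 3 bytes are an offset into one of the shared byte arrays.
  // Any other low byte: the vInt-encoded term-number list is stored inline in the int itself.
  static boolean isPointer(int code) {
    return (code & 0xff) == 1;
  }

  // The 24-bit offset packed above the marker byte.
  static int offset(int code) {
    return code >>> 8;
  }

  // Which of the 256 shared byte arrays a document uses is a pure function of its id.
  static byte[] arrayFor(byte[][] tnums, int docID) {
    return tnums[(docID >>> 16) & 0xff];
  }
}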
+ + int termNum = 0; + postingsEnum = null; + + // Loop begins with te positioned to first term (we call + // seek above): + for (;;) { + final BytesRef t = te.term(); + if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) { + break; + } + //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum); + + visitTerm(te, termNum); + + if ((termNum & indexIntervalMask) == 0) { + // Index this term + sizeOfIndexedStrings += t.length; + BytesRef indexedTerm = new BytesRef(); + indexedTermsBytes.copy(t, indexedTerm); + // TODO: really should 1) strip off useless suffix, + // and 2) use FST not array/PagedBytes + indexedTerms.add(indexedTerm); + } + + final int df = te.docFreq(); + if (df <= maxTermDocFreq) { + + postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE); + + // dF, but takes deletions into account + int actualDF = 0; + + for (;;) { + int doc = postingsEnum.nextDoc(); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + //System.out.println(" chunk=" + chunk + " docs"); + + actualDF ++; + termInstances++; + + //System.out.println(" docID=" + doc); + // add TNUM_OFFSET to the term number to make room for special reserved values: + // 0 (end term) and 1 (index into byte array follows) + int delta = termNum - lastTerm[doc] + TNUM_OFFSET; + lastTerm[doc] = termNum; + int val = index[doc]; + + if ((val & 0xff)==1) { + // index into byte array (actually the end of + // the doc-specific byte[] when building) + int pos = val >>> 8; + int ilen = vIntSize(delta); + byte[] arr = bytes[doc]; + int newend = pos+ilen; + if (newend > arr.length) { + // We avoid a doubling strategy to lower memory usage. + // this faceting method isn't for docs with many terms. + // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary. + // TODO: figure out what array lengths we can round up to w/o actually using more memory + // (how much space does a byte[] take up? Is data preceded by a 32 bit length only? + // It should be safe to round up to the nearest 32 bits in any case. + int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment + byte[] newarr = new byte[newLen]; + System.arraycopy(arr, 0, newarr, 0, pos); + arr = newarr; + bytes[doc] = newarr; + } + pos = writeInt(delta, arr, pos); + index[doc] = (pos<<8) | 1; // update pointer to end index in byte[] + } else { + // OK, this int has data in it... find the end (a zero starting byte - not + // part of another number, hence not following a byte with the high bit set). + int ipos; + if (val==0) { + ipos=0; + } else if ((val & 0x0000ff80)==0) { + ipos=1; + } else if ((val & 0x00ff8000)==0) { + ipos=2; + } else if ((val & 0xff800000)==0) { + ipos=3; + } else { + ipos=4; + } + + //System.out.println(" ipos=" + ipos); + + int endPos = writeInt(delta, tempArr, ipos); + //System.out.println(" endpos=" + endPos); + if (endPos <= 4) { + //System.out.println(" fits!"); + // value will fit in the integer... move bytes back + for (int j=ipos; j>>=8; + } + // point at the end index in the byte[] + index[doc] = (endPos<<8) | 1; + bytes[doc] = tempArr; + tempArr = new byte[12]; + } + } + } + setActualDocFreq(termNum, actualDF); + } + + termNum++; + if (te.next() == null) { + break; + } + } + + numTermsInField = termNum; + + long midPoint = System.nanoTime(); + + if (termInstances == 0) { + // we didn't invert anything + // lower memory consumption. 
+ tnums = null; + } else { + + this.index = index; + + // + // transform intermediate form into the final form, building a single byte[] + // at a time, and releasing the intermediate byte[]s as we go to avoid + // increasing the memory footprint. + // + + for (int pass = 0; pass<256; pass++) { + byte[] target = tnums[pass]; + int pos=0; // end in target; + if (target != null) { + pos = target.length; + } else { + target = new byte[4096]; + } + + // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx + // where pp is the pass (which array we are building), and xx is all values. + // each pass shares the same byte[] for termNumber lists. + for (int docbase = pass<<16; docbase maxDoc) + break; + } + + } + indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]); + + long endTime = System.nanoTime(); + + total_time = (int) TimeUnit.MILLISECONDS.convert(endTime-startTime, TimeUnit.NANOSECONDS); + phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint-startTime, TimeUnit.NANOSECONDS); + } + + /** Number of bytes to represent an unsigned int as a vint. */ + private static int vIntSize(int x) { + if ((x & (0xffffffff << (7*1))) == 0 ) { + return 1; + } + if ((x & (0xffffffff << (7*2))) == 0 ) { + return 2; + } + if ((x & (0xffffffff << (7*3))) == 0 ) { + return 3; + } + if ((x & (0xffffffff << (7*4))) == 0 ) { + return 4; + } + return 5; + } + + // todo: if we know the size of the vInt already, we could do + // a single switch on the size + private static int writeInt(int x, byte[] arr, int pos) { + int a; + a = (x >>> (7*4)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + a = (x >>> (7*3)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + a = (x >>> (7*2)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + a = (x >>> (7*1)); + if (a != 0) { + arr[pos++] = (byte)(a | 0x80); + } + arr[pos++] = (byte)(x & 0x7f); + return pos; + } + + /** + * "wrap" our own terms index around the original IndexReader. + * Only valid if there are terms for this field rom the original reader + */ + private final class OrdWrappedTermsEnum extends TermsEnum { + private final TermsEnum termsEnum; + private BytesRef term; + private long ord = -indexInterval-1; // force "real" seek + + public OrdWrappedTermsEnum(LeafReader reader) throws IOException { + assert indexedTermsArray != null; + assert 0 != indexedTermsArray.length; + termsEnum = reader.fields().terms(field).iterator(); + } + + @Override + public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { + return termsEnum.postings(reuse, flags); + } + + @Override + public BytesRef term() { + return term; + } + + @Override + public BytesRef next() throws IOException { + if (++ord < 0) { + ord = 0; + } + if (termsEnum.next() == null) { + term = null; + return null; + } + return setTerm(); // this is extra work if we know we are in bounds... + } + + @Override + public int docFreq() throws IOException { + return termsEnum.docFreq(); + } + + @Override + public long totalTermFreq() throws IOException { + return termsEnum.totalTermFreq(); + } + + @Override + public long ord() { + return ordBase + ord; + } + + @Override + public SeekStatus seekCeil(BytesRef target) throws IOException { + + // already here + if (term != null && term.equals(target)) { + return SeekStatus.FOUND; + } + + int startIdx = Arrays.binarySearch(indexedTermsArray, target); + + if (startIdx >= 0) { + // we hit the term exactly... lucky us! 
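The vIntSize and writeInt helpers above emit the most significant 7-bit groups first and set the continuation bit (0x80) on every byte except the last; the reader (Iterator.read further down) stops at the first byte whose high bit is clear, and a lone zero byte marks the end of a document's list, which is why TNUM_OFFSET keeps real deltas at 2 or more. A self-contained round trip of that format (an editorial sketch, not code from the patch):

class VIntSketch {
  // Encode like writeInt above: high-order 7-bit groups first, continuation
  // bit (0x80) on every byte except the last.
  static int write(int x, byte[] arr, int pos) {
    for (int shift = 28; shift > 0; shift -= 7) {
      int group = x >>> shift;
      if (group != 0) {
        arr[pos++] = (byte) (group | 0x80);
      }
    }
    arr[pos++] = (byte) (x & 0x7f);
    return pos;
  }

  // Decode like Iterator.read: accumulate 7 bits per byte until a byte with
  // the high bit clear is seen.
  static int read(byte[] arr, int pos) {
    int value = 0;
    byte b;
    do {
      b = arr[pos++];
      value = (value << 7) | (b & 0x7f);
    } while ((b & 0x80) != 0);
    return value;
  }

  public static void main(String[] args) {
    byte[] buf = new byte[5];
    for (int x : new int[] {0, 1, 127, 128, 300, 1 << 21, Integer.MAX_VALUE}) {
      write(x, buf, 0);
      if (read(buf, 0) != x) throw new AssertionError("round trip failed for " + x);
    }
    System.out.println("round trip ok");
  }
}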
+ TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target); + assert seekStatus == TermsEnum.SeekStatus.FOUND; + ord = startIdx << indexIntervalBits; + setTerm(); + assert term != null; + return SeekStatus.FOUND; + } + + // we didn't hit the term exactly + startIdx = -startIdx-1; + + if (startIdx == 0) { + // our target occurs *before* the first term + TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target); + assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND; + ord = 0; + setTerm(); + assert term != null; + return SeekStatus.NOT_FOUND; + } + + // back up to the start of the block + startIdx--; + + if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) { + // we are already in the right block and the current term is before the term we want, + // so we don't need to seek. + } else { + // seek to the right block + TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]); + assert seekStatus == TermsEnum.SeekStatus.FOUND; + ord = startIdx << indexIntervalBits; + setTerm(); + assert term != null; // should be non-null since it's in the index + } + + while (term != null && term.compareTo(target) < 0) { + next(); + } + + if (term == null) { + return SeekStatus.END; + } else if (term.compareTo(target) == 0) { + return SeekStatus.FOUND; + } else { + return SeekStatus.NOT_FOUND; + } + } + + @Override + public void seekExact(long targetOrd) throws IOException { + int delta = (int) (targetOrd - ordBase - ord); + //System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval); + if (delta < 0 || delta > indexInterval) { + final int idx = (int) (targetOrd >>> indexIntervalBits); + final BytesRef base = indexedTermsArray[idx]; + //System.out.println(" do seek term=" + base.utf8ToString()); + ord = idx << indexIntervalBits; + delta = (int) (targetOrd - ord); + final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base); + assert seekStatus == TermsEnum.SeekStatus.FOUND; + } else { + //System.out.println("seek w/in block"); + } + + while (--delta >= 0) { + BytesRef br = termsEnum.next(); + if (br == null) { + assert false; + return; + } + ord++; + } + + setTerm(); + assert term != null; + } + + private BytesRef setTerm() throws IOException { + term = termsEnum.term(); + //System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString())); + if (prefix != null && !StringHelper.startsWith(term, prefix)) { + term = null; + } + return term; + } + } + + /** Returns the term ({@link BytesRef}) corresponding to + * the provided ordinal. 
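seekCeil and seekExact above only ever position the wrapped enum on one of the sampled (every 2^indexIntervalBits-th) terms and then step forward term by term. A compact standalone illustration of that lookup strategy over plain strings, assuming both arrays are sorted and sampled[i] equals allTerms[i << intervalBits]; the names are illustrative, not taken from the patch:

import java.util.Arrays;

class SampledTermIndexSketch {
  /** Returns the ordinal of the first term >= target, or allTerms.length if none. */
  static int seekCeil(String[] allTerms, String[] sampled, int intervalBits, String target) {
    int idx = Arrays.binarySearch(sampled, target);
    if (idx >= 0) {
      return idx << intervalBits;                 // exact hit on an indexed term
    }
    int block = Math.max(0, (-idx - 1) - 1);      // indexed term just before the target
    int ord = block << intervalBits;              // seek to the start of that block...
    while (ord < allTerms.length && allTerms[ord].compareTo(target) < 0) {
      ord++;                                      // ...then scan at most 2^intervalBits terms
    }
    return ord;
  }
}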
*/ + public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException { + termsEnum.seekExact(ord); + return termsEnum.term(); + } + + /** Returns a SortedSetDocValues view of this instance */ + public SortedSetDocValues iterator(LeafReader reader) throws IOException { + if (isEmpty()) { + return DocValues.emptySortedSet(); + } else { + return new Iterator(reader); + } + } + + private class Iterator extends SortedSetDocValues { + final LeafReader reader; + final TermsEnum te; // used internally for lookupOrd() and lookupTerm() + // currently we read 5 at a time (using the logic of the old iterator) + final int buffer[] = new int[5]; + int bufferUpto; + int bufferLength; + + private int tnum; + private int upto; + private byte[] arr; + + Iterator(LeafReader reader) throws IOException { + this.reader = reader; + this.te = termsEnum(); + } + + @Override + public long nextOrd() { + while (bufferUpto == bufferLength) { + if (bufferLength < buffer.length) { + return NO_MORE_ORDS; + } else { + bufferLength = read(buffer); + bufferUpto = 0; + } + } + return buffer[bufferUpto++]; + } + + /** Buffer must be at least 5 ints long. Returns number + * of term ords placed into buffer; if this count is + * less than buffer.length then that is the end. */ + int read(int[] buffer) { + int bufferUpto = 0; + if (arr == null) { + // code is inlined into upto + //System.out.println("inlined"); + int code = upto; + int delta = 0; + for (;;) { + delta = (delta << 7) | (code & 0x7f); + if ((code & 0x80)==0) { + if (delta==0) break; + tnum += delta - TNUM_OFFSET; + buffer[bufferUpto++] = ordBase+tnum; + //System.out.println(" tnum=" + tnum); + delta = 0; + } + code >>>= 8; + } + } else { + // code is a pointer + for(;;) { + int delta = 0; + for(;;) { + byte b = arr[upto++]; + delta = (delta << 7) | (b & 0x7f); + //System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b); + if ((b & 0x80) == 0) break; + } + //System.out.println(" delta=" + delta); + if (delta == 0) break; + tnum += delta - TNUM_OFFSET; + //System.out.println(" tnum=" + tnum); + buffer[bufferUpto++] = ordBase+tnum; + if (bufferUpto == buffer.length) { + break; + } + } + } + + return bufferUpto; + } + + @Override + public void setDocument(int docID) { + tnum = 0; + final int code = index[docID]; + if ((code & 0xff)==1) { + // a pointer + upto = code>>>8; + //System.out.println(" pointer! 
upto=" + upto); + int whichArray = (docID >>> 16) & 0xff; + arr = tnums[whichArray]; + } else { + //System.out.println(" inline!"); + arr = null; + upto = code; + } + bufferUpto = 0; + bufferLength = read(buffer); + } + + @Override + public BytesRef lookupOrd(long ord) { + try { + return DocTermOrds.this.lookupTerm(te, (int) ord); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public long getValueCount() { + return numTerms(); + } + + @Override + public long lookupTerm(BytesRef key) { + try { + switch (te.seekCeil(key)) { + case FOUND: + assert te.ord() >= 0; + return te.ord(); + case NOT_FOUND: + assert te.ord() >= 0; + return -te.ord()-1; + default: /* END */ + return -numTerms()-1; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public TermsEnum termsEnum() { + try { + return getOrdTermsEnum(reader); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java new file mode 100644 index 00000000000..7ef495618e2 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java @@ -0,0 +1,466 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.io.PrintStream; + +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.IndexReader; // javadocs +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LegacyNumericUtils; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Expert: Maintains caches of term values. + * + *
Created: May 19, 2004 11:13:14 AM + * + * @since lucene 1.4 + * @see FieldCacheSanityChecker + * + * @lucene.internal + */ +interface FieldCache { + + /** + * Placeholder indicating creation of this cache is currently in-progress. + */ + public static final class CreationPlaceholder implements Accountable { + Accountable value; + + @Override + public long ramBytesUsed() { + // don't call on the in-progress value, might make things angry. + return RamUsageEstimator.NUM_BYTES_OBJECT_REF; + } + } + + /** + * interface to all parsers. It is used to parse different numeric types. + */ + public interface Parser { + + /** + * Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers + * to filter the actual TermsEnum before the field cache is filled. + * + * @param terms the {@link Terms} instance to create the {@link TermsEnum} from. + * @return a possibly filtered {@link TermsEnum} instance, this method must not return null. + * @throws IOException if an {@link IOException} occurs + * @deprecated index with Points instead + */ + @Deprecated + public TermsEnum termsEnum(Terms terms) throws IOException; + + /** Parse's this field's value */ + public long parseValue(BytesRef term); + } + + /** + * Base class for points parsers. These parsers do not use the inverted index, but instead + * uninvert point data. + * + * This abstraction can be cleaned up when Parser.termsEnum is removed. + */ + public abstract class PointParser implements Parser { + public final TermsEnum termsEnum(Terms terms) throws IOException { + throw new UnsupportedOperationException("makes no sense for parsing points"); + } + } + + /** Expert: The cache used internally by sorting and range query classes. */ + public static FieldCache DEFAULT = new FieldCacheImpl(); + + /** + * A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.IntPoint}. + */ + public static final Parser INT_POINT_PARSER = new PointParser() { + @Override + public long parseValue(BytesRef point) { + return NumericUtils.sortableBytesToInt(point.bytes, point.offset); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".INT_POINT_PARSER"; + } + }; + + /** + * A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.LongPoint}. + */ + public static final Parser LONG_POINT_PARSER = new PointParser() { + @Override + public long parseValue(BytesRef point) { + return NumericUtils.sortableBytesToLong(point.bytes, point.offset); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".LONG_POINT_PARSER"; + } + }; + + /** + * A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.FloatPoint}. + */ + public static final Parser FLOAT_POINT_PARSER = new PointParser() { + @Override + public long parseValue(BytesRef point) { + return NumericUtils.sortableFloatBits(NumericUtils.sortableBytesToInt(point.bytes, point.offset)); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".FLOAT_POINT_PARSER"; + } + }; + + /** + * A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.DoublePoint}. 
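The float and double point parsers here depend on the "sortable bits" transform: after the XOR below, comparing the raw IEEE-754 bits as signed integers yields the same order as comparing the original floating-point values. A small standalone check of that property (editorial sketch; the method mirrors the XOR trick used by NumericUtils.sortableFloatBits):

class SortableBitsSketch {
  // Flip the 31 value bits of negative floats so that bigger bit patterns
  // always mean bigger numbers; positive floats are left unchanged.
  static int sortableFloatBits(int bits) {
    return bits ^ ((bits >> 31) & 0x7fffffff);
  }

  public static void main(String[] args) {
    float[] ascending = { Float.NEGATIVE_INFINITY, -3.5f, -0.0f, 0.0f, 1.25f, 7f };
    for (int i = 1; i < ascending.length; i++) {
      int prev = sortableFloatBits(Float.floatToIntBits(ascending[i - 1]));
      int curr = sortableFloatBits(Float.floatToIntBits(ascending[i]));
      if (prev > curr) {
        throw new AssertionError("order not preserved at index " + i);
      }
    }
    System.out.println("signed int order matches float order");
  }
}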
+ */ + public static final Parser DOUBLE_POINT_PARSER = new PointParser() { + @Override + public long parseValue(BytesRef point) { + return NumericUtils.sortableDoubleBits(NumericUtils.sortableBytesToLong(point.bytes, point.offset)); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".DOUBLE_POINT_PARSER"; + } + }; + + /** + * A parser instance for int values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.LegacyIntField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}. + * @deprecated Index with points and use {@link #INT_POINT_PARSER} instead. + */ + @Deprecated + public static final Parser LEGACY_INT_PARSER = new Parser() { + @Override + public long parseValue(BytesRef term) { + return LegacyNumericUtils.prefixCodedToInt(term); + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator()); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".LEGACY_INT_PARSER"; + } + }; + + /** + * A parser instance for float values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.LegacyFloatField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}. + * @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead. + */ + @Deprecated + public static final Parser LEGACY_FLOAT_PARSER = new Parser() { + @Override + public long parseValue(BytesRef term) { + int val = LegacyNumericUtils.prefixCodedToInt(term); + if (val<0) val ^= 0x7fffffff; + return val; + } + + @Override + public String toString() { + return FieldCache.class.getName()+".LEGACY_FLOAT_PARSER"; + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator()); + } + }; + + /** + * A parser instance for long values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.LegacyLongField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}. + * @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead. + */ + @Deprecated + public static final Parser LEGACY_LONG_PARSER = new Parser() { + @Override + public long parseValue(BytesRef term) { + return LegacyNumericUtils.prefixCodedToLong(term); + } + @Override + public String toString() { + return FieldCache.class.getName()+".LEGACY_LONG_PARSER"; + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator()); + } + }; + + /** + * A parser instance for double values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.lucene.document.LegacyDoubleField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}. + * @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead. 
+ */ + @Deprecated + public static final Parser LEGACY_DOUBLE_PARSER = new Parser() { + @Override + public long parseValue(BytesRef term) { + long val = LegacyNumericUtils.prefixCodedToLong(term); + if (val<0) val ^= 0x7fffffffffffffffL; + return val; + } + @Override + public String toString() { + return FieldCache.class.getName()+".LEGACY_DOUBLE_PARSER"; + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator()); + } + }; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms/points in field and returns a bit set at the size of + * reader.maxDoc(), with turned on bits for each docid that + * does have a value for this field. + * @param parser May be {@code null} if coming from the inverted index, otherwise + * can be a {@link PointParser} to compute from point values. + */ + public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException; + + /** + * Returns a {@link NumericDocValues} over the values found in documents in the given + * field. If the field was indexed as {@link NumericDocValuesField}, it simply + * uses {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values. + * Otherwise, it checks the internal cache for an appropriate entry, and if + * none is found, reads the terms/points in field as longs and returns + * an array of size reader.maxDoc() of the value each document + * has in the given field. + * + * @param reader + * Used to get field values. + * @param field + * Which field contains the longs. + * @param parser + * Computes long for string values. May be {@code null} if the + * requested field was indexed as {@link NumericDocValuesField} or + * {@link org.apache.lucene.document.LegacyLongField}. + * @param setDocsWithField + * If true then {@link #getDocsWithField} will also be computed and + * stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException + * If any error occurs. + */ + public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found, reads the term values in field + * and returns a {@link BinaryDocValues} instance, providing a + * method to retrieve the term (as a BytesRef) per document. + * @param reader Used to get field values. + * @param field Which field contains the strings. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException; + + /** Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String,boolean)}, + * but you can specify whether more RAM should be consumed in exchange for + * faster lookups (default is "true"). Note that the + * first call for a given reader and field "wins", + * subsequent calls will share the same cache entry. 
*/ + public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found, reads the term values in field + * and returns a {@link SortedDocValues} instance, + * providing methods to retrieve sort ordinals and terms + * (as a ByteRef) per document. + * @param reader Used to get field values. + * @param field Which field contains the strings. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException; + + /** Expert: just like {@link + * #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you can specify + * whether more RAM should be consumed in exchange for + * faster lookups (default is "true"). Note that the + * first call for a given reader and field "wins", + * subsequent calls will share the same cache entry. */ + public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException; + + /** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */ + public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_INT }); + /** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */ + public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_LONG }); + + /** + * Checks the internal cache for an appropriate entry, and if none is found, reads the term values + * in field and returns a {@link DocTermOrds} instance, providing a method to retrieve + * the terms (as ords) per document. + * + * @param reader Used to build a {@link DocTermOrds} instance + * @param field Which field contains the strings. + * @param prefix prefix for a subset of the terms which should be uninverted. Can be null or + * {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX} + * + * @return a {@link DocTermOrds} instance + * @throws IOException If any error occurs. + */ + public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException; + + /** + * EXPERT: A unique Identifier/Description for each item in the FieldCache. + * Can be useful for logging/debugging. + * @lucene.experimental + */ + public final class CacheEntry { + + private final Object readerKey; + private final String fieldName; + private final Class cacheType; + private final Object custom; + private final Accountable value; + + public CacheEntry(Object readerKey, String fieldName, + Class cacheType, + Object custom, + Accountable value) { + this.readerKey = readerKey; + this.fieldName = fieldName; + this.cacheType = cacheType; + this.custom = custom; + this.value = value; + } + + public Object getReaderKey() { + return readerKey; + } + + public String getFieldName() { + return fieldName; + } + + public Class getCacheType() { + return cacheType; + } + + public Object getCustom() { + return custom; + } + + public Object getValue() { + return value; + } + + /** + * The most recently estimated size of the value, null unless + * estimateSize has been called. + */ + public String getEstimatedSize() { + long bytesUsed = value == null ? 
0L : value.ramBytesUsed(); + return RamUsageEstimator.humanReadableUnits(bytesUsed); + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder(250); + b.append("'").append(getReaderKey()).append("'=>"); + b.append("'").append(getFieldName()).append("',"); + b.append(getCacheType()).append(",").append(getCustom()); + b.append("=>").append(getValue().getClass().getName()).append("#"); + b.append(System.identityHashCode(getValue())); + + String s = getEstimatedSize(); + b.append(" (size =~ ").append(s).append(')'); + + return b.toString(); + } + } + + /** + * EXPERT: Generates an array of CacheEntry objects representing all items + * currently in the FieldCache. + *
+ * NOTE: These CacheEntry objects maintain a strong reference to the
+ * cached values. Maintaining a reference to a CacheEntry after the
+ * LeafReader associated with it has been garbage collected will prevent the
+ * value itself from being garbage collected when the Cache drops its
+ * WeakReference.
+ *
+ * @lucene.experimental + */ + public CacheEntry[] getCacheEntries(); + + /** + *
+ * EXPERT: Instructs the FieldCache to forcibly expunge all entries
+ * from the underlying caches. This is intended only to be used for
+ * test methods as a way to ensure a known base state of the Cache
+ * (without needing to rely on GC to free WeakReferences).
+ * It should not be relied on for "Cache maintenance" in general
+ * application code.
+ *
+ * @lucene.experimental + */ + public void purgeAllCaches(); + + /** + * Expert: drops all cache entries associated with this + * reader {@link IndexReader#getCoreCacheKey}. NOTE: this cache key must + * precisely match the reader that the cache entry is + * keyed on. If you pass a top-level reader, it usually + * will have no effect as Lucene now caches at the segment + * reader level. + */ + public void purgeByCacheKey(Object coreCacheKey); + + /** + * If non-null, FieldCacheImpl will warn whenever + * entries are created that are not sane according to + * {@link FieldCacheSanityChecker}. + */ + public void setInfoStream(PrintStream stream); + + /** counterpart of {@link #setInfoStream(PrintStream)} */ + public PrintStream getInfoStream(); +} diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java new file mode 100644 index 00000000000..e6a066d8767 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java @@ -0,0 +1,1085 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.WeakHashMap; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.PointValues.IntersectVisitor; +import org.apache.lucene.index.PointValues.Relation; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Accountables; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.GrowableWriter; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedLongValues; + +/** + * Expert: The default cache implementation, storing all values in memory. 
+ * A WeakHashMap is used for storage. + * + * @since lucene 1.4 + */ +class FieldCacheImpl implements FieldCache { + + private Map,Cache> caches; + FieldCacheImpl() { + init(); + } + + private synchronized void init() { + caches = new HashMap<>(6); + caches.put(Long.TYPE, new LongCache(this)); + caches.put(BinaryDocValues.class, new BinaryDocValuesCache(this)); + caches.put(SortedDocValues.class, new SortedDocValuesCache(this)); + caches.put(DocTermOrds.class, new DocTermOrdsCache(this)); + caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this)); + } + + @Override + public synchronized void purgeAllCaches() { + init(); + } + + @Override + public synchronized void purgeByCacheKey(Object coreCacheKey) { + for(Cache c : caches.values()) { + c.purgeByCacheKey(coreCacheKey); + } + } + + @Override + public synchronized CacheEntry[] getCacheEntries() { + List result = new ArrayList<>(17); + for(final Map.Entry,Cache> cacheEntry: caches.entrySet()) { + final Cache cache = cacheEntry.getValue(); + final Class cacheType = cacheEntry.getKey(); + synchronized(cache.readerCache) { + for (final Map.Entry> readerCacheEntry : cache.readerCache.entrySet()) { + final Object readerKey = readerCacheEntry.getKey(); + if (readerKey == null) continue; + final Map innerCache = readerCacheEntry.getValue(); + for (final Map.Entry mapEntry : innerCache.entrySet()) { + CacheKey entry = mapEntry.getKey(); + result.add(new CacheEntry(readerKey, entry.field, + cacheType, entry.custom, + mapEntry.getValue())); + } + } + } + } + return result.toArray(new CacheEntry[result.size()]); + } + + // per-segment fieldcaches don't purge until the shared core closes. + final SegmentReader.CoreClosedListener purgeCore = new SegmentReader.CoreClosedListener() { + @Override + public void onClose(Object ownerCoreCacheKey) { + FieldCacheImpl.this.purgeByCacheKey(ownerCoreCacheKey); + } + }; + + private void initReader(LeafReader reader) { + reader.addCoreClosedListener(purgeCore); + } + + /** Expert: Internal cache. */ + abstract static class Cache { + + Cache(FieldCacheImpl wrapper) { + this.wrapper = wrapper; + } + + final FieldCacheImpl wrapper; + + final Map> readerCache = new WeakHashMap<>(); + + protected abstract Accountable createValue(LeafReader reader, CacheKey key, boolean setDocsWithField) + throws IOException; + + /** Remove this reader from the cache, if present. */ + public void purgeByCacheKey(Object coreCacheKey) { + synchronized(readerCache) { + readerCache.remove(coreCacheKey); + } + } + + /** Sets the key to the value for the provided reader; + * if the key is already set then this doesn't change it. 
*/ + public void put(LeafReader reader, CacheKey key, Accountable value) { + final Object readerKey = reader.getCoreCacheKey(); + synchronized (readerCache) { + Map innerCache = readerCache.get(readerKey); + if (innerCache == null) { + // First time this reader is using FieldCache + innerCache = new HashMap<>(); + readerCache.put(readerKey, innerCache); + wrapper.initReader(reader); + } + if (innerCache.get(key) == null) { + innerCache.put(key, value); + } else { + // Another thread beat us to it; leave the current + // value + } + } + } + + public Object get(LeafReader reader, CacheKey key, boolean setDocsWithField) throws IOException { + Map innerCache; + Accountable value; + final Object readerKey = reader.getCoreCacheKey(); + synchronized (readerCache) { + innerCache = readerCache.get(readerKey); + if (innerCache == null) { + // First time this reader is using FieldCache + innerCache = new HashMap<>(); + readerCache.put(readerKey, innerCache); + wrapper.initReader(reader); + value = null; + } else { + value = innerCache.get(key); + } + if (value == null) { + value = new CreationPlaceholder(); + innerCache.put(key, value); + } + } + if (value instanceof CreationPlaceholder) { + synchronized (value) { + CreationPlaceholder progress = (CreationPlaceholder) value; + if (progress.value == null) { + progress.value = createValue(reader, key, setDocsWithField); + synchronized (readerCache) { + innerCache.put(key, progress.value); + } + + // Only check if key.custom (the parser) is + // non-null; else, we check twice for a single + // call to FieldCache.getXXX + if (key.custom != null && wrapper != null) { + final PrintStream infoStream = wrapper.getInfoStream(); + if (infoStream != null) { + printNewInsanity(infoStream, progress.value); + } + } + } + return progress.value; + } + } + return value; + } + + private void printNewInsanity(PrintStream infoStream, Object value) { + final FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.checkSanity(wrapper); + for(int i=0;i 0; + + if (setDocsWithField) { + final int docCount = values.getDocCount(field); + assert docCount <= maxDoc; + if (docCount == maxDoc) { + // Fast case: all docs have this field: + this.docsWithField = new Bits.MatchAllBits(maxDoc); + setDocsWithField = false; + } + } + + final boolean doDocsWithField = setDocsWithField; + BytesRef scratch = new BytesRef(); + values.intersect(field, new IntersectVisitor() { + @Override + public void visit(int docID) throws IOException { + throw new AssertionError(); + } + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + scratch.bytes = packedValue; + scratch.length = packedValue.length; + visitTerm(scratch); + visitDoc(docID); + if (doDocsWithField) { + if (docsWithField == null) { + // Lazy init + docsWithField = new FixedBitSet(maxDoc); + } + ((FixedBitSet)docsWithField).set(docID); + } + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; // inspect all byte-docid pairs + } + }); + } + + final void uninvertPostings(LeafReader reader, String field, boolean setDocsWithField) throws IOException { + final int maxDoc = reader.maxDoc(); + Terms terms = reader.terms(field); + if (terms != null) { + if (setDocsWithField) { + final int termsDocCount = terms.getDocCount(); + assert termsDocCount <= maxDoc; + if (termsDocCount == maxDoc) { + // Fast case: all docs have this field: + this.docsWithField = new Bits.MatchAllBits(maxDoc); + setDocsWithField = false; + } + } + 
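Cache.get above combines a WeakHashMap keyed on the reader's core cache key, an inner per-field map, and a CreationPlaceholder so that only one thread computes a missing entry while other callers for the same key wait on it. A simplified standalone sketch of that pattern (the generic names are illustrative and not the patch's API):

import java.util.HashMap;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.function.Function;

class PerReaderCacheSketch<K, V> {
  private static final class Placeholder { Object value; }

  // Outer map is weakly keyed so entries vanish once a reader key is collected.
  private final Map<Object, Map<K, Object>> byReader = new WeakHashMap<>();

  @SuppressWarnings("unchecked")
  V get(Object readerKey, K key, Function<K, V> create) {
    Object v;
    synchronized (byReader) {
      Map<K, Object> inner = byReader.computeIfAbsent(readerKey, r -> new HashMap<>());
      v = inner.get(key);
      if (v == null) {
        v = new Placeholder();          // claim the slot without holding the lock during creation
        inner.put(key, v);
      }
    }
    if (!(v instanceof Placeholder)) {
      return (V) v;                     // already computed
    }
    Placeholder p = (Placeholder) v;
    synchronized (p) {                  // only the first thread runs create()
      if (p.value == null) {
        p.value = create.apply(key);
        synchronized (byReader) {       // publish the finished value
          byReader.computeIfAbsent(readerKey, r -> new HashMap<>()).put(key, p.value);
        }
      }
      return (V) p.value;
    }
  }
}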
+ final TermsEnum termsEnum = termsEnum(terms); + + PostingsEnum docs = null; + FixedBitSet docsWithField = null; + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + visitTerm(term); + docs = termsEnum.postings(docs, PostingsEnum.NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + visitDoc(docID); + if (setDocsWithField) { + if (docsWithField == null) { + // Lazy init + this.docsWithField = docsWithField = new FixedBitSet(maxDoc); + } + docsWithField.set(docID); + } + } + } + } + } + + /** @deprecated remove this when legacy numerics are removed */ + @Deprecated + protected abstract TermsEnum termsEnum(Terms terms) throws IOException; + protected abstract void visitTerm(BytesRef term); + protected abstract void visitDoc(int docID); + } + + // null Bits means no docs matched + void setDocsWithField(LeafReader reader, String field, Bits docsWithField, Parser parser) { + final int maxDoc = reader.maxDoc(); + final Bits bits; + if (docsWithField == null) { + bits = new Bits.MatchNoBits(maxDoc); + } else if (docsWithField instanceof FixedBitSet) { + final int numSet = ((FixedBitSet) docsWithField).cardinality(); + if (numSet >= maxDoc) { + // The cardinality of the BitSet is maxDoc if all documents have a value. + assert numSet == maxDoc; + bits = new Bits.MatchAllBits(maxDoc); + } else { + bits = docsWithField; + } + } else { + bits = docsWithField; + } + caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, parser), new BitsEntry(bits)); + } + + private static class HoldsOneThing { + private T it; + + public void set(T it) { + this.it = it; + } + + public T get() { + return it; + } + } + + private static class GrowableWriterAndMinValue { + GrowableWriterAndMinValue(GrowableWriter array, long minValue) { + this.writer = array; + this.minValue = minValue; + } + public GrowableWriter writer; + public long minValue; + } + + public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException { + final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); + if (fieldInfo == null) { + // field does not exist or has no value + return new Bits.MatchNoBits(reader.maxDoc()); + } else if (fieldInfo.getDocValuesType() != DocValuesType.NONE) { + return reader.getDocsWithField(field); + } + + if (parser instanceof PointParser) { + // points case + + } else { + // postings case + if (fieldInfo.getIndexOptions() == IndexOptions.NONE) { + return new Bits.MatchNoBits(reader.maxDoc()); + } + } + BitsEntry bitsEntry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, parser), false); + return bitsEntry.bits; + } + + static class BitsEntry implements Accountable { + final Bits bits; + + BitsEntry(Bits bits) { + this.bits = bits; + } + + @Override + public long ramBytesUsed() { + long base = RamUsageEstimator.NUM_BYTES_OBJECT_REF; + if (bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) { + return base; + } else { + return base + (bits.length() >>> 3); + } + } + } + + static final class DocsWithFieldCache extends Cache { + DocsWithFieldCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected BitsEntry createValue(LeafReader reader, CacheKey key, boolean setDocsWithField /* ignored */) throws IOException { + final String field = key.field; + final Parser parser = (Parser) key.custom; + if (parser instanceof PointParser) { + return createValuePoints(reader, field); + } else { + 
return createValuePostings(reader, field); + } + } + + private BitsEntry createValuePoints(LeafReader reader, String field) throws IOException { + final int maxDoc = reader.maxDoc(); + PointValues values = reader.getPointValues(); + assert values != null; + assert values.size(field) > 0; + + final int docCount = values.getDocCount(field); + assert docCount <= maxDoc; + if (docCount == maxDoc) { + // Fast case: all docs have this field: + return new BitsEntry(new Bits.MatchAllBits(maxDoc)); + } + + // otherwise a no-op uninvert! + Uninvert u = new Uninvert(true) { + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + throw new AssertionError(); + } + + @Override + protected void visitTerm(BytesRef term) {} + + @Override + protected void visitDoc(int docID) {} + }; + u.uninvert(reader, field, true); + return new BitsEntry(u.docsWithField); + } + + // TODO: it is dumb that uninverting code is duplicated here in this method!! + private BitsEntry createValuePostings(LeafReader reader, String field) throws IOException { + final int maxDoc = reader.maxDoc(); + + // Visit all docs that have terms for this field + FixedBitSet res = null; + Terms terms = reader.terms(field); + if (terms != null) { + final int termsDocCount = terms.getDocCount(); + assert termsDocCount <= maxDoc; + if (termsDocCount == maxDoc) { + // Fast case: all docs have this field: + return new BitsEntry(new Bits.MatchAllBits(maxDoc)); + } + final TermsEnum termsEnum = terms.iterator(); + PostingsEnum docs = null; + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + if (res == null) { + // lazy init + res = new FixedBitSet(maxDoc); + } + + docs = termsEnum.postings(docs, PostingsEnum.NONE); + // TODO: use bulk API + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + res.set(docID); + } + } + } + if (res == null) { + return new BitsEntry(new Bits.MatchNoBits(maxDoc)); + } + final int numSet = res.cardinality(); + if (numSet >= maxDoc) { + // The cardinality of the BitSet is maxDoc if all documents have a value. + assert numSet == maxDoc; + return new BitsEntry(new Bits.MatchAllBits(maxDoc)); + } + return new BitsEntry(res); + } + } + + @Override + public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException { + if (parser == null) { + throw new NullPointerException(); + } + final NumericDocValues valuesIn = reader.getNumericDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return valuesIn; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return DocValues.emptyNumeric(); + } else if (info.getDocValuesType() != DocValuesType.NONE) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } + + if (parser instanceof PointParser) { + // points case + // no points in this segment + if (info.getPointDimensionCount() == 0) { + return DocValues.emptyNumeric(); + } + if (info.getPointDimensionCount() != 1) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed with dimensions=" + info.getPointDimensionCount()); + } + PointValues values = reader.getPointValues(); + // no actual points for this field (e.g. 
all points deleted) + if (values == null || values.size(field) == 0) { + return DocValues.emptyNumeric(); + } + // not single-valued + if (values.size(field) != values.getDocCount(field)) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed with multiple values, numValues=" + values.size(field) + ",numDocs=" + values.getDocCount(field)); + } + } else { + // postings case + // not indexed + if (info.getIndexOptions() == IndexOptions.NONE) { + return DocValues.emptyNumeric(); + } + } + return (NumericDocValues) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); + } + } + + static class LongsFromArray extends NumericDocValues implements Accountable { + private final PackedInts.Reader values; + private final long minValue; + + public LongsFromArray(PackedInts.Reader values, long minValue) { + this.values = values; + this.minValue = minValue; + } + + @Override + public long get(int docID) { + return minValue + values.get(docID); + } + + @Override + public long ramBytesUsed() { + return values.ramBytesUsed() + RamUsageEstimator.NUM_BYTES_OBJECT_REF + Long.BYTES; + } + } + + static final class LongCache extends Cache { + LongCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(final LeafReader reader, CacheKey key, boolean setDocsWithField) + throws IOException { + + final Parser parser = (Parser) key.custom; + + final HoldsOneThing valuesRef = new HoldsOneThing<>(); + + Uninvert u = new Uninvert(parser instanceof PointParser) { + private long minValue; + private long currentValue; + private GrowableWriter values; + + @Override + public void visitTerm(BytesRef term) { + currentValue = parser.parseValue(term); + if (values == null) { + // Lazy alloc so for the numeric field case + // (which will hit a NumberFormatException + // when we first try the DEFAULT_INT_PARSER), + // we don't double-alloc: + int startBitsPerValue; + // Make sure than missing values (0) can be stored without resizing + if (currentValue < 0) { + minValue = currentValue; + startBitsPerValue = minValue == Long.MIN_VALUE ? 
64 : PackedInts.bitsRequired(-minValue); + } else { + minValue = 0; + startBitsPerValue = PackedInts.bitsRequired(currentValue); + } + values = new GrowableWriter(startBitsPerValue, reader.maxDoc(), PackedInts.FAST); + if (minValue != 0) { + values.fill(0, values.size(), -minValue); // default value must be 0 + } + valuesRef.set(new GrowableWriterAndMinValue(values, minValue)); + } + } + + @Override + public void visitDoc(int docID) { + values.set(docID, currentValue - minValue); + } + + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + return parser.termsEnum(terms); + } + }; + + u.uninvert(reader, key.field, setDocsWithField); + + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, u.docsWithField, parser); + } + GrowableWriterAndMinValue values = valuesRef.get(); + if (values == null) { + return new LongsFromArray(new PackedInts.NullReader(reader.maxDoc()), 0L); + } + return new LongsFromArray(values.writer.getMutable(), values.minValue); + } + } + + public static class SortedDocValuesImpl implements Accountable { + private final PagedBytes.Reader bytes; + private final PackedLongValues termOrdToBytesOffset; + private final PackedInts.Reader docToTermOrd; + private final int numOrd; + + public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) { + this.bytes = bytes; + this.docToTermOrd = docToTermOrd; + this.termOrdToBytesOffset = termOrdToBytesOffset; + this.numOrd = numOrd; + } + + public SortedDocValues iterator() { + final BytesRef term = new BytesRef(); + return new SortedDocValues() { + + @Override + public int getValueCount() { + return numOrd; + } + + @Override + public int getOrd(int docID) { + // Subtract 1, matching the 1+ord we did when + // storing, so that missing values, which are 0 in the + // packed ints, are returned as -1 ord: + return (int) docToTermOrd.get(docID)-1; + } + + @Override + public BytesRef lookupOrd(int ord) { + if (ord < 0) { + throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")"); + } + bytes.fill(term, termOrdToBytesOffset.get(ord)); + return term; + } + }; + } + + @Override + public long ramBytesUsed() { + return bytes.ramBytesUsed() + + termOrdToBytesOffset.ramBytesUsed() + + docToTermOrd.ramBytesUsed() + + 3*RamUsageEstimator.NUM_BYTES_OBJECT_REF + + Integer.BYTES; + } + + @Override + public Collection getChildResources() { + List resources = new ArrayList<>(3); + resources.add(Accountables.namedAccountable("term bytes", bytes)); + resources.add(Accountables.namedAccountable("ord -> term", termOrdToBytesOffset)); + resources.add(Accountables.namedAccountable("doc -> ord", docToTermOrd)); + return Collections.unmodifiableList(resources); + } + } + + public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException { + return getTermsIndex(reader, field, PackedInts.FAST); + } + + public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException { + SortedDocValues valuesIn = reader.getSortedDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return valuesIn; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return DocValues.emptySorted(); + } else if (info.getDocValuesType() != DocValuesType.NONE) { + // we don't try to build a sorted instance from numeric/binary doc + // values because dedup can be 
very costly + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (info.getIndexOptions() == IndexOptions.NONE) { + return DocValues.emptySorted(); + } + SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); + return impl.iterator(); + } + } + + static class SortedDocValuesCache extends Cache { + SortedDocValuesCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(LeafReader reader, CacheKey key, boolean setDocsWithField /* ignored */) + throws IOException { + + final int maxDoc = reader.maxDoc(); + + Terms terms = reader.terms(key.field); + + final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); + + final PagedBytes bytes = new PagedBytes(15); + + int startTermsBPV; + + // TODO: use Uninvert? + if (terms != null) { + // Try for coarse estimate for number of bits; this + // should be an underestimate most of the time, which + // is fine -- GrowableWriter will reallocate as needed + long numUniqueTerms = terms.size(); + if (numUniqueTerms != -1L) { + if (numUniqueTerms > maxDoc) { + throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead"); + } + + startTermsBPV = PackedInts.bitsRequired(numUniqueTerms); + } else { + startTermsBPV = 1; + } + } else { + startTermsBPV = 1; + } + + PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); + final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio); + + int termOrd = 0; + + // TODO: use Uninvert? + + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(); + PostingsEnum docs = null; + + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + if (termOrd >= maxDoc) { + throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead"); + } + + termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term)); + docs = termsEnum.postings(docs, PostingsEnum.NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + // Store 1+ ord into packed bits + docToTermOrd.set(docID, 1+termOrd); + } + termOrd++; + } + } + + // maybe an int-only impl? 
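The loop above stores 1+termOrd in the packed doc-to-ord array precisely so that the packed default of 0 can mean "no value for this document"; SortedDocValuesImpl.getOrd then subtracts 1, turning missing documents into ord -1. A tiny illustration of that convention (editorial sketch):

class PlusOneOrdSketch {
  public static void main(String[] args) {
    int maxDoc = 4;
    int[] docToTermOrdPlusOne = new int[maxDoc]; // defaults to 0 == "no value"

    docToTermOrdPlusOne[1] = 1 + 0;  // doc 1 has the first term (ord 0)
    docToTermOrdPlusOne[3] = 1 + 2;  // doc 3 has the third term (ord 2)

    for (int doc = 0; doc < maxDoc; doc++) {
      int ord = docToTermOrdPlusOne[doc] - 1;    // -1 means the doc has no value
      System.out.println("doc " + doc + " -> ord " + ord);
    }
  }
}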
+ return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.build(), docToTermOrd.getMutable(), termOrd); + } + } + + private static class BinaryDocValuesImpl implements Accountable { + private final PagedBytes.Reader bytes; + private final PackedInts.Reader docToOffset; + + public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) { + this.bytes = bytes; + this.docToOffset = docToOffset; + } + + public BinaryDocValues iterator() { + final BytesRef term = new BytesRef(); + return new BinaryDocValues() { + @Override + public BytesRef get(int docID) { + final long pointer = docToOffset.get(docID); + if (pointer == 0) { + term.length = 0; + } else { + bytes.fill(term, pointer); + } + return term; + } + }; + } + + @Override + public long ramBytesUsed() { + return bytes.ramBytesUsed() + docToOffset.ramBytesUsed() + 2*RamUsageEstimator.NUM_BYTES_OBJECT_REF; + } + + @Override + public Collection getChildResources() { + List resources = new ArrayList<>(2); + resources.add(Accountables.namedAccountable("term bytes", bytes)); + resources.add(Accountables.namedAccountable("addresses", docToOffset)); + return Collections.unmodifiableList(resources); + } + } + + // TODO: this if DocTermsIndex was already created, we + // should share it... + public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException { + return getTerms(reader, field, setDocsWithField, PackedInts.FAST); + } + + public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException { + BinaryDocValues valuesIn = reader.getBinaryDocValues(field); + if (valuesIn == null) { + valuesIn = reader.getSortedDocValues(field); + } + + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return valuesIn; + } + + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return DocValues.emptyBinary(); + } else if (info.getDocValuesType() != DocValuesType.NONE) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (info.getIndexOptions() == IndexOptions.NONE) { + return DocValues.emptyBinary(); + } + + BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), setDocsWithField); + return impl.iterator(); + } + + static final class BinaryDocValuesCache extends Cache { + BinaryDocValuesCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(LeafReader reader, CacheKey key, boolean setDocsWithField) + throws IOException { + + // TODO: would be nice to first check if DocTermsIndex + // was already cached for this field and then return + // that instead, to avoid insanity + + final int maxDoc = reader.maxDoc(); + Terms terms = reader.terms(key.field); + + final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); + + final int termCountHardLimit = maxDoc; + + // Holds the actual term data, expanded. 
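+ // (PagedBytes(15) stores data in 2^15-byte blocks; an empty BytesRef is copied in
+ // first, below, so that offset 0 can stand for "no value" for a document.)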
+ final PagedBytes bytes = new PagedBytes(15); + + int startBPV; + + if (terms != null) { + // Try for coarse estimate for number of bits; this + // should be an underestimate most of the time, which + // is fine -- GrowableWriter will reallocate as needed + long numUniqueTerms = terms.size(); + if (numUniqueTerms != -1L) { + if (numUniqueTerms > termCountHardLimit) { + numUniqueTerms = termCountHardLimit; + } + startBPV = PackedInts.bitsRequired(numUniqueTerms*4); + } else { + startBPV = 1; + } + } else { + startBPV = 1; + } + + final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio); + + // pointer==0 means not set + bytes.copyUsingLengthPrefix(new BytesRef()); + + if (terms != null) { + int termCount = 0; + final TermsEnum termsEnum = terms.iterator(); + PostingsEnum docs = null; + while(true) { + if (termCount++ == termCountHardLimit) { + // app is misusing the API (there is more than + // one term per doc); in this case we make best + // effort to load what we can (see LUCENE-2142) + break; + } + + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final long pointer = bytes.copyUsingLengthPrefix(term); + docs = termsEnum.postings(docs, PostingsEnum.NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + docToOffset.set(docID, pointer); + } + } + } + + final PackedInts.Reader offsetReader = docToOffset.getMutable(); + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, new Bits() { + @Override + public boolean get(int index) { + return offsetReader.get(index) != 0; + } + + @Override + public int length() { + return maxDoc; + } + }, null); + } + // maybe an int-only impl? + return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader); + } + } + + // TODO: this if DocTermsIndex was already created, we + // should share it... + public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException { + // not a general purpose filtering mechanism... + assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX; + + SortedSetDocValues dv = reader.getSortedSetDocValues(field); + if (dv != null) { + return dv; + } + + SortedDocValues sdv = reader.getSortedDocValues(field); + if (sdv != null) { + return DocValues.singleton(sdv); + } + + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return DocValues.emptySortedSet(); + } else if (info.getDocValuesType() != DocValuesType.NONE) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (info.getIndexOptions() == IndexOptions.NONE) { + return DocValues.emptySortedSet(); + } + + // ok we need to uninvert. check if we can optimize a bit. + + Terms terms = reader.terms(field); + if (terms == null) { + return DocValues.emptySortedSet(); + } else { + // if #postings = #docswithfield we know that the field is "single valued enough". + // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency. 
+ // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf + long numPostings = terms.getSumDocFreq(); + if (numPostings != -1 && numPostings == terms.getDocCount()) { + return DocValues.singleton(getTermsIndex(reader, field)); + } + } + + DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix), false); + return dto.iterator(reader); + } + + static final class DocTermOrdsCache extends Cache { + DocTermOrdsCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(LeafReader reader, CacheKey key, boolean setDocsWithField /* ignored */) + throws IOException { + BytesRef prefix = (BytesRef) key.custom; + return new DocTermOrds(reader, null, key.field, prefix); + } + } + + private volatile PrintStream infoStream; + + public void setInfoStream(PrintStream stream) { + infoStream = stream; + } + + public PrintStream getInfoStream() { + return infoStream; + } +} + diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java new file mode 100644 index 00000000000..ec398f2174a --- /dev/null +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.MapOfSets; +import org.apache.solr.uninverting.FieldCache.CacheEntry; + +/** + * Provides methods for sanity checking that entries in the FieldCache + * are not wasteful or inconsistent. + *
+ *
+ * Lucene 2.9 Introduced numerous enhancements into how the FieldCache + * is used by the low levels of Lucene searching (for Sorting and + * ValueSourceQueries) to improve both the speed for Sorting, as well + * as reopening of IndexReaders. But these changes have shifted the + * usage of FieldCache from "top level" IndexReaders (frequently a + * MultiReader or DirectoryReader) down to the leaf level SegmentReaders. + * As a result, existing applications that directly access the FieldCache + * may find RAM usage increase significantly when upgrading to 2.9 or + * Later. This class provides an API for these applications (or their + * Unit tests) to check at run time if the FieldCache contains "insane" + * usages of the FieldCache. + *
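+ * A minimal usage sketch (variable names and output handling are illustrative;
+ * it assumes the shared FieldCache.DEFAULT instance used elsewhere in this package):
+ * <pre>{@code
+ * Insanity[] problems = FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT);
+ * for (Insanity insanity : problems) {
+ *   System.err.println(insanity); // prints the type, message and offending cache entries
+ * }
+ * }</pre>
+ *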
+ * @lucene.experimental + * @see FieldCache + * @see FieldCacheSanityChecker.Insanity + * @see FieldCacheSanityChecker.InsanityType + */ +final class FieldCacheSanityChecker { + + public FieldCacheSanityChecker() { + /* NOOP */ + } + + /** + * Quick and dirty convenience method + * @see #check + */ + public static Insanity[] checkSanity(FieldCache cache) { + return checkSanity(cache.getCacheEntries()); + } + + /** + * Quick and dirty convenience method that instantiates an instance with + * "good defaults" and uses it to test the CacheEntrys + * @see #check + */ + public static Insanity[] checkSanity(CacheEntry... cacheEntries) { + FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker(); + return sanityChecker.check(cacheEntries); + } + + + /** + * Tests a CacheEntry[] for indication of "insane" cache usage. + *
+ * NOTE: FieldCache CreationPlaceholder objects are ignored.
+ * (:TODO: is this a bad idea? are we masking a real problem?)
+ *
+ */ + public Insanity[] check(CacheEntry... cacheEntries) { + if (null == cacheEntries || 0 == cacheEntries.length) + return new Insanity[0]; + + // the indirect mapping lets MapOfSet dedup identical valIds for us + // + // maps the (valId) identityhashCode of cache values to + // sets of CacheEntry instances + final MapOfSets valIdToItems = new MapOfSets<>(new HashMap>(17)); + // maps ReaderField keys to Sets of ValueIds + final MapOfSets readerFieldToValIds = new MapOfSets<>(new HashMap>(17)); + // + + // any keys that we know result in more then one valId + final Set valMismatchKeys = new HashSet<>(); + + // iterate over all the cacheEntries to get the mappings we'll need + for (int i = 0; i < cacheEntries.length; i++) { + final CacheEntry item = cacheEntries[i]; + final Object val = item.getValue(); + + // It's OK to have dup entries, where one is eg + // float[] and the other is the Bits (from + // getDocWithField()) + if (val instanceof FieldCacheImpl.BitsEntry) { + continue; + } + + if (val instanceof FieldCache.CreationPlaceholder) + continue; + + final ReaderField rf = new ReaderField(item.getReaderKey(), + item.getFieldName()); + + final Integer valId = Integer.valueOf(System.identityHashCode(val)); + + // indirect mapping, so the MapOfSet will dedup identical valIds for us + valIdToItems.put(valId, item); + if (1 < readerFieldToValIds.put(rf, valId)) { + valMismatchKeys.add(rf); + } + } + + final List insanity = new ArrayList<>(valMismatchKeys.size() * 3); + + insanity.addAll(checkValueMismatch(valIdToItems, + readerFieldToValIds, + valMismatchKeys)); + insanity.addAll(checkSubreaders(valIdToItems, + readerFieldToValIds)); + + return insanity.toArray(new Insanity[insanity.size()]); + } + + /** + * Internal helper method used by check that iterates over + * valMismatchKeys and generates a Collection of Insanity + * instances accordingly. The MapOfSets are used to populate + * the Insanity objects. + * @see InsanityType#VALUEMISMATCH + */ + private Collection checkValueMismatch(MapOfSets valIdToItems, + MapOfSets readerFieldToValIds, + Set valMismatchKeys) { + + final List insanity = new ArrayList<>(valMismatchKeys.size() * 3); + + if (! valMismatchKeys.isEmpty() ) { + // we have multiple values for some ReaderFields + + final Map> rfMap = readerFieldToValIds.getMap(); + final Map> valMap = valIdToItems.getMap(); + for (final ReaderField rf : valMismatchKeys) { + final List badEntries = new ArrayList<>(valMismatchKeys.size() * 2); + for(final Integer value: rfMap.get(rf)) { + for (final CacheEntry cacheEntry : valMap.get(value)) { + badEntries.add(cacheEntry); + } + } + + CacheEntry[] badness = new CacheEntry[badEntries.size()]; + badness = badEntries.toArray(badness); + + insanity.add(new Insanity(InsanityType.VALUEMISMATCH, + "Multiple distinct value objects for " + + rf.toString(), badness)); + } + } + return insanity; + } + + /** + * Internal helper method used by check that iterates over + * the keys of readerFieldToValIds and generates a Collection + * of Insanity instances whenever two (or more) ReaderField instances are + * found that have an ancestry relationships. 
+ * + * @see InsanityType#SUBREADER + */ + private Collection checkSubreaders( MapOfSets valIdToItems, + MapOfSets readerFieldToValIds) { + + final List insanity = new ArrayList<>(23); + + Map> badChildren = new HashMap<>(17); + MapOfSets badKids = new MapOfSets<>(badChildren); // wrapper + + Map> viToItemSets = valIdToItems.getMap(); + Map> rfToValIdSets = readerFieldToValIds.getMap(); + + Set seen = new HashSet<>(17); + + Set readerFields = rfToValIdSets.keySet(); + for (final ReaderField rf : readerFields) { + + if (seen.contains(rf)) continue; + + List kids = getAllDescendantReaderKeys(rf.readerKey); + for (Object kidKey : kids) { + ReaderField kid = new ReaderField(kidKey, rf.fieldName); + + if (badChildren.containsKey(kid)) { + // we've already process this kid as RF and found other problems + // track those problems as our own + badKids.put(rf, kid); + badKids.putAll(rf, badChildren.get(kid)); + badChildren.remove(kid); + + } else if (rfToValIdSets.containsKey(kid)) { + // we have cache entries for the kid + badKids.put(rf, kid); + } + seen.add(kid); + } + seen.add(rf); + } + + // every mapping in badKids represents an Insanity + for (final ReaderField parent : badChildren.keySet()) { + Set kids = badChildren.get(parent); + + List badEntries = new ArrayList<>(kids.size() * 2); + + // put parent entr(ies) in first + { + for (final Integer value : rfToValIdSets.get(parent)) { + badEntries.addAll(viToItemSets.get(value)); + } + } + + // now the entries for the descendants + for (final ReaderField kid : kids) { + for (final Integer value : rfToValIdSets.get(kid)) { + badEntries.addAll(viToItemSets.get(value)); + } + } + + CacheEntry[] badness = new CacheEntry[badEntries.size()]; + badness = badEntries.toArray(badness); + + insanity.add(new Insanity(InsanityType.SUBREADER, + "Found caches for descendants of " + + parent.toString(), + badness)); + } + + return insanity; + + } + + /** + * Checks if the seed is an IndexReader, and if so will walk + * the hierarchy of subReaders building up a list of the objects + * returned by {@code seed.getCoreCacheKey()} + */ + private List getAllDescendantReaderKeys(Object seed) { + List all = new ArrayList<>(17); // will grow as we iter + all.add(seed); + for (int i = 0; i < all.size(); i++) { + final Object obj = all.get(i); + // TODO: We don't check closed readers here (as getTopReaderContext + // throws AlreadyClosedException), what should we do? Reflection? + if (obj instanceof IndexReader) { + try { + final List childs = + ((IndexReader) obj).getContext().children(); + if (childs != null) { // it is composite reader + for (final IndexReaderContext ctx : childs) { + all.add(ctx.reader().getCoreCacheKey()); + } + } + } catch (AlreadyClosedException ace) { + // ignore this reader + } + } + } + // need to skip the first, because it was the seed + return all.subList(1, all.size()); + } + + /** + * Simple pair object for using "readerKey + fieldName" a Map key + */ + private final static class ReaderField { + public final Object readerKey; + public final String fieldName; + public ReaderField(Object readerKey, String fieldName) { + this.readerKey = readerKey; + this.fieldName = fieldName; + } + @Override + public int hashCode() { + return System.identityHashCode(readerKey) * fieldName.hashCode(); + } + @Override + public boolean equals(Object that) { + if (! 
(that instanceof ReaderField)) return false; + + ReaderField other = (ReaderField) that; + return (this.readerKey == other.readerKey && + this.fieldName.equals(other.fieldName)); + } + @Override + public String toString() { + return readerKey.toString() + "+" + fieldName; + } + } + + /** + * Simple container for a collection of related CacheEntry objects that + * in conjunction with each other represent some "insane" usage of the + * FieldCache. + */ + public final static class Insanity { + private final InsanityType type; + private final String msg; + private final CacheEntry[] entries; + public Insanity(InsanityType type, String msg, CacheEntry... entries) { + if (null == type) { + throw new IllegalArgumentException + ("Insanity requires non-null InsanityType"); + } + if (null == entries || 0 == entries.length) { + throw new IllegalArgumentException + ("Insanity requires non-null/non-empty CacheEntry[]"); + } + this.type = type; + this.msg = msg; + this.entries = entries; + + } + /** + * Type of insane behavior this object represents + */ + public InsanityType getType() { return type; } + /** + * Description of hte insane behavior + */ + public String getMsg() { return msg; } + /** + * CacheEntry objects which suggest a problem + */ + public CacheEntry[] getCacheEntries() { return entries; } + /** + * Multi-Line representation of this Insanity object, starting with + * the Type and Msg, followed by each CacheEntry.toString() on its + * own line prefaced by a tab character + */ + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(getType()).append(": "); + + String m = getMsg(); + if (null != m) buf.append(m); + + buf.append('\n'); + + CacheEntry[] ce = getCacheEntries(); + for (int i = 0; i < ce.length; i++) { + buf.append('\t').append(ce[i].toString()).append('\n'); + } + + return buf.toString(); + } + } + + /** + * An Enumeration of the different types of "insane" behavior that + * may be detected in a FieldCache. + * + * @see InsanityType#SUBREADER + * @see InsanityType#VALUEMISMATCH + * @see InsanityType#EXPECTED + */ + public final static class InsanityType { + private final String label; + private InsanityType(final String label) { + this.label = label; + } + @Override + public String toString() { return label; } + + /** + * Indicates an overlap in cache usage on a given field + * in sub/super readers. + */ + public final static InsanityType SUBREADER + = new InsanityType("SUBREADER"); + + /** + *
+ * Indicates entries have the same reader+fieldname but + * different cached values. This can happen if different datatypes, + * or parsers are used -- and while it's not necessarily a bug + * it's typically an indication of a possible problem. + *
+ *
+ * NOTE: Only the reader, fieldname, and cached value are actually + * tested -- if two cache entries have different parsers or datatypes but + * the cached values are the same Object (== not just equal()) this method + * does not consider that a red flag. This allows for subtle variations + * in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) + *
+ */ + public final static InsanityType VALUEMISMATCH + = new InsanityType("VALUEMISMATCH"); + + /** + * Indicates an expected bit of "insanity". This may be useful for + * clients that wish to preserve/log information about insane usage + * but indicate that it was expected. + */ + public final static InsanityType EXPECTED + = new InsanityType("EXPECTED"); + } + + +} diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java new file mode 100644 index 00000000000..4450cbb7d86 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java @@ -0,0 +1,391 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Map; + +import org.apache.lucene.document.BinaryDocValuesField; // javadocs +import org.apache.lucene.document.NumericDocValuesField; // javadocs +import org.apache.lucene.document.SortedDocValuesField; // javadocs +import org.apache.lucene.document.SortedSetDocValuesField; // javadocs +import org.apache.lucene.document.StringField; // javadocs +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FilterDirectoryReader; +import org.apache.lucene.index.FilterLeafReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.Bits; +import org.apache.solr.uninverting.FieldCache.CacheEntry; + +/** + * A FilterReader that exposes indexed values as if they also had + * docvalues. + *
+ * This is accomplished by "inverting the inverted index" or "uninversion". + *
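+ * A minimal usage sketch (the field name and its type mapping are illustrative):
+ * <pre>{@code
+ * Map<String, UninvertingReader.Type> mapping =
+ *     Collections.singletonMap("popularity", UninvertingReader.Type.INTEGER_POINT);
+ * DirectoryReader uninverted = UninvertingReader.wrap(directoryReader, mapping);
+ * // leaves of 'uninverted' now expose NumericDocValues for "popularity"
+ * }</pre>
+ *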
+ * The uninversion process happens lazily: upon the first request for the + * field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} + * or similar), it will create the docvalues on-the-fly if needed and cache it, + * based on the core cache key of the wrapped LeafReader. + */ +public class UninvertingReader extends FilterLeafReader { + + /** + * Specifies the type of uninversion to apply for the field. + */ + public static enum Type { + /** + * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint}) + *
+ * Fields with this type act as if they were indexed with
+ * {@link NumericDocValuesField}.
+ */
+ INTEGER_POINT,
+ /**
+ * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
+ *
+ * Fields with this type act as if they were indexed with
+ * {@link NumericDocValuesField}.
+ */
+ LONG_POINT,
+ /**
+ * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
+ *
+ * Fields with this type act as if they were indexed with
+ * {@link NumericDocValuesField}.
+ */
+ FLOAT_POINT,
+ /**
+ * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
+ *
+ * Fields with this type act as if they were indexed with + * {@link NumericDocValuesField}. + */ + DOUBLE_POINT, + /** + * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField}) + *
+ * Fields with this type act as if they were indexed with + * {@link NumericDocValuesField}. + * @deprecated Index with points and use {@link #INTEGER_POINT} instead. + */ + @Deprecated + LEGACY_INTEGER, + /** + * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField}) + *
+ * Fields with this type act as if they were indexed with + * {@link NumericDocValuesField}. + * @deprecated Index with points and use {@link #LONG_POINT} instead. + */ + @Deprecated + LEGACY_LONG, + /** + * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField}) + *
+ * Fields with this type act as if they were indexed with + * {@link NumericDocValuesField}. + * @deprecated Index with points and use {@link #FLOAT_POINT} instead. + */ + @Deprecated + LEGACY_FLOAT, + /** + * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField}) + *
+ * Fields with this type act as if they were indexed with + * {@link NumericDocValuesField}. + * @deprecated Index with points and use {@link #DOUBLE_POINT} instead. + */ + @Deprecated + LEGACY_DOUBLE, + /** + * Single-valued Binary, (e.g. indexed with {@link StringField}) + *
+ * Fields with this type act as if they were indexed with + * {@link BinaryDocValuesField}. + */ + BINARY, + /** + * Single-valued Binary, (e.g. indexed with {@link StringField}) + *
+ * Fields with this type act as if they were indexed with + * {@link SortedDocValuesField}. + */ + SORTED, + /** + * Multi-valued Binary, (e.g. indexed with {@link StringField}) + *
+ * Fields with this type act as if they were indexed with + * {@link SortedSetDocValuesField}. + */ + SORTED_SET_BINARY, + /** + * Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField}) + *
+ * Fields with this type act as if they were indexed with + * {@link SortedSetDocValuesField}. + */ + SORTED_SET_INTEGER, + /** + * Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField}) + *
+ * Fields with this type act as if they were indexed with + * {@link SortedSetDocValuesField}. + */ + SORTED_SET_FLOAT, + /** + * Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField}) + *
+ * Fields with this type act as if they were indexed with + * {@link SortedSetDocValuesField}. + */ + SORTED_SET_LONG, + /** + * Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField}) + *
+ * Fields with this type act as if they were indexed with + * {@link SortedSetDocValuesField}. + */ + SORTED_SET_DOUBLE + } + + /** + * Wraps a provided DirectoryReader. Note that for convenience, the returned reader + * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)}) + * and so on. + */ + public static DirectoryReader wrap(DirectoryReader in, final Map mapping) throws IOException { + return new UninvertingDirectoryReader(in, mapping); + } + + static class UninvertingDirectoryReader extends FilterDirectoryReader { + final Map mapping; + + public UninvertingDirectoryReader(DirectoryReader in, final Map mapping) throws IOException { + super(in, new FilterDirectoryReader.SubReaderWrapper() { + @Override + public LeafReader wrap(LeafReader reader) { + return new UninvertingReader(reader, mapping); + } + }); + this.mapping = mapping; + } + + @Override + protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException { + return new UninvertingDirectoryReader(in, mapping); + } + } + + final Map mapping; + final FieldInfos fieldInfos; + + /** + * Create a new UninvertingReader with the specified mapping + *
+ * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)} + * instead. + * + * @lucene.internal + */ + public UninvertingReader(LeafReader in, Map mapping) { + super(in); + this.mapping = mapping; + ArrayList filteredInfos = new ArrayList<>(); + for (FieldInfo fi : in.getFieldInfos()) { + DocValuesType type = fi.getDocValuesType(); + if (type == DocValuesType.NONE) { + Type t = mapping.get(fi.name); + if (t != null) { + if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) { + // type uses points + if (fi.getPointDimensionCount() == 0) { + continue; + } + } else { + // type uses inverted index + if (fi.getIndexOptions() == IndexOptions.NONE) { + continue; + } + } + switch(t) { + case INTEGER_POINT: + case LONG_POINT: + case FLOAT_POINT: + case DOUBLE_POINT: + case LEGACY_INTEGER: + case LEGACY_LONG: + case LEGACY_FLOAT: + case LEGACY_DOUBLE: + type = DocValuesType.NUMERIC; + break; + case BINARY: + type = DocValuesType.BINARY; + break; + case SORTED: + type = DocValuesType.SORTED; + break; + case SORTED_SET_BINARY: + case SORTED_SET_INTEGER: + case SORTED_SET_FLOAT: + case SORTED_SET_LONG: + case SORTED_SET_DOUBLE: + type = DocValuesType.SORTED_SET; + break; + default: + throw new AssertionError(); + } + } + } + filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(), + fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(), + fi.getPointDimensionCount(), fi.getPointNumBytes())); + } + fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()])); + } + + @Override + public FieldInfos getFieldInfos() { + return fieldInfos; + } + + @Override + public NumericDocValues getNumericDocValues(String field) throws IOException { + Type v = getType(field); + if (v != null) { + switch (v) { + case INTEGER_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER, true); + case FLOAT_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER, true); + case LONG_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER, true); + case DOUBLE_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER, true); + case LEGACY_INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER, true); + case LEGACY_FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER, true); + case LEGACY_LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER, true); + case LEGACY_DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER, true); + } + } + return super.getNumericDocValues(field); + } + + @Override + public BinaryDocValues getBinaryDocValues(String field) throws IOException { + Type v = getType(field); + if (v == Type.BINARY) { + return FieldCache.DEFAULT.getTerms(in, field, true); + } else { + return in.getBinaryDocValues(field); + } + } + + @Override + public SortedDocValues getSortedDocValues(String field) throws IOException { + Type v = getType(field); + if (v == Type.SORTED) { + return FieldCache.DEFAULT.getTermsIndex(in, field); + } else { + return in.getSortedDocValues(field); + } + } + + @Override + public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { + Type v = getType(field); + if (v != null) { + switch (v) { + case SORTED_SET_INTEGER: + case SORTED_SET_FLOAT: + return 
FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX); + case SORTED_SET_LONG: + case SORTED_SET_DOUBLE: + return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX); + case SORTED_SET_BINARY: + return FieldCache.DEFAULT.getDocTermOrds(in, field, null); + } + } + return in.getSortedSetDocValues(field); + } + + @Override + public Bits getDocsWithField(String field) throws IOException { + Type v = getType(field); + if (v != null) { + switch (v) { + case INTEGER_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.INT_POINT_PARSER); + case FLOAT_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.FLOAT_POINT_PARSER); + case LONG_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LONG_POINT_PARSER); + case DOUBLE_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.DOUBLE_POINT_PARSER); + case LEGACY_INTEGER: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_INT_PARSER); + case LEGACY_FLOAT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_FLOAT_PARSER); + case LEGACY_LONG: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_LONG_PARSER); + case LEGACY_DOUBLE: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_DOUBLE_PARSER); + default: + return FieldCache.DEFAULT.getDocsWithField(in, field, null); + } + } else { + return in.getDocsWithField(field); + } + } + + /** + * Returns the field's uninversion type, or null + * if the field doesn't exist or doesn't have a mapping. + */ + private Type getType(String field) { + FieldInfo info = fieldInfos.fieldInfo(field); + if (info == null || info.getDocValuesType() == DocValuesType.NONE) { + return null; + } + return mapping.get(field); + } + + @Override + public Object getCoreCacheKey() { + return in.getCoreCacheKey(); + } + + @Override + public Object getCombinedCoreAndDeletesKey() { + return in.getCombinedCoreAndDeletesKey(); + } + + @Override + public String toString() { + return "Uninverting(" + in.toString() + ")"; + } + + /** + * Return information about the backing cache + * @lucene.internal + */ + public static String[] getUninvertedStats() { + CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); + String[] info = new String[entries.length]; + for (int i = 0; i < entries.length; i++) { + info[i] = entries[i].toString(); + } + return info; + } +} diff --git a/solr/core/src/java/org/apache/solr/uninverting/package-info.java b/solr/core/src/java/org/apache/solr/uninverting/package-info.java new file mode 100644 index 00000000000..d95e08fd6a5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/uninverting/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * Support for creating docvalues on-the-fly from the inverted index at runtime. + */ +package org.apache.solr.uninverting; diff --git a/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java b/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java index 3d871616484..778e4c6ad80 100644 --- a/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java +++ b/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java @@ -29,8 +29,9 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.solr.schema.IndexSchema; +import org.apache.solr.uninverting.UninvertingReader; +import org.apache.solr.uninverting.UninvertingReader; /** * Allows access to uninverted docvalues by delete-by-queries. diff --git a/solr/core/src/java/org/apache/solr/update/VersionInfo.java b/solr/core/src/java/org/apache/solr/update/VersionInfo.java index 5fe415c6110..bee30f500d8 100644 --- a/solr/core/src/java/org/apache/solr/update/VersionInfo.java +++ b/solr/core/src/java/org/apache/solr/update/VersionInfo.java @@ -24,7 +24,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Terms; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; @@ -34,6 +33,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LegacyNumericUtils; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.SuppressForbidden; +import org.apache.solr.index.SlowCompositeReaderWrapper; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.SolrIndexSearcher; diff --git a/solr/core/src/test/org/apache/solr/index/TestSlowCompositeReaderWrapper.java b/solr/core/src/test/org/apache/solr/index/TestSlowCompositeReaderWrapper.java new file mode 100644 index 00000000000..0685e5525d3 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/index/TestSlowCompositeReaderWrapper.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestSlowCompositeReaderWrapper extends LuceneTestCase { + + public void testCoreListenerOnSlowCompositeReaderWrapper() throws IOException { + RandomIndexWriter w = new RandomIndexWriter(random(), newDirectory()); + final int numDocs = TestUtil.nextInt(random(), 1, 5); + for (int i = 0; i < numDocs; ++i) { + w.addDocument(new Document()); + if (random().nextBoolean()) { + w.commit(); + } + } + w.commit(); + w.close(); + + final IndexReader reader = DirectoryReader.open(w.w.getDirectory()); + final LeafReader leafReader = SlowCompositeReaderWrapper.wrap(reader); + + final int numListeners = TestUtil.nextInt(random(), 1, 10); + final List listeners = new ArrayList<>(); + AtomicInteger counter = new AtomicInteger(numListeners); + + for (int i = 0; i < numListeners; ++i) { + CountCoreListener listener = new CountCoreListener(counter, leafReader.getCoreCacheKey()); + listeners.add(listener); + leafReader.addCoreClosedListener(listener); + } + for (int i = 0; i < 100; ++i) { + leafReader.addCoreClosedListener(listeners.get(random().nextInt(listeners.size()))); + } + final int removed = random().nextInt(numListeners); + Collections.shuffle(listeners, random()); + for (int i = 0; i < removed; ++i) { + leafReader.removeCoreClosedListener(listeners.get(i)); + } + assertEquals(numListeners, counter.get()); + // make sure listeners are registered on the wrapped reader and that closing any of them has the same effect + if (random().nextBoolean()) { + reader.close(); + } else { + leafReader.close(); + } + assertEquals(removed, counter.get()); + w.w.getDirectory().close(); + } + + private static final class CountCoreListener implements LeafReader.CoreClosedListener { + + private final AtomicInteger count; + private final Object coreCacheKey; + + public CountCoreListener(AtomicInteger count, Object coreCacheKey) { + this.count = count; + this.coreCacheKey = coreCacheKey; + } + + @Override + public void onClose(Object coreCacheKey) { + assertSame(this.coreCacheKey, coreCacheKey); + count.decrementAndGet(); + } + + } +} diff --git a/solr/core/src/test/org/apache/solr/request/TestFaceting.java b/solr/core/src/test/org/apache/solr/request/TestFaceting.java index 97dcedfe6a0..4dd49e18202 100644 --- a/solr/core/src/test/org/apache/solr/request/TestFaceting.java +++ b/solr/core/src/test/org/apache/solr/request/TestFaceting.java @@ -25,12 +25,12 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.uninverting.DocTermOrds; import org.apache.lucene.util.BytesRef; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.FacetParams; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.uninverting.DocTermOrds; import org.apache.solr.util.RefCounted; import org.junit.After; import org.junit.BeforeClass; diff --git 
a/solr/core/src/test/org/apache/solr/search/TestSort.java b/solr/core/src/test/org/apache/solr/search/TestSort.java index e874c373e2d..8590b18cf97 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSort.java +++ b/solr/core/src/test/org/apache/solr/search/TestSort.java @@ -42,13 +42,12 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField.Type; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; @@ -56,6 +55,7 @@ import org.apache.lucene.util.TestUtil; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.SchemaField; +import org.apache.solr.uninverting.UninvertingReader; import org.junit.BeforeClass; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java new file mode 100644 index 00000000000..f1627a6ee35 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java @@ -0,0 +1,681 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LegacyIntField; +import org.apache.lucene.document.LegacyLongField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LegacyNumericUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.TestUtil; +import org.apache.solr.index.SlowCompositeReaderWrapper; + +// TODO: +// - test w/ del docs +// - test prefix +// - test w/ cutoff +// - crank docs way up so we get some merging sometimes + +public class TestDocTermOrds extends LuceneTestCase { + + public void testEmptyIndex() throws IOException { + final Directory dir = newDirectory(); + final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); + iw.close(); + + final DirectoryReader ir = DirectoryReader.open(dir); + TestUtil.checkReader(ir); + + final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir); + TestUtil.checkReader(composite); + + // check the leaves + // (normally there are none for an empty index, so this is really just future + // proofing in case that changes for some reason) + for (LeafReaderContext rc : ir.leaves()) { + final LeafReader r = rc.reader(); + final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field"); + assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r)); + assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount()); + } + + // check the composite + final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field"); + assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite)); + assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount()); + + ir.close(); + dir.close(); + } + + public void testSimple() throws Exception { + Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); + Document doc = new Document(); + Field field = newTextField("field", "", Field.Store.NO); + doc.add(field); + field.setStringValue("a b c"); + w.addDocument(doc); + + field.setStringValue("d e f"); + w.addDocument(doc); + + field.setStringValue("a f"); + w.addDocument(doc); + + final IndexReader r = 
w.getReader(); + w.close(); + + final LeafReader ar = SlowCompositeReaderWrapper.wrap(r); + TestUtil.checkReader(ar); + final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field"); + SortedSetDocValues iter = dto.iterator(ar); + + iter.setDocument(0); + assertEquals(0, iter.nextOrd()); + assertEquals(1, iter.nextOrd()); + assertEquals(2, iter.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); + + iter.setDocument(1); + assertEquals(3, iter.nextOrd()); + assertEquals(4, iter.nextOrd()); + assertEquals(5, iter.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); + + iter.setDocument(2); + assertEquals(0, iter.nextOrd()); + assertEquals(5, iter.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); + + r.close(); + dir.close(); + } + + public void testRandom() throws Exception { + Directory dir = newDirectory(); + + final int NUM_TERMS = atLeast(20); + final Set terms = new HashSet<>(); + while(terms.size() < NUM_TERMS) { + final String s = TestUtil.randomRealisticUnicodeString(random()); + //final String s = _TestUtil.randomSimpleString(random); + if (s.length() > 0) { + terms.add(new BytesRef(s)); + } + } + final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]); + Arrays.sort(termsArray); + + final int NUM_DOCS = atLeast(100); + + IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); + + // Sometimes swap in codec that impls ord(): + if (random().nextInt(10) == 7) { + // Make sure terms index has ords: + Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random())); + conf.setCodec(codec); + } + + final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf); + + final int[][] idToOrds = new int[NUM_DOCS][]; + final Set ordsForDocSet = new HashSet<>(); + + for(int id=0;id prefixes = new HashSet<>(); + final int numPrefix = TestUtil.nextInt(random(), 2, 7); + if (VERBOSE) { + System.out.println("TEST: use " + numPrefix + " prefixes"); + } + while(prefixes.size() < numPrefix) { + prefixes.add(TestUtil.randomRealisticUnicodeString(random())); + //prefixes.add(_TestUtil.randomSimpleString(random)); + } + final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]); + + final int NUM_TERMS = atLeast(20); + final Set terms = new HashSet<>(); + while(terms.size() < NUM_TERMS) { + final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random()); + //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random); + if (s.length() > 0) { + terms.add(new BytesRef(s)); + } + } + final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]); + Arrays.sort(termsArray); + + final int NUM_DOCS = atLeast(100); + + IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); + + // Sometimes swap in codec that impls ord(): + if (random().nextInt(10) == 7) { + Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random())); + conf.setCodec(codec); + } + + final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf); + + final int[][] idToOrds = new int[NUM_DOCS][]; + final Set ordsForDocSet = new HashSet<>(); + + for(int id=0;id values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i]))); + for (BytesRef v : values) { + if (v == null) { + // why does this test use null values... 
instead of an empty list: confusing + break; + } + long ord = termOrds.nextOrd(); + assert ord != SortedSetDocValues.NO_MORE_ORDS; + BytesRef scratch = termOrds.lookupOrd(ord); + assertEquals(v, scratch); + } + assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd()); + } + + // test bad field + termOrds = cache.getDocTermOrds(reader, "bogusfield", null); + assertTrue(termOrds.getValueCount() == 0); + + FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey()); + } + + public void testEmptyIndex() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500)); + writer.close(); + IndexReader r = DirectoryReader.open(dir); + LeafReader reader = SlowCompositeReaderWrapper.wrap(r); + TestUtil.checkReader(reader); + FieldCache.DEFAULT.getTerms(reader, "foobar", true); + FieldCache.DEFAULT.getTermsIndex(reader, "foobar"); + FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey()); + r.close(); + dir.close(); + } + + private static String generateString(int i) { + String s = null; + if (i > 0 && random().nextInt(3) == 1) { + // reuse past string -- try to find one that's not null + for(int iter = 0; iter < 10 && s == null;iter++) { + s = unicodeStrings[random().nextInt(i)]; + } + if (s == null) { + s = TestUtil.randomUnicodeString(random()); + } + } else { + s = TestUtil.randomUnicodeString(random()); + } + return s; + } + + public void testDocsWithField() throws Exception { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + assertEquals(0, cache.getCacheEntries().length); + cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, true); + + // The double[] takes one slots, and docsWithField should also + // have been populated: + assertEquals(2, cache.getCacheEntries().length); + Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER); + + // No new entries should appear: + assertEquals(2, cache.getCacheEntries().length); + assertTrue(bits instanceof Bits.MatchAllBits); + + NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true); + assertEquals(4, cache.getCacheEntries().length); + Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER); + assertEquals(4, cache.getCacheEntries().length); + for (int i = 0; i < docsWithField.length(); i++) { + if (i%2 == 0) { + assertTrue(docsWithField.get(i)); + assertEquals(i, ints.get(i)); + } else { + assertFalse(docsWithField.get(i)); + } + } + + NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.INT_POINT_PARSER, random().nextBoolean()); + docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.INT_POINT_PARSER); + for (int i = 0; i < docsWithField.length(); i++) { + if (i%2 == 0) { + assertTrue(docsWithField.get(i)); + assertEquals(i, numInts.get(i)); + } else { + assertFalse(docsWithField.get(i)); + } + } + } + + public void testGetDocsWithFieldThreadSafety() throws Exception { + final FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + int NUM_THREADS = 3; + Thread[] threads = new Thread[NUM_THREADS]; + final AtomicBoolean failed = new AtomicBoolean(); + final AtomicInteger iters = new AtomicInteger(); + final int NUM_ITER = 200 * RANDOM_MULTIPLIER; + final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS, + new Runnable() { + @Override + public void run() { + cache.purgeAllCaches(); + iters.incrementAndGet(); + } + }); + for(int 
threadIDX=0;threadIDX= NUM_ITER) { + break; + } + } else if (op == 1) { + Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER); + for (int i = 0; i < docsWithField.length(); i++) { + assertEquals(i%2 == 0, docsWithField.get(i)); + } + } else { + NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true); + Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER); + for (int i = 0; i < docsWithField.length(); i++) { + if (i%2 == 0) { + assertTrue(docsWithField.get(i)); + assertEquals(i, ints.get(i)); + } else { + assertFalse(docsWithField.get(i)); + } + } + } + } + } catch (Throwable t) { + failed.set(true); + restart.reset(); + throw new RuntimeException(t); + } + } + }; + threads[threadIDX].start(); + } + + for(int threadIDX=0;threadIDX { + FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER, false); + }); + + BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true); + final BytesRef term = binary.get(0); + assertEquals("binary value", term.utf8ToString()); + + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getTermsIndex(ar, "binary"); + }); + + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null); + }); + + expectThrows(IllegalStateException.class, () -> { + new DocTermOrds(ar, null, "binary"); + }); + + Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null); + assertTrue(bits.get(0)); + + // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds() + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER, false); + }); + + expectThrows(IllegalStateException.class, () -> { + new DocTermOrds(ar, null, "sorted"); + }); + + binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true); + BytesRef scratch = binary.get(0); + assertEquals("sorted value", scratch.utf8ToString()); + + SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted"); + assertEquals(0, sorted.getOrd(0)); + assertEquals(1, sorted.getValueCount()); + scratch = sorted.get(0); + assertEquals("sorted value", scratch.utf8ToString()); + + SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null); + sortedSet.setDocument(0); + assertEquals(0, sortedSet.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd()); + assertEquals(1, sortedSet.getValueCount()); + + bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null); + assertTrue(bits.get(0)); + + // Numeric type: can be retrieved via getInts() and so on + NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER, false); + assertEquals(42, numeric.get(0)); + + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getTerms(ar, "numeric", true); + }); + + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getTermsIndex(ar, "numeric"); + }); + + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null); + }); + + expectThrows(IllegalStateException.class, () -> { + new DocTermOrds(ar, null, "numeric"); + }); + + bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null); + assertTrue(bits.get(0)); + + // SortedSet type: can be retrieved via getDocTermOrds() + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getNumerics(ar, "sortedset", 
FieldCache.INT_POINT_PARSER, false); + }); + + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getTerms(ar, "sortedset", true); + }); + + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getTermsIndex(ar, "sortedset"); + }); + + expectThrows(IllegalStateException.class, () -> { + new DocTermOrds(ar, null, "sortedset"); + }); + + sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null); + sortedSet.setDocument(0); + assertEquals(0, sortedSet.nextOrd()); + assertEquals(1, sortedSet.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd()); + assertEquals(2, sortedSet.getValueCount()); + + bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null); + assertTrue(bits.get(0)); + + ir.close(); + dir.close(); + } + + public void testNonexistantFields() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + iw.addDocument(doc); + DirectoryReader ir = iw.getReader(); + iw.close(); + + LeafReader ar = getOnlyLeafReader(ir); + + final FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + assertEquals(0, cache.getCacheEntries().length); + + NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true); + assertEquals(0, ints.get(0)); + + NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true); + assertEquals(0, longs.get(0)); + + NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true); + assertEquals(0, floats.get(0)); + + NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true); + assertEquals(0, doubles.get(0)); + + BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true); + BytesRef scratch = binaries.get(0); + assertEquals(0, scratch.length); + + SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex"); + assertEquals(-1, sorted.getOrd(0)); + scratch = sorted.get(0); + assertEquals(0, scratch.length); + + SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null); + sortedSet.setDocument(0); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd()); + + Bits bits = cache.getDocsWithField(ar, "bogusbits", null); + assertFalse(bits.get(0)); + + // check that we cached nothing + assertEquals(0, cache.getCacheEntries().length); + ir.close(); + dir.close(); + } + + public void testNonIndexedFields() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new StoredField("bogusbytes", "bogus")); + doc.add(new StoredField("bogusshorts", "bogus")); + doc.add(new StoredField("bogusints", "bogus")); + doc.add(new StoredField("boguslongs", "bogus")); + doc.add(new StoredField("bogusfloats", "bogus")); + doc.add(new StoredField("bogusdoubles", "bogus")); + doc.add(new StoredField("bogusterms", "bogus")); + doc.add(new StoredField("bogustermsindex", "bogus")); + doc.add(new StoredField("bogusmultivalued", "bogus")); + doc.add(new StoredField("bogusbits", "bogus")); + iw.addDocument(doc); + DirectoryReader ir = iw.getReader(); + iw.close(); + + LeafReader ar = getOnlyLeafReader(ir); + + final FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + assertEquals(0, cache.getCacheEntries().length); + + NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true); + 
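For context, every multi-valued assertion in these tests walks ordinals the same way: position the SortedSetDocValues returned by getDocTermOrds on a document, pull ords until NO_MORE_ORDS, and resolve each one with lookupOrd. A minimal standalone sketch of that idiom follows; the class and method names are invented for illustration, and it assumes the pre-7.0 SortedSetDocValues API used throughout this patch.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

class DocTermOrdsIterationSketch {
  /** Collects the distinct terms of one document from an uninverted multi-valued field. */
  static List<String> termsOfDoc(SortedSetDocValues dv, int docID) throws IOException {
    List<String> terms = new ArrayList<>();
    dv.setDocument(docID);                        // position on the document
    for (long ord = dv.nextOrd();
         ord != SortedSetDocValues.NO_MORE_ORDS;  // sentinel marks the end of this doc's ords
         ord = dv.nextOrd()) {
      BytesRef term = dv.lookupOrd(ord);          // resolve ordinal -> term bytes
      terms.add(term.utf8ToString());
    }
    return terms;                                 // empty for a doc with no values
  }
}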
assertEquals(0, ints.get(0)); + + NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true); + assertEquals(0, longs.get(0)); + + NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true); + assertEquals(0, floats.get(0)); + + NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true); + assertEquals(0, doubles.get(0)); + + BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true); + BytesRef scratch = binaries.get(0); + assertEquals(0, scratch.length); + + SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex"); + assertEquals(-1, sorted.getOrd(0)); + scratch = sorted.get(0); + assertEquals(0, scratch.length); + + SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null); + sortedSet.setDocument(0); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd()); + + Bits bits = cache.getDocsWithField(ar, "bogusbits", null); + assertFalse(bits.get(0)); + + // check that we cached nothing + assertEquals(0, cache.getCacheEntries().length); + ir.close(); + dir.close(); + } + + // Make sure that the use of GrowableWriter doesn't prevent from using the full long range + public void testLongFieldCache() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); + cfg.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); + Document doc = new Document(); + LongPoint field = new LongPoint("f", 0L); + StoredField field2 = new StoredField("f", 0L); + doc.add(field); + doc.add(field2); + final long[] values = new long[TestUtil.nextInt(random(), 1, 10)]; + for (int i = 0; i < values.length; ++i) { + final long v; + switch (random().nextInt(10)) { + case 0: + v = Long.MIN_VALUE; + break; + case 1: + v = 0; + break; + case 2: + v = Long.MAX_VALUE; + break; + default: + v = TestUtil.nextLong(random(), -10, 10); + break; + } + values[i] = v; + if (v == 0 && random().nextBoolean()) { + // missing + iw.addDocument(new Document()); + } else { + field.setLongValue(v); + field2.setLongValue(v); + iw.addDocument(doc); + } + } + iw.forceMerge(1); + final DirectoryReader reader = iw.getReader(); + final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER, false); + for (int i = 0; i < values.length; ++i) { + assertEquals(values[i], longs.get(i)); + } + reader.close(); + iw.close(); + dir.close(); + } + + // Make sure that the use of GrowableWriter doesn't prevent from using the full int range + public void testIntFieldCache() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); + cfg.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); + Document doc = new Document(); + IntPoint field = new IntPoint("f", 0); + doc.add(field); + final int[] values = new int[TestUtil.nextInt(random(), 1, 10)]; + for (int i = 0; i < values.length; ++i) { + final int v; + switch (random().nextInt(10)) { + case 0: + v = Integer.MIN_VALUE; + break; + case 1: + v = 0; + break; + case 2: + v = Integer.MAX_VALUE; + break; + default: + v = TestUtil.nextInt(random(), -10, 10); + break; + } + values[i] = v; + if (v == 0 && random().nextBoolean()) { + // missing + iw.addDocument(new Document()); + } else { + field.setIntValue(v); + iw.addDocument(doc); + } + } 
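The numeric checks here all go through the same two calls: getNumerics uninverts an indexed point field into a random-access view (documents without a value read back as 0), and getDocsWithField reports which documents actually carry a value. A hedged usage sketch of that pair, assuming the org.apache.solr.uninverting.FieldCache added by this patch and a hypothetical long field named "price":

package org.apache.solr.uninverting; // same package as these tests, since FieldCache may not be public

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;

class FieldCacheNumericsSketch {
  static void dumpLongField(LeafReader leaf, String field) throws IOException {
    // Uninvert an indexed LongPoint field; 'true' also populates the docsWithField bits.
    NumericDocValues values =
        FieldCache.DEFAULT.getNumerics(leaf, field, FieldCache.LONG_POINT_PARSER, true);
    // Distinguishes an indexed 0 from a document that simply has no value.
    Bits hasValue = FieldCache.DEFAULT.getDocsWithField(leaf, field, FieldCache.LONG_POINT_PARSER);
    for (int doc = 0; doc < leaf.maxDoc(); doc++) {
      String shown = hasValue.get(doc) ? Long.toString(values.get(doc)) : "missing";
      System.out.println(doc + " -> " + shown);
    }
  }
}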
+ iw.forceMerge(1); + final DirectoryReader reader = iw.getReader(); + final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER, false); + for (int i = 0; i < values.length; ++i) { + assertEquals(values[i], ints.get(i)); + } + reader.close(); + iw.close(); + dir.close(); + } + +} diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java new file mode 100644 index 00000000000..18c64202163 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestFieldCacheReopen extends LuceneTestCase { + + // TODO: make a version of this that tests the same thing with UninvertingReader.wrap() + + // LUCENE-1579: Ensure that on a reopened reader, that any + // shared segments reuse the doc values arrays in + // FieldCache + public void testFieldCacheReuseAfterReopen() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(new MockAnalyzer(random())). 
+ setMergePolicy(newLogMergePolicy(10)) + ); + Document doc = new Document(); + doc.add(new IntPoint("number", 17)); + writer.addDocument(doc); + writer.commit(); + + // Open reader1 + DirectoryReader r = DirectoryReader.open(dir); + LeafReader r1 = getOnlyLeafReader(r); + final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER, false); + assertEquals(17, ints.get(0)); + + // Add new segment + writer.addDocument(doc); + writer.commit(); + + // Reopen reader1 --> reader2 + DirectoryReader r2 = DirectoryReader.openIfChanged(r); + assertNotNull(r2); + r.close(); + LeafReader sub0 = r2.leaves().get(0).reader(); + final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER, false); + r2.close(); + assertTrue(ints == ints2); + + writer.close(); + dir.close(); + } +} diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java new file mode 100644 index 00000000000..d54d5792447 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.uninverting; + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LegacyDoubleField; +import org.apache.lucene.document.LegacyFloatField; +import org.apache.lucene.document.LegacyIntField; +import org.apache.lucene.document.LegacyLongField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.index.SlowCompositeReaderWrapper; +import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity; +import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType; + +public class TestFieldCacheSanityChecker extends LuceneTestCase { + + protected LeafReader readerA; + protected LeafReader readerB; + protected LeafReader readerX; + protected LeafReader readerAclone; + protected Directory dirA, dirB; + private static final int NUM_DOCS = 1000; + + @Override + public void setUp() throws Exception { + super.setUp(); + dirA = newDirectory(); + dirB = newDirectory(); + + IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(new MockAnalyzer(random()))); + IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(new MockAnalyzer(random()))); + + long theLong = Long.MAX_VALUE; + double theDouble = Double.MAX_VALUE; + int theInt = Integer.MAX_VALUE; + float theFloat = Float.MAX_VALUE; + for (int i = 0; i < NUM_DOCS; i++){ + Document doc = new Document(); + doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO)); + doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO)); + doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO)); + doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO)); + if (0 == i % 3) { + wA.addDocument(doc); + } else { + wB.addDocument(doc); + } + } + wA.close(); + wB.close(); + DirectoryReader rA = DirectoryReader.open(dirA); + readerA = SlowCompositeReaderWrapper.wrap(rA); + readerAclone = SlowCompositeReaderWrapper.wrap(rA); + readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA)); + readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB)); + readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB)); + } + + @Override + public void tearDown() throws Exception { + readerA.close(); + readerAclone.close(); + readerB.close(); + readerX.close(); + dirA.close(); + dirB.close(); + super.tearDown(); + } + + public void testSanity() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + cache.getNumerics(readerA, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false); + cache.getNumerics(readerAclone, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false); + cache.getNumerics(readerB, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false); + + cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false); + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + if (0 < insanity.length) + dumpArray(getTestClass().getName() + "#" + getTestName() + + " INSANITY", insanity, System.err); + + assertEquals("shouldn't be any cache insanity", 0, insanity.length); + cache.purgeAllCaches(); + } + + public void testInsanity1() throws IOException { + FieldCache cache = 
FieldCache.DEFAULT; + cache.purgeAllCaches(); + + cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false); + cache.getTerms(readerX, "theInt", false); + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + assertEquals("wrong number of cache errors", 1, insanity.length); + assertEquals("wrong type of cache error", + InsanityType.VALUEMISMATCH, + insanity[0].getType()); + assertEquals("wrong number of entries in cache error", 2, + insanity[0].getCacheEntries().length); + + // we expect bad things, don't let tearDown complain about them + cache.purgeAllCaches(); + } + + public void testInsanity2() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + cache.getTerms(readerA, "theInt", false); + cache.getTerms(readerB, "theInt", false); + cache.getTerms(readerX, "theInt", false); + + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + assertEquals("wrong number of cache errors", 1, insanity.length); + assertEquals("wrong type of cache error", + InsanityType.SUBREADER, + insanity[0].getType()); + assertEquals("wrong number of entries in cache error", 3, + insanity[0].getCacheEntries().length); + + // we expect bad things, don't let tearDown complain about them + cache.purgeAllCaches(); + } + +} diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java new file mode 100644 index 00000000000..34d92379b39 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java @@ -0,0 +1,1814 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LegacyDoubleField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LegacyFloatField; +import org.apache.lucene.document.LegacyIntField; +import org.apache.lucene.document.LegacyLongField; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.solr.uninverting.UninvertingReader.Type; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +/* + * Tests sorting (but with fieldcache instead of docvalues) + */ +public class TestFieldCacheSort extends LuceneTestCase { + + public void testString() throws IOException { + testString(SortField.Type.STRING); + } + + public void testStringVal() throws Exception { + testString(SortField.Type.STRING_VAL); + } + + /** Tests sorting on type string */ + private void testString(SortField.Type sortType) throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.YES)); + writer.addDocument(doc); + Type type = sortType == SortField.Type.STRING ? 
Type.SORTED : Type.BINARY; + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", type)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", sortType)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(2, td.totalHits); + // 'bar' comes before 'foo' + assertEquals("bar", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value")); + + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testStringMissing() throws IOException { + testStringMissing(SortField.Type.STRING); + } + + public void testStringValMissing() throws IOException { + testStringMissing(SortField.Type.STRING_VAL); + } + + /** Tests sorting on type string with a missing value */ + private void testStringMissing(SortField.Type sortType) throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.YES)); + writer.addDocument(doc); + Type type = sortType == SortField.Type.STRING ? Type.SORTED : Type.BINARY; + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", type)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", sortType)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null comes first + assertNull(searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("foo", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testStringReverse() throws IOException { + testStringReverse(SortField.Type.STRING); + } + + public void testStringValReverse() throws IOException { + testStringReverse(SortField.Type.STRING_VAL); + } + + /** Tests reverse sorting on type string */ + private void testStringReverse(SortField.Type sortType) throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.YES)); + writer.addDocument(doc); + Type type = sortType == SortField.Type.STRING ? 
Type.SORTED : Type.BINARY; + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", type)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", sortType, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(2, td.totalHits); + // 'foo' comes after 'bar' in reverse order + assertEquals("foo", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testStringMissingSortedFirst() throws IOException { + testStringMissingSortedFirst(SortField.Type.STRING); + } + + public void testStringValMissingSortedFirst() throws IOException { + testStringMissingSortedFirst(SortField.Type.STRING_VAL); + } + + /** Tests sorting on type string with a missing + * value sorted first */ + private void testStringMissingSortedFirst(SortField.Type sortType) throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.YES)); + writer.addDocument(doc); + Type type = sortType == SortField.Type.STRING ? Type.SORTED : Type.BINARY; + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", type)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sf = new SortField("value", sortType); + Sort sort = new Sort(sf); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null comes first + assertNull(searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("foo", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testStringMissingSortedFirstReverse() throws IOException { + testStringMissingSortedFirstReverse(SortField.Type.STRING); + } + + public void testStringValMissingSortedFirstReverse() throws IOException { + testStringMissingSortedFirstReverse(SortField.Type.STRING_VAL); + } + + /** Tests reverse sorting on type string with a missing + * value sorted first */ + private void testStringMissingSortedFirstReverse(SortField.Type sortType) throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.YES)); + writer.addDocument(doc); + Type type = sortType == SortField.Type.STRING ? 
Type.SORTED : Type.BINARY; + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", type)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sf = new SortField("value", sortType, true); + Sort sort = new Sort(sf); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + assertEquals("foo", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value")); + // null comes last + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testStringMissingSortedLast() throws IOException { + testStringMissingSortedLast(SortField.Type.STRING); + } + + public void testStringValMissingSortedLast() throws IOException { + testStringMissingSortedLast(SortField.Type.STRING_VAL); + } + + /** Tests sorting on type string with a missing + * value sorted last */ + private void testStringMissingSortedLast(SortField.Type sortType) throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.YES)); + writer.addDocument(doc); + Type type = sortType == SortField.Type.STRING ? Type.SORTED : Type.BINARY; + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", type)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sf = new SortField("value", sortType); + sf.setMissingValue(SortField.STRING_LAST); + Sort sort = new Sort(sf); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + assertEquals("bar", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value")); + // null comes last + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testStringMissingSortedLastReverse() throws IOException { + testStringMissingSortedLastReverse(SortField.Type.STRING); + } + + public void testStringValMissingSortedLastReverse() throws IOException { + testStringMissingSortedLastReverse(SortField.Type.STRING_VAL); + } + + /** Tests reverse sorting on type string with a missing + * value sorted last */ + private void testStringMissingSortedLastReverse(SortField.Type sortType) throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.YES)); + writer.addDocument(doc); + Type type = sortType == SortField.Type.STRING ? 
Type.SORTED : Type.BINARY; + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", type)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sf = new SortField("value", sortType, true); + sf.setMissingValue(SortField.STRING_LAST); + Sort sort = new Sort(sf); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null comes first + assertNull(searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("bar", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on internal docid order */ + public void testFieldDoc() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.NO)); + writer.addDocument(doc); + IndexReader ir = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(SortField.FIELD_DOC); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(2, td.totalHits); + // docid 0, then docid 1 + assertEquals(0, td.scoreDocs[0].doc); + assertEquals(1, td.scoreDocs[1].doc); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on reverse internal docid order */ + public void testFieldDocReverse() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.NO)); + writer.addDocument(doc); + IndexReader ir = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField(null, SortField.Type.DOC, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(2, td.totalHits); + // docid 1, then docid 0 + assertEquals(1, td.scoreDocs[0].doc); + assertEquals(0, td.scoreDocs[1].doc); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests default sort (by score) */ + public void testFieldScore() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("value", "foo bar bar bar bar", Field.Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newTextField("value", "foo foo foo foo foo", Field.Store.NO)); + writer.addDocument(doc); + IndexReader ir = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(); + + TopDocs actual = searcher.search(new TermQuery(new Term("value", "foo")), 10, sort); + assertEquals(2, actual.totalHits); + + TopDocs expected = searcher.search(new TermQuery(new Term("value", "foo")), 10); + // the two topdocs should be the same + assertEquals(expected.totalHits, actual.totalHits); + for (int i = 0; i < actual.scoreDocs.length; i++) { + assertEquals(actual.scoreDocs[i].doc, expected.scoreDocs[i].doc); + } + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests default sort (by score) 
in reverse */ + public void testFieldScoreReverse() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("value", "foo bar bar bar bar", Field.Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newTextField("value", "foo foo foo foo foo", Field.Store.NO)); + writer.addDocument(doc); + IndexReader ir = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField(null, SortField.Type.SCORE, true)); + + TopDocs actual = searcher.search(new TermQuery(new Term("value", "foo")), 10, sort); + assertEquals(2, actual.totalHits); + + TopDocs expected = searcher.search(new TermQuery(new Term("value", "foo")), 10); + // the two topdocs should be the reverse of each other + assertEquals(expected.totalHits, actual.totalHits); + assertEquals(actual.scoreDocs[0].doc, expected.scoreDocs[1].doc); + assertEquals(actual.scoreDocs[1].doc, expected.scoreDocs[0].doc); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type int */ + public void testInt() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new IntPoint("value", 300000)); + doc.add(new StoredField("value", 300000)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.INTEGER_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.INT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // numeric order + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("300000", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type int with a missing value */ + public void testIntMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.INTEGER_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.INT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as a 0 + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** 
Tests sorting on type int, specifying the missing value should be treated as Integer.MAX_VALUE */ + public void testIntMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.INTEGER_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + SortField sortField = new SortField("value", SortField.Type.INT); + sortField.setMissingValue(Integer.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as a Integer.MAX_VALUE + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type int in reverse */ + public void testIntReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new IntPoint("value", 300000)); + doc.add(new StoredField("value", 300000)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new IntPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.INTEGER_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.INT, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // reverse numeric order + assertEquals("300000", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("-1", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy int */ + public void testLegacyInt() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyIntField("value", 300000, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_INTEGER)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.INT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // numeric order + assertEquals("-1", 
searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("300000", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy int with a missing value */ + public void testLegacyIntMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_INTEGER)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.INT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as a 0 + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy int, specifying the missing value should be treated as Integer.MAX_VALUE */ + public void testLegacyIntMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_INTEGER)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sortField = new SortField("value", SortField.Type.INT); + sortField.setMissingValue(Integer.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as a Integer.MAX_VALUE + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy int in reverse */ + public void testLegacyIntReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyIntField("value", 300000, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyIntField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_INTEGER)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.INT, true)); + + TopDocs td = 
searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // reverse numeric order + assertEquals("300000", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("-1", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type long */ + public void testLong() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LongPoint("value", 3000000000L)); + doc.add(new StoredField("value", 3000000000L)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LONG_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.LONG)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // numeric order + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("3000000000", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type long with a missing value */ + public void testLongMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LONG_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.LONG)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as 0 + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type long, specifying the missing value should be treated as Long.MAX_VALUE */ + public void testLongMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LONG_POINT)); + writer.close(); + + 
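The Long/Int/Float/Double sort tests in this file share one setup: wrap the reader with UninvertingReader and a field-to-Type mapping (here Type.LONG_POINT), after which a plain SortField of type LONG, optionally with a missing value, sorts on the point field. A sketch of that pattern outside the test harness; the directory handling and the "price" field name are illustrative, not part of the patch.

import java.io.IOException;
import java.util.Collections;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader.Type;

class SortOnLongPointSketch {
  static void printCheapestFirst(Directory dir) throws IOException {
    // Expose the indexed LongPoint field "price" as sortable doc values on the fly.
    DirectoryReader reader = UninvertingReader.wrap(
        DirectoryReader.open(dir),
        Collections.singletonMap("price", Type.LONG_POINT));
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      SortField byPrice = new SortField("price", SortField.Type.LONG);
      byPrice.setMissingValue(Long.MAX_VALUE);   // documents without "price" sort last
      TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, new Sort(byPrice));
      System.out.println(td.totalHits + " hits, lowest price first");
    } finally {
      reader.close();
    }
  }
}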
IndexSearcher searcher = newSearcher(ir, false); + SortField sortField = new SortField("value", SortField.Type.LONG); + sortField.setMissingValue(Long.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as Long.MAX_VALUE + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type long in reverse */ + public void testLongReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LongPoint("value", 3000000000L)); + doc.add(new StoredField("value", 3000000000L)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", -1)); + doc.add(new StoredField("value", -1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LongPoint("value", 4)); + doc.add(new StoredField("value", 4)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LONG_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.LONG, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // reverse numeric order + assertEquals("3000000000", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("-1", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy long */ + public void testLegacyLong() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyLongField("value", 3000000000L, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyLongField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyLongField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_LONG)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.LONG)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // numeric order + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("3000000000", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy long with a missing value */ + public void testLegacyLongMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyLongField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new 
LegacyLongField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_LONG)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.LONG)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as 0 + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy long, specifying the missing value should be treated as Long.MAX_VALUE */ + public void testLegacyLongMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyLongField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyLongField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_LONG)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sortField = new SortField("value", SortField.Type.LONG); + sortField.setMissingValue(Long.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as Long.MAX_VALUE + assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy long in reverse */ + public void testLegacyLongReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyLongField("value", 3000000000L, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyLongField("value", -1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyLongField("value", 4, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_LONG)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.LONG, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // reverse numeric order + assertEquals("3000000000", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("-1", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type float */ + public void testFloat() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new FloatPoint("value", 30.1f)); + doc.add(new StoredField("value", 30.1f)); + 
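One convention worth noting in testFloat and the other point-based tests: a FloatPoint (like IntPoint, LongPoint and DoublePoint) is index-only, so the tests add a StoredField under the same name purely so the assertions can read the value back via searcher.doc(...). A trimmed illustration of that pairing; the "price" field name is arbitrary.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.StoredField;

class PointPlusStoredSketch {
  static Document priceDoc(float price) {
    Document doc = new Document();
    doc.add(new FloatPoint("price", price));   // indexed point: range queries, uninverted sorting
    doc.add(new StoredField("price", price));  // stored copy: retrievable from search hits
    return doc;
  }
}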
writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", -1.3f)); + doc.add(new StoredField("value", -1.3f)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", 4.2f)); + doc.add(new StoredField("value", 4.2f)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.FLOAT_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // numeric order + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("30.1", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type float with a missing value */ + public void testFloatMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", -1.3f)); + doc.add(new StoredField("value", -1.3f)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", 4.2f)); + doc.add(new StoredField("value", 4.2f)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.FLOAT_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as 0 + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type float, specifying the missing value should be treated as Float.MAX_VALUE */ + public void testFloatMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", -1.3f)); + doc.add(new StoredField("value", -1.3f)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", 4.2f)); + doc.add(new StoredField("value", 4.2f)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.FLOAT_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + SortField sortField = new SortField("value", SortField.Type.FLOAT); + sortField.setMissingValue(Float.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as Float.MAX_VALUE + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type float in reverse */ 
+ public void testFloatReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new FloatPoint("value", 30.1f)); + doc.add(new StoredField("value", 30.1f)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", -1.3f)); + doc.add(new StoredField("value", -1.3f)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new FloatPoint("value", 4.2f)); + doc.add(new StoredField("value", 4.2f)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.FLOAT_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // reverse numeric order + assertEquals("30.1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("-1.3", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy float */ + public void testLegacyFloat() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyFloatField("value", 30.1f, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", -1.3f, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", 4.2f, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_FLOAT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // numeric order + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("30.1", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy float with a missing value */ + public void testLegacyFloatMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", -1.3f, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", 4.2f, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_FLOAT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as 0 + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2", 
searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy float, specifying the missing value should be treated as Float.MAX_VALUE */ + public void testLegacyFloatMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", -1.3f, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", 4.2f, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_FLOAT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sortField = new SortField("value", SortField.Type.FLOAT); + sortField.setMissingValue(Float.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // null is treated as Float.MAX_VALUE + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy float in reverse */ + public void testLegacyFloatReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyFloatField("value", 30.1f, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", -1.3f, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyFloatField("value", 4.2f, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_FLOAT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(3, td.totalHits); + // reverse numeric order + assertEquals("30.1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("-1.3", searcher.doc(td.scoreDocs[2].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type double */ + public void testDouble() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new DoublePoint("value", 30.1)); + doc.add(new StoredField("value", 30.1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", -1.3)); + doc.add(new StoredField("value", -1.3)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333333)); + doc.add(new StoredField("value", 4.2333333333333)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333332)); + doc.add(new StoredField("value", 4.2333333333332)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", 
Type.DOUBLE_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // numeric order + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[2].doc).get("value")); + assertEquals("30.1", searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type double with +/- zero */ + public void testDoubleSignedZero() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new DoublePoint("value", +0d)); + doc.add(new StoredField("value", +0d)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", -0d)); + doc.add(new StoredField("value", -0d)); + writer.addDocument(doc); + doc = new Document(); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.DOUBLE_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(2, td.totalHits); + // numeric order + double v0 = searcher.doc(td.scoreDocs[0].doc).getField("value").numericValue().doubleValue(); + double v1 = searcher.doc(td.scoreDocs[1].doc).getField("value").numericValue().doubleValue(); + assertEquals(0, v0, 0d); + assertEquals(0, v1, 0d); + // check sign bits + assertEquals(1, Double.doubleToLongBits(v0) >>> 63); + assertEquals(0, Double.doubleToLongBits(v1) >>> 63); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type double with a missing value */ + public void testDoubleMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", -1.3)); + doc.add(new StoredField("value", -1.3)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333333)); + doc.add(new StoredField("value", 4.2333333333333)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333332)); + doc.add(new StoredField("value", 4.2333333333332)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.DOUBLE_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // null treated as a 0 + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[2].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type double, specifying the missing value should be 
treated as Double.MAX_VALUE */ + public void testDoubleMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", -1.3)); + doc.add(new StoredField("value", -1.3)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333333)); + doc.add(new StoredField("value", 4.2333333333333)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333332)); + doc.add(new StoredField("value", 4.2333333333332)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.DOUBLE_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + SortField sortField = new SortField("value", SortField.Type.DOUBLE); + sortField.setMissingValue(Double.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // null treated as Double.MAX_VALUE + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[2].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type double in reverse */ + public void testDoubleReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new DoublePoint("value", 30.1)); + doc.add(new StoredField("value", 30.1)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", -1.3)); + doc.add(new StoredField("value", -1.3)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333333)); + doc.add(new StoredField("value", 4.2333333333333)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new DoublePoint("value", 4.2333333333332)); + doc.add(new StoredField("value", 4.2333333333332)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.DOUBLE_POINT)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir, false); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // numeric order + assertEquals("30.1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[2].doc).get("value")); + assertEquals("-1.3", searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy double */ + public void testLegacyDouble() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyDoubleField("value", 30.1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", -1.3, Field.Store.YES)); + 
writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333333, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333332, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_DOUBLE)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // numeric order + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[2].doc).get("value")); + assertEquals("30.1", searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy double with +/- zero */ + public void testLegacyDoubleSignedZero() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyDoubleField("value", +0d, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", -0d, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_DOUBLE)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(2, td.totalHits); + // numeric order + double v0 = searcher.doc(td.scoreDocs[0].doc).getField("value").numericValue().doubleValue(); + double v1 = searcher.doc(td.scoreDocs[1].doc).getField("value").numericValue().doubleValue(); + assertEquals(0, v0, 0d); + assertEquals(0, v1, 0d); + // check sign bits + assertEquals(1, Double.doubleToLongBits(v0) >>> 63); + assertEquals(0, Double.doubleToLongBits(v1) >>> 63); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy double with a missing value */ + public void testLegacyDoubleMissing() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", -1.3, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333333, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333332, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_DOUBLE)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // null treated as a 0 + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333332", 
searcher.doc(td.scoreDocs[2].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy double, specifying the missing value should be treated as Double.MAX_VALUE */ + public void testLegacyDoubleMissingLast() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", -1.3, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333333, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333332, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_DOUBLE)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + SortField sortField = new SortField("value", SortField.Type.DOUBLE); + sortField.setMissingValue(Double.MAX_VALUE); + Sort sort = new Sort(sortField); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // null treated as Double.MAX_VALUE + assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[2].doc).get("value")); + assertNull(searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + /** Tests sorting on type legacy double in reverse */ + public void testLegacyDoubleReverse() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new LegacyDoubleField("value", 30.1, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", -1.3, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333333, Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new LegacyDoubleField("value", 4.2333333333332, Field.Store.YES)); + writer.addDocument(doc); + IndexReader ir = UninvertingReader.wrap(writer.getReader(), + Collections.singletonMap("value", Type.LEGACY_DOUBLE)); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE, true)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(4, td.totalHits); + // numeric order + assertEquals("30.1", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[1].doc).get("value")); + assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[2].doc).get("value")); + assertEquals("-1.3", searcher.doc(td.scoreDocs[3].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testEmptyStringVsNullStringSort() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); + Document doc = new Document(); + doc.add(newStringField("f", "", Field.Store.NO)); + doc.add(newStringField("t", "1", Field.Store.NO)); + 
w.addDocument(doc); + w.commit(); + doc = new Document(); + doc.add(newStringField("t", "1", Field.Store.NO)); + w.addDocument(doc); + + IndexReader r = UninvertingReader.wrap(DirectoryReader.open(w), + Collections.singletonMap("f", Type.SORTED)); + w.close(); + IndexSearcher s = newSearcher(r); + TopDocs hits = s.search(new TermQuery(new Term("t", "1")), 10, new Sort(new SortField("f", SortField.Type.STRING))); + assertEquals(2, hits.totalHits); + // null sorts first + assertEquals(1, hits.scoreDocs[0].doc); + assertEquals(0, hits.scoreDocs[1].doc); + TestUtil.checkReader(r); + r.close(); + dir.close(); + } + + /** test that we throw exception on multi-valued field, creates corrupt reader, use SORTED_SET instead */ + public void testMultiValuedField() throws IOException { + Directory indexStore = newDirectory(); + IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random()))); + for(int i=0; i<5; i++) { + Document doc = new Document(); + doc.add(new StringField("string", "a"+i, Field.Store.NO)); + doc.add(new StringField("string", "b"+i, Field.Store.NO)); + writer.addDocument(doc); + } + writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases + writer.close(); + Sort sort = new Sort( + new SortField("string", SortField.Type.STRING), + SortField.FIELD_DOC); + IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore), + Collections.singletonMap("string", Type.SORTED)); + IndexSearcher searcher = new IndexSearcher(reader); + expectThrows(IllegalStateException.class, () -> { + searcher.search(new MatchAllDocsQuery(), 500, sort); + }); + reader.close(); + indexStore.close(); + } + + public void testMaxScore() throws Exception { + Directory d = newDirectory(); + // Not RIW because we need exactly 2 segs: + IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random()))); + int id = 0; + for(int seg=0;seg<2;seg++) { + for(int docIDX=0;docIDX<10;docIDX++) { + Document doc = new Document(); + doc.add(new LegacyIntField("id", docIDX, Field.Store.YES)); + StringBuilder sb = new StringBuilder(); + for(int i=0;i mappings = new HashMap<>(); + mappings.put("tievalue", Type.SORTED); + mappings.put("value", Type.SORTED); + + IndexReader ir = UninvertingReader.wrap(writer.getReader(), mappings); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + // tievalue, then value + Sort sort = new Sort(new SortField("tievalue", SortField.Type.STRING), + new SortField("value", SortField.Type.STRING)); + + TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); + assertEquals(2, td.totalHits); + // 'bar' comes before 'foo' + assertEquals("bar", searcher.doc(td.scoreDocs[0].doc).get("value")); + assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value")); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testScore() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newStringField("value", "bar", Field.Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newStringField("value", "foo", Field.Store.NO)); + writer.addDocument(doc); + IndexReader ir = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(ir); + Sort sort = new Sort(SortField.FIELD_SCORE); + + final BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.add(new TermQuery(new Term("value", "foo")), Occur.SHOULD); + bq.add(new 
MatchAllDocsQuery(), Occur.SHOULD); + TopDocs td = searcher.search(bq.build(), 10, sort); + assertEquals(2, td.totalHits); + if (Float.isNaN(td.scoreDocs[0].score) == false && Float.isNaN(td.scoreDocs[1].score) == false) { + assertEquals(1, td.scoreDocs[0].doc); + assertEquals(0, td.scoreDocs[1].doc); + } + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } +} diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java new file mode 100644 index 00000000000..6f2e17cc1df --- /dev/null +++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopFieldDocs; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import org.apache.solr.uninverting.UninvertingReader.Type; + +/** random sorting tests with uninversion */ +public class TestFieldCacheSortRandom extends LuceneTestCase { + + public void testRandomStringSort() throws Exception { + testRandomStringSort(SortField.Type.STRING); + } + + public void testRandomStringValSort() throws Exception { + testRandomStringSort(SortField.Type.STRING_VAL); + } + + private void testRandomStringSort(SortField.Type type) throws 
Exception { + Random random = new Random(random().nextLong()); + + final int NUM_DOCS = atLeast(100); + final Directory dir = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random, dir); + final boolean allowDups = random.nextBoolean(); + final Set seen = new HashSet<>(); + final int maxLength = TestUtil.nextInt(random, 5, 100); + if (VERBOSE) { + System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups); + } + + int numDocs = 0; + final List docValues = new ArrayList<>(); + // TODO: deletions + while (numDocs < NUM_DOCS) { + final Document doc = new Document(); + + // 10% of the time, the document is missing the value: + final BytesRef br; + if (random().nextInt(10) != 7) { + final String s; + if (random.nextBoolean()) { + s = TestUtil.randomSimpleString(random, maxLength); + } else { + s = TestUtil.randomUnicodeString(random, maxLength); + } + + if (!allowDups) { + if (seen.contains(s)) { + continue; + } + seen.add(s); + } + + if (VERBOSE) { + System.out.println(" " + numDocs + ": s=" + s); + } + + doc.add(new StringField("stringdv", s, Field.Store.NO)); + docValues.add(new BytesRef(s)); + + } else { + br = null; + if (VERBOSE) { + System.out.println(" " + numDocs + ": "); + } + docValues.add(null); + } + + doc.add(new IntPoint("id", numDocs)); + doc.add(new StoredField("id", numDocs)); + writer.addDocument(doc); + numDocs++; + + if (random.nextInt(40) == 17) { + // force flush + writer.getReader().close(); + } + } + + Map mapping = new HashMap<>(); + mapping.put("stringdv", Type.SORTED); + mapping.put("id", Type.INTEGER_POINT); + final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping); + writer.close(); + if (VERBOSE) { + System.out.println(" reader=" + r); + } + + final IndexSearcher s = newSearcher(r, false); + final int ITERS = atLeast(100); + for(int iter=0;iter" : br.utf8ToString())); + if (idx == hitCount-1) { + break; + } + } + } + + if (VERBOSE) { + System.out.println(" actual:"); + for(int hitIDX=0;hitIDX" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id")); + } + } + for(int hitIDX=0;hitIDX docValues; + public final List matchValues = Collections.synchronizedList(new ArrayList()); + + // density should be 0.0 ... 
1.0 + public RandomQuery(long seed, float density, List docValues) { + this.seed = seed; + this.density = density; + this.docValues = docValues; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return new ConstantScoreWeight(this) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + Random random = new Random(seed ^ context.docBase); + final int maxDoc = context.reader().maxDoc(); + final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id"); + assertNotNull(idSource); + final FixedBitSet bits = new FixedBitSet(maxDoc); + for(int docID=0;docID docBytes = new ArrayList<>(); + long totalBytes = 0; + for(int docID=0;docID 64KB in size to ensure more than 2 pages in + // PagedBytes would be needed: + int numBytes; + if (doFixed) { + numBytes = fixedLength; + } else if (docID == 0 || random().nextInt(5) == 3) { + numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024); + } else { + numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024); + } + totalBytes += numBytes; + if (totalBytes > 5 * 1024*1024) { + break; + } + byte[] bytes = new byte[numBytes]; + random().nextBytes(bytes); + docBytes.add(bytes); + Document doc = new Document(); + BytesRef b = new BytesRef(bytes); + b.length = bytes.length; + doc.add(new BinaryDocValuesField("field", b)); + doc.add(new StringField("id", ""+docID, Field.Store.YES)); + try { + w.addDocument(doc); + } catch (IllegalArgumentException iae) { + if (iae.getMessage().indexOf("is too large") == -1) { + throw iae; + } else { + // OK: some codecs can't handle binary DV > 32K + assertFalse(codecAcceptsHugeBinaryValues("field")); + w.rollback(); + d.close(); + return; + } + } + } + + DirectoryReader r; + try { + r = DirectoryReader.open(w); + } catch (IllegalArgumentException iae) { + if (iae.getMessage().indexOf("is too large") == -1) { + throw iae; + } else { + assertFalse(codecAcceptsHugeBinaryValues("field")); + + // OK: some codecs can't handle binary DV > 32K + w.rollback(); + d.close(); + return; + } + } + w.close(); + + LeafReader ar = SlowCompositeReaderWrapper.wrap(r); + TestUtil.checkReader(ar); + + BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false); + for(int docID=0;docID docBytes = new ArrayList<>(); + long totalBytes = 0; + for(int docID=0;docID 64KB in size to ensure more than 2 pages in + // PagedBytes would be needed: + int numBytes; + if (doFixed) { + numBytes = fixedLength; + } else if (docID == 0 || random().nextInt(5) == 3) { + numBytes = LARGE_BINARY_FIELD_LENGTH; + } else { + numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH); + } + totalBytes += numBytes; + if (totalBytes > 5 * 1024*1024) { + break; + } + byte[] bytes = new byte[numBytes]; + random().nextBytes(bytes); + docBytes.add(bytes); + Document doc = new Document(); + BytesRef b = new BytesRef(bytes); + b.length = bytes.length; + doc.add(new BinaryDocValuesField("field", b)); + doc.add(new StringField("id", ""+docID, Field.Store.YES)); + w.addDocument(doc); + } + + DirectoryReader r = DirectoryReader.open(w); + w.close(); + + LeafReader ar = SlowCompositeReaderWrapper.wrap(r); + TestUtil.checkReader(ar + ); + + BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false); + for(int docID=0;docID values = new ArrayList<>(); + for (int v = 0; v < numValues; v++) { + values.add(TestUtil.randomSimpleString(random(), minLength, length)); + } + + // add in any order to the indexed field + ArrayList unordered = new 
ArrayList<>(values); + Collections.shuffle(unordered, random()); + for (String v : values) { + doc.add(newStringField("indexed", v, Field.Store.NO)); + } + + // add in any order to the dv field + ArrayList unordered2 = new ArrayList<>(values); + Collections.shuffle(unordered2, random()); + for (String v : unordered2) { + doc.add(new SortedSetDocValuesField("dv", new BytesRef(v))); + } + + writer.addDocument(doc); + if (random().nextInt(31) == 0) { + writer.commit(); + } + } + + // delete some docs + int numDeletions = random().nextInt(numDocs/10); + for (int i = 0; i < numDeletions; i++) { + int id = random().nextInt(numDocs); + writer.deleteDocuments(new Term("id", Integer.toString(id))); + } + + // compare per-segment + DirectoryReader ir = writer.getReader(); + for (LeafReaderContext context : ir.leaves()) { + LeafReader r = context.reader(); + SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null); + SortedSetDocValues actual = r.getSortedSetDocValues("dv"); + assertEquals(r.maxDoc(), expected, actual); + } + ir.close(); + + writer.forceMerge(1); + + // now compare again after the merge + ir = writer.getReader(); + LeafReader ar = getOnlyLeafReader(ir); + SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null); + SortedSetDocValues actual = ar.getSortedSetDocValues("dv"); + assertEquals(ir.maxDoc(), expected, actual); + ir.close(); + + writer.close(); + dir.close(); + } + + private void doTestMissingVsFieldCache(LongProducer longs) throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); + Field idField = new StringField("id", "", Field.Store.NO); + Field indexedField = newStringField("indexed", "", Field.Store.NO); + Field dvField = new NumericDocValuesField("dv", 0); + + + // index some docs + int numDocs = atLeast(300); + // numDocs should be always > 256 so that in case of a codec that optimizes + // for numbers of values <= 256, all storage layouts are tested + assert numDocs > 256; + for (int i = 0; i < numDocs; i++) { + idField.setStringValue(Integer.toString(i)); + long value = longs.next(); + indexedField.setStringValue(Long.toString(value)); + dvField.setLongValue(value); + Document doc = new Document(); + doc.add(idField); + // 1/4 of the time we neglect to add the fields + if (random().nextInt(4) > 0) { + doc.add(indexedField); + doc.add(dvField); + } + writer.addDocument(doc); + if (random().nextInt(31) == 0) { + writer.commit(); + } + } + + // delete some docs + int numDeletions = random().nextInt(numDocs/10); + for (int i = 0; i < numDeletions; i++) { + int id = random().nextInt(numDocs); + writer.deleteDocuments(new Term("id", Integer.toString(id))); + } + + // merge some segments and ensure that at least one of them has more than + // 256 values + writer.forceMerge(numDocs / 256); + + writer.close(); + + // compare + DirectoryReader ir = DirectoryReader.open(dir); + for (LeafReaderContext context : ir.leaves()) { + LeafReader r = context.reader(); + Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null); + Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null); + assertEquals(expected, actual); + } + ir.close(); + dir.close(); + } + + private void doTestMissingVsFieldCache(final long minValue, final long maxValue) throws Exception { + doTestMissingVsFieldCache(new LongProducer() { + @Override + long next() { + return 
TestUtil.nextLong(random(), minValue, maxValue); + } + }); + } + + static abstract class LongProducer { + abstract long next(); + } + + private void assertEquals(Bits expected, Bits actual) throws Exception { + assertEquals(expected.length(), actual.length()); + for (int i = 0; i < expected.length(); i++) { + assertEquals(expected.get(i), actual.get(i)); + } + } + + private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception { + assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual)); + } + + private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception { + // can be null for the segment if no docs actually had any SortedDocValues + // in this case FC.getDocTermsOrds returns EMPTY + if (actual == null) { + assertEquals(expected.getValueCount(), 0); + return; + } + assertEquals(expected.getValueCount(), actual.getValueCount()); + // compare ord lists + for (int i = 0; i < maxDoc; i++) { + expected.setDocument(i); + actual.setDocument(i); + long expectedOrd; + while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) { + assertEquals(expectedOrd, actual.nextOrd()); + } + assertEquals(NO_MORE_ORDS, actual.nextOrd()); + } + + // compare ord dictionary + for (long i = 0; i < expected.getValueCount(); i++) { + final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i)); + final BytesRef actualBytes = actual.lookupOrd(i); + assertEquals(expectedBytes, actualBytes); + } + + // compare termsenum + assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum()); + } + + private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception { + BytesRef ref; + + // sequential next() through all terms + while ((ref = expected.next()) != null) { + assertEquals(ref, actual.next()); + assertEquals(expected.ord(), actual.ord()); + assertEquals(expected.term(), actual.term()); + } + assertNull(actual.next()); + + // sequential seekExact(ord) through all terms + for (long i = 0; i < numOrds; i++) { + expected.seekExact(i); + actual.seekExact(i); + assertEquals(expected.ord(), actual.ord()); + assertEquals(expected.term(), actual.term()); + } + + // sequential seekExact(BytesRef) through all terms + for (long i = 0; i < numOrds; i++) { + expected.seekExact(i); + assertTrue(actual.seekExact(expected.term())); + assertEquals(expected.ord(), actual.ord()); + assertEquals(expected.term(), actual.term()); + } + + // sequential seekCeil(BytesRef) through all terms + for (long i = 0; i < numOrds; i++) { + expected.seekExact(i); + assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term())); + assertEquals(expected.ord(), actual.ord()); + assertEquals(expected.term(), actual.term()); + } + + // random seekExact(ord) + for (long i = 0; i < numOrds; i++) { + long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1); + expected.seekExact(randomOrd); + actual.seekExact(randomOrd); + assertEquals(expected.ord(), actual.ord()); + assertEquals(expected.term(), actual.term()); + } + + // random seekExact(BytesRef) + for (long i = 0; i < numOrds; i++) { + long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1); + expected.seekExact(randomOrd); + actual.seekExact(expected.term()); + assertEquals(expected.ord(), actual.ord()); + assertEquals(expected.term(), actual.term()); + } + + // random seekCeil(BytesRef) + for (long i = 0; i < numOrds; i++) { + BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random())); + SeekStatus 
expectedStatus = expected.seekCeil(target); + assertEquals(expectedStatus, actual.seekCeil(target)); + if (expectedStatus != SeekStatus.END) { + assertEquals(expected.ord(), actual.ord()); + assertEquals(expected.term(), actual.term()); + } + } + } + + protected boolean codecAcceptsHugeBinaryValues(String field) { + String name = TestUtil.getDocValuesFormat(field); + return !(name.equals("Memory")); // Direct has a different type of limit + } +} diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheWithThreads.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheWithThreads.java new file mode 100644 index 00000000000..b0801fed9bb --- /dev/null +++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheWithThreads.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +// TODO: what happened to this test... its not actually uninverting? 
+public class TestFieldCacheWithThreads extends LuceneTestCase { + + public void test() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); + + final List numbers = new ArrayList<>(); + final List binary = new ArrayList<>(); + final List sorted = new ArrayList<>(); + final int numDocs = atLeast(100); + for(int i=0;i threads = new ArrayList<>(); + final CountDownLatch startingGun = new CountDownLatch(1); + for(int t=0;t seen = new HashSet<>(); + if (VERBOSE) { + System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); + } + int numDocs = 0; + final List docValues = new ArrayList<>(); + + // TODO: deletions + while (numDocs < NUM_DOCS) { + final String s; + if (random.nextBoolean()) { + s = TestUtil.randomSimpleString(random); + } else { + s = TestUtil.randomUnicodeString(random); + } + final BytesRef br = new BytesRef(s); + + if (!allowDups) { + if (seen.contains(s)) { + continue; + } + seen.add(s); + } + + if (VERBOSE) { + System.out.println(" " + numDocs + ": s=" + s); + } + + final Document doc = new Document(); + doc.add(new SortedDocValuesField("stringdv", br)); + doc.add(new NumericDocValuesField("id", numDocs)); + docValues.add(br); + writer.addDocument(doc); + numDocs++; + + if (random.nextInt(40) == 17) { + // force flush + writer.getReader().close(); + } + } + + writer.forceMerge(1); + final DirectoryReader r = writer.getReader(); + writer.close(); + + final LeafReader sr = getOnlyLeafReader(r); + + final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS); + + final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10); + Thread[] threads = new Thread[NUM_THREADS]; + for(int thread=0;thread= NUM_ITER) { + break; + } + } else if (op == 1) { + Bits docsWithField = cache.getDocsWithField(reader, "sparse", null); + for (int i = 0; i < docsWithField.length(); i++) { + assertEquals(i%2 == 0, docsWithField.get(i)); + } + } else { + NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true); + Bits docsWithField = cache.getDocsWithField(reader, "sparse", null); + for (int i = 0; i < docsWithField.length(); i++) { + if (i%2 == 0) { + assertTrue(docsWithField.get(i)); + assertEquals(i, ints.get(i)); + } else { + assertFalse(docsWithField.get(i)); + } + } + } + } + } catch (Throwable t) { + failed.set(true); + restart.reset(); + throw new RuntimeException(t); + } + } + }; + threads[threadIDX].start(); + } + + for(int threadIDX=0;threadIDX { + FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.LEGACY_INT_PARSER, false); + }); + + // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds() + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.LEGACY_INT_PARSER, false); + }); + + // Numeric type: can be retrieved via getInts() and so on + NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.LEGACY_INT_PARSER, false); + assertEquals(42, numeric.get(0)); + + // SortedSet type: can be retrieved via getDocTermOrds() + expectThrows(IllegalStateException.class, () -> { + FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.LEGACY_INT_PARSER, false); + }); + + ir.close(); + dir.close(); + } + + public void testNonexistantFields() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + 
Document doc = new Document(); + iw.addDocument(doc); + DirectoryReader ir = iw.getReader(); + iw.close(); + + LeafReader ar = getOnlyLeafReader(ir); + + final FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + assertEquals(0, cache.getCacheEntries().length); + + NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true); + assertEquals(0, ints.get(0)); + + NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true); + assertEquals(0, longs.get(0)); + + NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true); + assertEquals(0, floats.get(0)); + + NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true); + assertEquals(0, doubles.get(0)); + + // check that we cached nothing + assertEquals(0, cache.getCacheEntries().length); + ir.close(); + dir.close(); + } + + public void testNonIndexedFields() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new StoredField("bogusbytes", "bogus")); + doc.add(new StoredField("bogusshorts", "bogus")); + doc.add(new StoredField("bogusints", "bogus")); + doc.add(new StoredField("boguslongs", "bogus")); + doc.add(new StoredField("bogusfloats", "bogus")); + doc.add(new StoredField("bogusdoubles", "bogus")); + doc.add(new StoredField("bogusbits", "bogus")); + iw.addDocument(doc); + DirectoryReader ir = iw.getReader(); + iw.close(); + + LeafReader ar = getOnlyLeafReader(ir); + + final FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + assertEquals(0, cache.getCacheEntries().length); + + NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true); + assertEquals(0, ints.get(0)); + + NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true); + assertEquals(0, longs.get(0)); + + NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true); + assertEquals(0, floats.get(0)); + + NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true); + assertEquals(0, doubles.get(0)); + + // check that we cached nothing + assertEquals(0, cache.getCacheEntries().length); + ir.close(); + dir.close(); + } + + // Make sure that the use of GrowableWriter doesn't prevent from using the full long range + public void testLongFieldCache() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); + cfg.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); + Document doc = new Document(); + LegacyLongField field = new LegacyLongField("f", 0L, Store.YES); + doc.add(field); + final long[] values = new long[TestUtil.nextInt(random(), 1, 10)]; + for (int i = 0; i < values.length; ++i) { + final long v; + switch (random().nextInt(10)) { + case 0: + v = Long.MIN_VALUE; + break; + case 1: + v = 0; + break; + case 2: + v = Long.MAX_VALUE; + break; + default: + v = TestUtil.nextLong(random(), -10, 10); + break; + } + values[i] = v; + if (v == 0 && random().nextBoolean()) { + // missing + iw.addDocument(new Document()); + } else { + field.setLongValue(v); + iw.addDocument(doc); + } + } + iw.forceMerge(1); + final DirectoryReader reader = iw.getReader(); + final NumericDocValues longs = 
FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER, false); + for (int i = 0; i < values.length; ++i) { + assertEquals(values[i], longs.get(i)); + } + reader.close(); + iw.close(); + dir.close(); + } + + // Make sure that the use of GrowableWriter doesn't prevent from using the full int range + public void testIntFieldCache() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); + cfg.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); + Document doc = new Document(); + LegacyIntField field = new LegacyIntField("f", 0, Store.YES); + doc.add(field); + final int[] values = new int[TestUtil.nextInt(random(), 1, 10)]; + for (int i = 0; i < values.length; ++i) { + final int v; + switch (random().nextInt(10)) { + case 0: + v = Integer.MIN_VALUE; + break; + case 1: + v = 0; + break; + case 2: + v = Integer.MAX_VALUE; + break; + default: + v = TestUtil.nextInt(random(), -10, 10); + break; + } + values[i] = v; + if (v == 0 && random().nextBoolean()) { + // missing + iw.addDocument(new Document()); + } else { + field.setIntValue(v); + iw.addDocument(doc); + } + } + iw.forceMerge(1); + final DirectoryReader reader = iw.getReader(); + final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER, false); + for (int i = 0; i < values.length; ++i) { + assertEquals(values[i], ints.get(i)); + } + reader.close(); + iw.close(); + dir.close(); + } + +} diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java new file mode 100644 index 00000000000..2b861adb43d --- /dev/null +++ b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.uninverting;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.LegacyIntField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LegacyNumericRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.uninverting.UninvertingReader.Type;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestNumericTerms32 extends LuceneTestCase {
+  // distance of entries
+  private static int distance;
+  // shift the starting of the values to the left, to also have negative values:
+  private static final int startOffset = - 1 << 15;
+  // number of docs to generate for testing
+  private static int noDocs;
+
+  private static Directory directory = null;
+  private static IndexReader reader = null;
+  private static IndexSearcher searcher = null;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    noDocs = atLeast(4096);
+    distance = (1 << 30) / noDocs;
+    directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
+        newIndexWriterConfig(new MockAnalyzer(random()))
+        .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
+        .setMergePolicy(newLogMergePolicy()));
+
+    final FieldType storedInt = new FieldType(LegacyIntField.TYPE_NOT_STORED);
+    storedInt.setStored(true);
+    storedInt.freeze();
+
+    final FieldType storedInt8 = new FieldType(storedInt);
+    storedInt8.setNumericPrecisionStep(8);
+
+    final FieldType storedInt4 = new FieldType(storedInt);
+    storedInt4.setNumericPrecisionStep(4);
+
+    final FieldType storedInt2 = new FieldType(storedInt);
+    storedInt2.setNumericPrecisionStep(2);
+
+    LegacyIntField
+      field8 = new LegacyIntField("field8", 0, storedInt8),
+      field4 = new LegacyIntField("field4", 0, storedInt4),
+      field2 = new LegacyIntField("field2", 0, storedInt2);
+
+    Document doc = new Document();
+    // add fields, that have a distance to test general functionality
+    doc.add(field8); doc.add(field4); doc.add(field2);
+
+    // Add a series of noDocs docs with increasing int values
+    for (int l=0; l<noDocs; l++) {
+      int val=distance*l+startOffset;
+      field8.setIntValue(val);
+      field4.setIntValue(val);
+      field2.setIntValue(val);
+      writer.addDocument(doc);
+    }
+    Map<String,Type> map = new HashMap<>();
+    map.put("field2", Type.LEGACY_INTEGER);
+    map.put("field4", Type.LEGACY_INTEGER);
+    map.put("field8", Type.LEGACY_INTEGER);
+    reader = UninvertingReader.wrap(writer.getReader(), map);
+    searcher=newSearcher(reader);
+    writer.close();
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    searcher = null;
+    TestUtil.checkReader(reader);
+    reader.close();
+    reader = null;
+    directory.close();
+    directory = null;
+  }
+
+  private void testSorting(int precisionStep) throws Exception {
+    String field="field"+precisionStep;
+    // 10 random tests, the index order is ascending,
+    // so using a reverse sort field should return descending documents
+    int num = TestUtil.nextInt(random(), 10, 20);
+    for (int i = 0; i < num; i++) {
+      int lower=(int)(random().nextDouble()*noDocs*distance)+startOffset;
upper=(int)(random().nextDouble()*noDocs*distance)+startOffset;
+      if (lower>upper) {
+        int a=lower; lower=upper; upper=a;
+      }
+      Query tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
+      TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.INT, true)));
+      if (topDocs.totalHits==0) continue;
+      ScoreDoc[] sd = topDocs.scoreDocs;
+      assertNotNull(sd);
+      int last = searcher.doc(sd[0].doc).getField(field).numericValue().intValue();
+      for (int j=1; j<sd.length; j++) {
+        int act = searcher.doc(sd[j].doc).getField(field).numericValue().intValue();
+        assertTrue("Docs should be sorted backwards", last>act );
+        last=act;
+      }
+    }
+  }
+
+  @Test
+  public void testSorting_8bit() throws Exception {
+    testSorting(8);
+  }
+
+  @Test
+  public void testSorting_4bit() throws Exception {
+    testSorting(4);
+  }
+
+  @Test
+  public void testSorting_2bit() throws Exception {
+    testSorting(2);
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java
new file mode 100644
index 00000000000..4da8be98c0f
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.uninverting; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.LegacyLongField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LegacyNumericRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import org.apache.solr.uninverting.UninvertingReader.Type; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestNumericTerms64 extends LuceneTestCase { + // distance of entries + private static long distance; + // shift the starting of the values to the left, to also have negative values: + private static final long startOffset = - 1L << 31; + // number of docs to generate for testing + private static int noDocs; + + private static Directory directory = null; + private static IndexReader reader = null; + private static IndexSearcher searcher = null; + + @BeforeClass + public static void beforeClass() throws Exception { + noDocs = atLeast(4096); + distance = (1L << 60) / noDocs; + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, + newIndexWriterConfig(new MockAnalyzer(random())) + .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)) + .setMergePolicy(newLogMergePolicy())); + + final FieldType storedLong = new FieldType(LegacyLongField.TYPE_NOT_STORED); + storedLong.setStored(true); + storedLong.freeze(); + + final FieldType storedLong8 = new FieldType(storedLong); + storedLong8.setNumericPrecisionStep(8); + + final FieldType storedLong4 = new FieldType(storedLong); + storedLong4.setNumericPrecisionStep(4); + + final FieldType storedLong6 = new FieldType(storedLong); + storedLong6.setNumericPrecisionStep(6); + + final FieldType storedLong2 = new FieldType(storedLong); + storedLong2.setNumericPrecisionStep(2); + + LegacyLongField + field8 = new LegacyLongField("field8", 0L, storedLong8), + field6 = new LegacyLongField("field6", 0L, storedLong6), + field4 = new LegacyLongField("field4", 0L, storedLong4), + field2 = new LegacyLongField("field2", 0L, storedLong2); + + Document doc = new Document(); + // add fields, that have a distance to test general functionality + doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); + + // Add a series of noDocs docs with increasing long values, by updating the fields + for (int l=0; l map = new HashMap<>(); + map.put("field2", Type.LEGACY_LONG); + map.put("field4", Type.LEGACY_LONG); + map.put("field6", Type.LEGACY_LONG); + map.put("field8", Type.LEGACY_LONG); + reader = UninvertingReader.wrap(writer.getReader(), map); + searcher=newSearcher(reader); + writer.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher = null; + TestUtil.checkReader(reader); + reader.close(); + reader = null; + directory.close(); + directory = null; + } + + private void testSorting(int precisionStep) throws Exception { + String field="field"+precisionStep; + // 10 random tests, the index order is ascending, 
+    // so using a reverse sort field should return descending documents
+    int num = TestUtil.nextInt(random(), 10, 20);
+    for (int i = 0; i < num; i++) {
+      long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
+      long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
+      if (lower>upper) {
+        long a=lower; lower=upper; upper=a;
+      }
+      Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
+      TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
+      if (topDocs.totalHits==0) continue;
+      ScoreDoc[] sd = topDocs.scoreDocs;
+      assertNotNull(sd);
+      long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
+      for (int j=1; j<sd.length; j++) {
+        long act = searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
+        assertTrue("Docs should be sorted backwards", last>act );
+        last=act;
+      }
+    }
+  }
+
+  @Test
+  public void testSorting_8bit() throws Exception {
+    testSorting(8);
+  }
+
+  @Test
+  public void testSorting_6bit() throws Exception {
+    testSorting(6);
+  }
+
+  @Test
+  public void testSorting_4bit() throws Exception {
+    testSorting(4);
+  }
+
+  @Test
+  public void testSorting_2bit() throws Exception {
+    testSorting(2);
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java b/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java
new file mode 100644
index 00000000000..2ecc63e6d11
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.uninverting; + +import java.io.IOException; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LegacyIntField; +import org.apache.lucene.document.LegacyLongField; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LegacyNumericUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import org.apache.solr.index.SlowCompositeReaderWrapper; +import org.apache.solr.uninverting.UninvertingReader.Type; + +public class TestUninvertingReader extends LuceneTestCase { + + public void testSortedSetInteger() throws IOException { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); + + Document doc = new Document(); + doc.add(new LegacyIntField("foo", 5, Field.Store.NO)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LegacyIntField("foo", 5, Field.Store.NO)); + doc.add(new LegacyIntField("foo", -3, Field.Store.NO)); + iw.addDocument(doc); + + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), + Collections.singletonMap("foo", Type.SORTED_SET_INTEGER)); + LeafReader ar = ir.leaves().get(0).reader(); + SortedSetDocValues v = ar.getSortedSetDocValues("foo"); + assertEquals(2, v.getValueCount()); + + v.setDocument(0); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + v.setDocument(1); + assertEquals(0, v.nextOrd()); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + BytesRef value = v.lookupOrd(0); + assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value)); + + value = v.lookupOrd(1); + assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value)); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testSortedSetFloat() throws IOException { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); + + Document doc = new Document(); + doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO)); + doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO)); + iw.addDocument(doc); + + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), + Collections.singletonMap("foo", Type.SORTED_SET_FLOAT)); + LeafReader ar = ir.leaves().get(0).reader(); + + 
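+    // UninvertingReader builds these SortedSetDocValues from the indexed legacy terms:
+    // each distinct prefix-coded value becomes one ord, and lookupOrd() hands back the raw
+    // term bytes, which the assertions below decode with LegacyNumericUtils.prefixCodedToInt
+    // and compare against Float.floatToRawIntBits of the original values.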
SortedSetDocValues v = ar.getSortedSetDocValues("foo"); + assertEquals(2, v.getValueCount()); + + v.setDocument(0); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + v.setDocument(1); + assertEquals(0, v.nextOrd()); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + BytesRef value = v.lookupOrd(0); + assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value)); + + value = v.lookupOrd(1); + assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value)); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testSortedSetLong() throws IOException { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); + + Document doc = new Document(); + doc.add(new LegacyLongField("foo", 5, Field.Store.NO)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LegacyLongField("foo", 5, Field.Store.NO)); + doc.add(new LegacyLongField("foo", -3, Field.Store.NO)); + iw.addDocument(doc); + + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), + Collections.singletonMap("foo", Type.SORTED_SET_LONG)); + LeafReader ar = ir.leaves().get(0).reader(); + SortedSetDocValues v = ar.getSortedSetDocValues("foo"); + assertEquals(2, v.getValueCount()); + + v.setDocument(0); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + v.setDocument(1); + assertEquals(0, v.nextOrd()); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + BytesRef value = v.lookupOrd(0); + assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value)); + + value = v.lookupOrd(1); + assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value)); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + public void testSortedSetDouble() throws IOException { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); + + Document doc = new Document(); + doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO)); + doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO)); + iw.addDocument(doc); + + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), + Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE)); + LeafReader ar = ir.leaves().get(0).reader(); + SortedSetDocValues v = ar.getSortedSetDocValues("foo"); + assertEquals(2, v.getValueCount()); + + v.setDocument(0); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + v.setDocument(1); + assertEquals(0, v.nextOrd()); + assertEquals(1, v.nextOrd()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); + + BytesRef value = v.lookupOrd(0); + assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value)); + + value = v.lookupOrd(1); + assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value)); + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + + + /** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */ + public void testSortedSetIntegerManyValues() throws IOException { + final Directory 
dir = newDirectory(); + final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); + + final FieldType NO_TRIE_TYPE = new FieldType(LegacyIntField.TYPE_NOT_STORED); + NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE); + + final Map UNINVERT_MAP = new LinkedHashMap(); + UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER); + UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER); + UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER); + UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER); + final Set MULTI_VALUES = new LinkedHashSet(); + MULTI_VALUES.add("trie_multi"); + MULTI_VALUES.add("notrie_multi"); + + + final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500); + final int MIN = TestUtil.nextInt(random(), 10, 100); + final int MAX = MIN + TestUtil.nextInt(random(), 10, 100); + final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN; + + { // (at least) one doc should have every value, so that at least one segment has every value + final Document doc = new Document(); + for (int i = MIN; i <= MAX; i++) { + doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO)); + doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE)); + } + iw.addDocument(doc); + } + + // now add some more random docs (note: starting at i=1 because of previously added doc) + for (int i = 1; i < NUM_DOCS; i++) { + final Document doc = new Document(); + if (0 != TestUtil.nextInt(random(), 0, 9)) { + int val = TestUtil.nextInt(random(), MIN, MAX); + doc.add(new LegacyIntField("trie_single", val, Field.Store.NO)); + doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE)); + } + if (0 != TestUtil.nextInt(random(), 0, 9)) { + int numMulti = atLeast(1); + while (0 < numMulti--) { + int val = TestUtil.nextInt(random(), MIN, MAX); + doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO)); + doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE)); + } + } + iw.addDocument(doc); + } + + iw.close(); + + final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP); + TestUtil.checkReader(ir); + + final int NUM_LEAVES = ir.leaves().size(); + + // check the leaves: no more then total set size + for (LeafReaderContext rc : ir.leaves()) { + final LeafReader ar = rc.reader(); + for (String f : UNINVERT_MAP.keySet()) { + final SortedSetDocValues v = DocValues.getSortedSet(ar, f); + final long valSetSize = v.getValueCount(); + assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got " + + valSetSize + " from: " + ar.toString(), + valSetSize <= EXPECTED_VALSET_SIZE); + + if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) { + // tighter check on multi fields in single segment index since we know one doc has all of them + assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size", + EXPECTED_VALSET_SIZE, valSetSize); + } + } + } + + // check the composite of all leaves: exact expectation of set size + final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir); + TestUtil.checkReader(composite); + + for (String f : MULTI_VALUES) { + final SortedSetDocValues v = composite.getSortedSetDocValues(f); + final long valSetSize = v.getValueCount(); + assertEquals(f + ": Composite reader value set should have had exactly expected size", + EXPECTED_VALSET_SIZE, valSetSize); + } + + ir.close(); + dir.close(); + } + + public void testSortedSetEmptyIndex() throws IOException { + final Directory dir = newDirectory(); + final IndexWriter iw = new IndexWriter(dir, 
newIndexWriterConfig(null)); + iw.close(); + + final Map UNINVERT_MAP = new LinkedHashMap(); + for (Type t : EnumSet.allOf(Type.class)) { + UNINVERT_MAP.put(t.name(), t); + } + + final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP); + TestUtil.checkReader(ir); + + final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir); + TestUtil.checkReader(composite); + + for (String f : UNINVERT_MAP.keySet()) { + // check the leaves + // (normally there are none for an empty index, so this is really just future + // proofing in case that changes for some reason) + for (LeafReaderContext rc : ir.leaves()) { + final LeafReader ar = rc.reader(); + assertNull(f + ": Expected no doc values from empty index (leaf)", + ar.getSortedSetDocValues(f)); + } + + // check the composite + assertNull(f + ": Expected no doc values from empty index (composite)", + composite.getSortedSetDocValues(f)); + + } + + ir.close(); + dir.close(); + } + + public void testFieldInfos() throws IOException { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); + + Document doc = new Document(); + BytesRef idBytes = new BytesRef("id"); + doc.add(new StringField("id", idBytes, Store.YES)); + doc.add(new LegacyIntField("int", 5, Store.YES)); + doc.add(new NumericDocValuesField("dv", 5)); + doc.add(new IntPoint("dint", 5)); + doc.add(new StoredField("stored", 5)); // not indexed + iw.addDocument(doc); + + iw.forceMerge(1); + iw.close(); + + Map uninvertingMap = new HashMap<>(); + uninvertingMap.put("int", Type.LEGACY_INTEGER); + uninvertingMap.put("dv", Type.LEGACY_INTEGER); + uninvertingMap.put("dint", Type.INTEGER_POINT); + + DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), + uninvertingMap); + LeafReader leafReader = ir.leaves().get(0).reader(); + + FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int"); + assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType()); + assertEquals(0, intFInfo.getPointDimensionCount()); + assertEquals(0, intFInfo.getPointNumBytes()); + + FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint"); + assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType()); + assertEquals(1, dintFInfo.getPointDimensionCount()); + assertEquals(4, dintFInfo.getPointNumBytes()); + + FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv"); + assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType()); + + FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored"); + assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType()); + + TestUtil.checkReader(ir); + ir.close(); + dir.close(); + } + +} diff --git a/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java b/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java index 88727457017..6e6b30ebf62 100644 --- a/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java +++ b/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java @@ -22,25 +22,24 @@ import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.util.TestUtil; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.params.CommonParams; import 
org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.core.SolrCore; +import org.apache.solr.index.SlowCompositeReaderWrapper; +import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.ResultContext; -import org.apache.solr.search.SolrIndexSearcher; -import org.apache.solr.search.DocList; +import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.CopyField; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.response.SolrQueryResponse; - +import org.apache.solr.search.DocList; +import org.apache.solr.search.SolrIndexSearcher; import org.junit.BeforeClass; import org.junit.Test; From 6d530e085701e32e593395ef54ec402ad40a2fb9 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Fri, 27 May 2016 13:49:20 -0400 Subject: [PATCH 11/19] LUCENE-7278: Get Clover working again --- .../spatial/prefix/tree/DateRangePrefixTreeTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java index d76454e6ae3..d29a192c6d6 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java @@ -32,16 +32,17 @@ import org.locationtech.spatial4j.shape.SpatialRelation; public class DateRangePrefixTreeTest extends LuceneTestCase { - @ParametersFactory + @ParametersFactory(argumentFormatting = "calendar=%s") public static Iterable parameters() { return Arrays.asList(new Object[][]{ - {DateRangePrefixTree.DEFAULT_CAL}, {DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL} + {"default", DateRangePrefixTree.DEFAULT_CAL}, + {"compat", DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL} }); } private final DateRangePrefixTree tree; - public DateRangePrefixTreeTest(Calendar templateCal) { + public DateRangePrefixTreeTest(String suiteName, Calendar templateCal) { tree = new DateRangePrefixTree(templateCal); } From 9863eea256c649dd035103128f95410bd85b8771 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Fri, 27 May 2016 23:59:47 +0300 Subject: [PATCH 12/19] SOLR-9110: removing static members from tests. 
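In effect, the tests stop caching a CloudSolrClient in a static field and instead fetch the
cluster-owned client at the point of use, so no client state outlives an individual test method.
A minimal sketch of the resulting pattern, using the same SolrCloudTestCase helpers that appear
in the diff below (illustrative only; qr stands for the QueryRequest the test builds):

    // Ask the running MiniSolrCloudCluster for its client instead of keeping a static copy.
    CloudSolrClient client = cluster.getSolrClient();
    QueryResponse rsp = new QueryResponse(client.request(qr), client);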
--- .../cloud/DistribJoinFromCollectionTest.java | 18 +++++++----------- .../TestSubQueryTransformerDistrib.java | 9 ++++----- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java index d8e1b153d59..ae17ca98c51 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DistribJoinFromCollectionTest.java @@ -64,9 +64,6 @@ public class DistribJoinFromCollectionTest extends SolrCloudTestCase{ private static Integer toDocId; - private static CloudSolrClient cloudClient; - - @BeforeClass public static void setupCluster() throws Exception { final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf"); @@ -90,11 +87,9 @@ public class DistribJoinFromCollectionTest extends SolrCloudTestCase{ configName, collectionProperties)); - // get the set of nodes where replicas for the "to" collection exist Set nodeSet = new HashSet<>(); - cloudClient = cluster.getSolrClient(); - ZkStateReader zkStateReader = cloudClient.getZkStateReader(); + ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader(); ClusterState cs = zkStateReader.getClusterState(); for (Slice slice : cs.getCollection(toColl).getActiveSlices()) for (Replica replica : slice.getReplicas()) @@ -138,7 +133,7 @@ public class DistribJoinFromCollectionTest extends SolrCloudTestCase{ for (String c : new String[]{ toColl, fromColl }) { try { CollectionAdminRequest.Delete req = CollectionAdminRequest.deleteCollection(c); - req.process(cloudClient); + req.process(cluster.getSolrClient()); } catch (Exception e) { // don't fail the test log.warn("Could not delete collection {} after test completed due to: " + e, c); @@ -152,12 +147,13 @@ public class DistribJoinFromCollectionTest extends SolrCloudTestCase{ throws SolrServerException, IOException { // verify the join with fromIndex works final String fromQ = "match_s:c match_s:not_1_0_score_after_weight_normalization"; + CloudSolrClient client = cluster.getSolrClient(); { final String joinQ = "{!join " + anyScoreMode(isScoresTest) + "from=join_s fromIndex=" + fromColl + " to=join_s}" + fromQ; QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score")); - QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient); + QueryResponse rsp = new QueryResponse(client.request(qr), client); SolrDocumentList hits = rsp.getResults(); assertTrue("Expected 1 doc, got "+hits, hits.getNumFound() == 1); SolrDocument doc = hits.get(0); @@ -172,13 +168,13 @@ public class DistribJoinFromCollectionTest extends SolrCloudTestCase{ // create an alias for the fromIndex and then query through the alias String alias = fromColl+"Alias"; CollectionAdminRequest.CreateAlias request = CollectionAdminRequest.createAlias(alias,fromColl); - request.process(cloudClient); + request.process(client); { final String joinQ = "{!join " + anyScoreMode(isScoresTest) + "from=join_s fromIndex=" + alias + " to=join_s}"+fromQ; final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score")); - final QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient); + final QueryResponse rsp = new QueryResponse(client.request(qr), client); final SolrDocumentList hits = rsp.getResults(); assertTrue("Expected 1 doc", hits.getNumFound() == 1); SolrDocument doc = hits.get(0); @@ -195,7 
+191,7 @@ public class DistribJoinFromCollectionTest extends SolrCloudTestCase{ final String joinQ = "{!join " + (anyScoreMode(isScoresTest)) + "from=join_s fromIndex=" + fromColl + " to=join_s}match_s:d"; final QueryRequest qr = new QueryRequest(params("collection", toColl, "q", joinQ, "fl", "id,get_s,score")); - final QueryResponse rsp = new QueryResponse(cloudClient.request(qr), cloudClient); + final QueryResponse rsp = new QueryResponse(client.request(qr), client); final SolrDocumentList hits = rsp.getResults(); assertTrue("Expected no hits", hits.getNumFound() == 0); } diff --git a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java index 631c82e140e..0273a852bca 100644 --- a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java +++ b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java @@ -46,7 +46,6 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { final static String people = "people"; final static String depts = "departments"; - private static CloudSolrClient client; @BeforeClass public static void setupCluster() throws Exception { @@ -72,7 +71,7 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { assertNotNull(cluster.createCollection(depts, shards, replicas, configName, collectionProperties)); - client = cluster.getSolrClient(); + CloudSolrClient client = cluster.getSolrClient(); client.setDefaultCollection(people); ZkStateReader zkStateReader = client.getZkStateReader(); @@ -105,7 +104,7 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { "depts.rows",""+(deptMultiplier*2), "depts.logParamsList","q,fl,rows,row.dept_ss_dv"})); final QueryResponse rsp = new QueryResponse(); - rsp.setResponse(client.request(qr, people)); + rsp.setResponse(cluster.getSolrClient().request(qr, people)); final SolrDocumentList hits = rsp.getResults(); assertEquals(peopleMultiplier, hits.getNumFound()); @@ -197,7 +196,7 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { if (rarely()) { upd.append(commit("softCommit", "true")); } - if (!rarely() || !iterator.hasNext()) { + if (rarely() || !iterator.hasNext()) { if (!iterator.hasNext()) { upd.append(commit("softCommit", "false")); } @@ -206,7 +205,7 @@ public class TestSubQueryTransformerDistrib extends SolrCloudTestCase { ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update"); req.addContentStream(new ContentStreamBase.StringStream(upd.toString(),"text/xml")); - client.request(req, collection); + cluster.getSolrClient().request(req, collection); upd.setLength("".length()); } } From 88dbd07f1ef6af77e282653119d4c45f0e0c2624 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Sat, 28 May 2016 09:26:59 -0400 Subject: [PATCH 13/19] Add 6.0.1 back compat test indexes --- .../index/TestBackwardsCompatibility.java | 4 +++- .../org/apache/lucene/index/index.6.0.1-cfs.zip | Bin 0 -> 13731 bytes .../apache/lucene/index/index.6.0.1-nocfs.zip | Bin 0 -> 13733 bytes 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.1-cfs.zip create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.1-nocfs.zip diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java 
b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 0677addc04d..d553752b6c0 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -239,7 +239,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase { "5.5.1-cfs", "5.5.1-nocfs", "6.0.0-cfs", - "6.0.0-nocfs" + "6.0.0-nocfs", + "6.0.1-cfs", + "6.0.1-nocfs" }; final String[] unsupportedNames = { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.1-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.1-cfs.zip new file mode 100644 index 0000000000000000000000000000000000000000..a74316328e27e306fb17a71bd56d5c067e5a106f GIT binary patch literal 13731 zcmdU$Wl&t}wyqm@4Z+>r-Ccvbd*g1wT^f?$?gR<$!4n`5+#P~TaCf~Vd#!Vmb?;ec zovQt37d2a|x}Kh6bo22rzIQ6gK|o>v003A3np~^2R*YWE;`6h5FaQ7=fDbTaW;6ww ztE<5QAkZ>oG=Cg!uMq%XkQdJ{fP8)-p!RbX8RMa9T1Hy3we?Cywr-4JTJl<&rb z`&7G~m>?p*HV^&s90pGGU;`M+n*d?o3fKGITq*kaOwt~ctTMn==)ONsxj`x1~V|xGf=NWQo2h^Dl_+Z zjzY+PqOs|5=U&mmd3pptub4lxF#dkC#DJclR8Wg%SBFA(_^!D3XlG?Q#RTD$W)7GZBcQ=)>iG;H$ z3o>ol*lD!iiEt_{=NmOrD}Yg+nRRQJ_FS0qMNfGw%=T5byTnKU^r`SCzuQd14Fo&Y z`n}4G*Bq7l{+kvSvbeS9ojA&pTV#)p!kp-2EF~o-I4p`mm^e8vNemfxbYv{3>LbIV z)ZLr)#%6mWZ_Tt6<^imc89~m@@wNr&D)Ntnf&qaWfH>8+%km_!OzY$vbQjJqBBH z5n}x4F%kL0T!Bxq(nZVU8c69OEO0jH!{n_pkns_ev5hiGvV=Y2HkeBJ#wo6h2R3+p z&fn#@^8?!2V@eS#YIMV#616o`T@pPtwH*^BG_}#$A}Itc-((X|=sWCQ;+49350xrE z#(0KRW1<63&?godOoi2;!9KMsbL!)tn|t2laNX+18(%jC_h>?O8sjUZ^<+zjKp=;k zo;0CK)F&Zq2EO?WKhSlBv;CdyT2{Bg);sl9GGgWGMaX8Br|CIjl;NuuFH41MK@Y^& z_Qb8TYmmhCc(&%*uwC43!m~0+gfOFP-^0hN#@bCYyDnIXgL*H%$qu{14~A{QfJO1c ziDGYT!Srb3+xX4$X7Rk(Udo=k@7E8#! z^FE5Ix7bF%+aZ7RGHhqQ6uDGgK$F(8*7ceYyQZWUI4@;zPSMChWOxk>Gx=D_uW6gF z0fV4!8rJi5hgHN3I*GY{tZS3;+F9sq9bWUHah%HU+bEBa{WYDC{lZHj`!!!7`&)_N z1rI~NLv3z^qmG_q&FkoT&D=!+H;>Ij|43^Gy&-}m)wZP%Ghpw@L6%Wf85krl$OzrS zS)gdhA*tPCd8`s! 
zFSm}qjk}X*^hWco^28c)pC}6lwy`gW)e5E^6lgz@dU>c7j8>U+=a&!&6@5`y_)>_l z6_G?0T)AWkb~y@l&D4Q(HD9^xm(=NY-CoWc*jNz*AmYWl+rG;lkP1`C=GC2bay18& zH!NXedinXx>Tvx2e2)%3*foAS*6AZLZt6=NX6$?;R_P8S=^duqu$nE0d+}QYMRn`N zZ(QW{_QRJl<>}!yasF)KyQBx?J5Gz-hK+?@nKmU+da9Wp*mpZK8xPzzTEC|-3W%cTS{B{M^X^SwqQS@$(|cR$_@<)SIbjJpwA>rfS1WX&kj z_LTG48@q8`jY%ob-p2Bvpn9`UbkcQc*ngiy1?PdZ#_B*_CZe~6cU|7i(8m!mf#&J$ z5OH?jN6;|zy==MfkSc?9WQ548vmv$)b$7<{<245_Lzd(?M6&vLV=K~}lf~v@q&PV` zLRpoVm~yo&M645=sQyefnzvqerAtLS*2alvV}kFz2>Zp+3_iv|%`pvd-ahV864dQ()yI#fNW^2x z>$<*|co}8SQS3XzExeIlM&S5nPPB>BF+^+Hez@G!I8*9QIr>tsOVPVx@%=4b^zKcH z1>-Q+R%c4I*MyH&s7Sj1*E|Hwhi|y~0<&6Q=DS0RMJQ(WOtLVwNp?ukhIy^D`!8_RY z_1^We&f?<~5AD?2r!8pR{oB3=4EoJEyV6B*eLkd*i!S>#^DJhfs_tj^{Tzfu+gS>`{9K)+_+^u!0 zr)53w7Z@`+A7fd)Yp6{1d|V3Po<@}FmL6Y(y_+?7YP3-+#g4qRCZt?fzQ}w5jdn2A zlBQM9$)cPQirCo=ZghR=#8Y3WxZjN`oU9!=2pLGP!`2ps1euv}E{EmMroIv+jCC%n zJdo%r2DsuCINTYZ)B0Dx^m)7Q8G`%E|#}RG|BSObQ0n2iK~Bmrc1?{hcrTl&SqcWC}GSEh{}u zb1gMFX{5cq{X@?D%NI*Eh1(^GS+h`|vn7OCpgEyojUY`JB^e9vO^}zn%%Gl(LSYZf zx@yGRY9?C~?nz0W^13!M)S$lK=LOy8b;S+d^Amn>Rc49#V*J4(*8_J*r>FMIf{!74Q!8*c8IQTFtl_dFcpb4 zw4D2M9W_rP2SZUEcQC>Sypo7TRLiv7EE+0l3nv*9d$7t}P$~!n$_;I*js>By>9z(M zIP1{T#n29iI7Z@Zu9B$7878EyC8egttL!HkCZucSC8uY{^>1$A;-eGcuk7wR3!9!I zSZc9E`GEb|UPBf}TPyefvBN8W>T0}p(r#R~-v7^2Eqejb zgBLLDl%I_Pf`}$Ck^p~schXU=&kN7*{9mff@^4n7n|I z!<6Xp>AgIeMUsFY@;RiLv1l7nXlJ|gl9OOwnq35pW~3|czv@#Ai#U4-8-4Snv?0fu zdkf_!ii8y~kREXUMUS)}ZtC-kDaD&^RHte&rJMJ{QqOkHjeek%9F~IvU$3^t83NRR zw4x1k%zqN*cz_H=o+MALR1)!L|BfXbVy>SvtxGhTat~gTw0c4Gk^e8P!Btk6z=s6@ zqW=`-AFcU&QO41KgD2$)yqvMQyLY+FWSWulkyq81!=1Z|A5hQOZE2(|w5w`VEuoHZ z-CG<;{WjNOP~<_4QW!#uksBuVQlwy1Se1hY<*l$49U&~0axTBHZUOPFQf#TTvLP~=(A{Fj<|xYr)GR$%@%`>H{l*XY>E`Y$%DrbU*QI0Ihoq_M$)*0aYT33jGjn-p zZVC_nheRpG$GqFahZe7;`6nwR{s5b+)Ov)(G!0s!VgVfnE9-h!r+fCtII^UrLIP|a}(baypcbW8l9*F zdset!dEdy7(TFz8njGxDDY6;p%-hG*wLE_ zqlw-x$g;rALxSA1b-o`j?b;Jf?srV{YEj$`G=sybtiP6i%BNYx!KIv8*K5^gkojG4 z(Pc(9>r+u!R-I#(;oMce4GCk%X!$6$6J4Dwe&{TCx`tlkCxYJSr}a5ae=Rl4U9K#l zoA^Ah^&5xJN*Xi6t;7S))hg4IHJp_iQz~}VD(czPR%dX`ju1nMlTZcpOeKk@zRu-Z zv;wfAiU5*2sHdb3VQQ$x(W?$#1tF&GapJm4h|lRy=PzoI#kYBxYd)*!m)SKw&T5a) zQZ12{onTH&smW__nly~VnVHDF39Y8o_)f1=DIuqYZS|_xzAxGTvL3sd+*+%KFOe)W zj%h^Qx&n9(S0~ixRjcpTx=9r$QOQS%|M;is0ye1`kwQ`N@bUKC(3}Ff=((Ac_ z&5dTx1{e{PS3r>2SA;&&gB|1P7dx-dEA@TsL{?j47%ivb(wJ;ov6e7E$KiCFWI@AS47HqdN49VzQKs_QRVq9?A; z$v&Jwacm4VVkWFNg+`WzwxNyC^qg3upGb0NotSZeyvPWyd_))t)f4SUy!?QKBcK$n zwFj+M!F8Y-12SN5NX7iatf8S=X@Lm}GGePEDvqA{ypu}b}=jyK$dpmp0g(g^NIPK zRC|JjGUS#k6axO{JfhhmEZXk(wd(2=Gbr`8#SzgP05``LKZx-oK}%O0E*wsz?eq{~ zcGL2TX``r)lUQo7b+pgCG&`UZ7H&u}bfc^wYsWaB6fj6RQdYH+YH~_b870gZlR^L- zq~aG@+fwlfhU%}6kp2)I8!ZhHyGv-U9iQnwA zEO!0n=ePExgERtO+)xcduQ&J_BjG?MYqo1SC*Z9}i*UA>k{abDyHQhbS zdk!0g$PK(3CNk^ppx)RCxX($JA&|7b&?=T$C*D79ma*Ek>^Cj%dds2|LdxaE$1}mi ztUFHx&|Z-qjn0{T%z=&SJV3eQNos3&u!6V#ST%>c9mjiBv7GTEGLiE%g&Tthuqvr|Na?@?!!9!E`@ns}-|6os3_5p^a&zwhMe zzu3`+@49Yq=w6lOR+WXqce~b_Ka(b_Vrs=##EOAQ?enJ76oZqRUt-k?euVvQ&1BMe zTq0b?LE66DlPmz-{@frEe7K!EvgLT46y_-;*k9IF&G{O2%h&$&3=O9A zOQ7Ri^u7JyfFI16cDU1b8LNTJ3oM2EUEWk;4_o#f&U>tivGxva#B98Z&Wpx~t99nA z#>)v&bn+9yboLQ+kX`#zrK2+p4+EuKJ^AzP4jQY>$4m0LD-pL3ipvKtj0kkXyU2Oi zJ|SNP9oi*_3-8Nm>EF^ew%Vnbk&)w3%whMr$J=I@c}sITy=*mz0qsxw<3mdgKZLUL zubz&q4`HS$*NoolRz)qGEtL7WZrdYKypqW@f$RLb0R%d6Ag^3FBcT!4sT~H)-blxA9Q&;I{WIu3LG!)>QF&G)8iE;P;iv)rDiRYa%c5;Kj>|7lMz6 zLI=yf#nB%3Sid_pzbh}%Z%)m^`0sP-?*A~SZu;TWxzA3`5+Z93rP7`H=JYs*wvS0x z-f7^~kPMCc08_s_{n)^eqRghVGZHcg1=-QfjVFiY28z85d$yF+cG z>!v^Vwp{!C)W~mZ8M1y7cS9g$MfZaYK5?Sbdgy0Sb$77E449V~#o<46Ss+ad4lS82 z$of>^M{^Q}h`lXTUj9XEJBZ{=Fn`tBAHDf|t;M!`p8vW5FV8N0J|6j4WD7C!ybwhy 
z*mgdV%2-?D4u6@9Nec1YUCIcT!)31G7GIt> zR;`W*p7a4DBR)MNrZbMZCR+Z+G*&im)qT|Xk%!>uy={_}cPxjGuUJ|PGYu#E z_p0eIzk8^>9{5r#1dfRy8Y%it#6J8R7!PvO3umki`(A;V7YzgcEtij)2d4|e-Jg;_ zi_;67q%R|lg7&HlJ!P4a>b|`;NFPb;nW`DyLTM>%(dmZVm)(Xpc-u#wX)V96-+J=( zG+q1PE1nR)!Dt+H)9L(4{_Wth0ne2+C)VYK{Nl`dhp(eKsus4+hkCe_~f)Z{Q)VjbI0e!MXuiWG$2`lv60#Z_cgor(ES zhn>?+%n)W-9gO)!b2L;Yarf)&+{w~NV)ppEp7M8!HU*&%I+)FdS=1YKkrqX8(6qbZ zZyqUlrVdK)Z?e_&i)SBRP32#`tXyhc7Ef#4_O~k0 z1VLzInq!5V;=hYc3FlX9+6dP_Z85`&w?RG;Z3CBC~Ri)kJLBZ#TLehFHpgT|O@SfN*;OMxRTz$th}PHsq2 zp2&g9IN%FwQt0CXO_M!nuf!j9E0%7RxI^|qtdrlW-2b}r^xpSEQC_(`yne|jdh#`Y zW_)8gsehD$iAmeU8ZgU+>LSCi%&Dug;&5@=$T)zgt*?`~&!8ss$6f+;N1>97oDKIl z=G%IYF20s#Z}fD|8Qw-^M5nHcdw-j`Dh_Y;Z@eFs@rLJwUSIdp@ZQB%-=Cx6^2vQ0|Y>$BkyO)a?wjt_}0 zF7genEL3tkyh{{2>njJj;g*%&mx{ud+k%xvhN%o7{)M_-?J>p5y9d;Q9svW;LLgF2fI6~4L z0&;<}$wI+PQmHF05&R4=aC8#J1m&DN0(wD)3Ds^$mKSyDIbR^1Ubv>^3_vbYZnKDp zro4X_Cj`5cS!#q)f)fV(Dqu$@vE!Es89%FItqeoxvT}gj`X#W3;B^tik@LzTa%)c@ z)rQ)5SC{fUSeK&c#9o@OS0>QmwLb0VO@p=BI8uJ?)!SQ)K&aiIOrloI1T4%2; zq~K$1(1>|@mbQg@;^myA@P<@r*Uw=J$ktq3O&iKDV5XbKdr6d7?0u|pXCWb%fT5$S zftS2~_tk2w-th?g5{Q7c`1=93ySp-4;p+cR8D^FEU z-FCyd-Y)l*R3BVZkoqotC3axE6X0phIDr;U!y_FxnZ)Gwpk zoX^b;mwIMMEms$5ue=_!gydVjZCAT+%@UyT)XnJq#gi4{NTI<{;{g#Ouw6oUxC1Fi z=g#+RGZr~c7o%D^PHsTz(MB)&7snNtO6rAgLohYpzGa;B)eLQR(Ee}-9XsFP`s5o8 zyX+&5{jmMm!kEd;h2%i(3>Ohc_45B7@&uuGv9X4wYLZOZzTu+t)Dqvwjs$Ommrm#z zBHLD52!$|54r1A+HjQ}*GFc)&hm-007e=&61vG=XORz(7wpT=OJiML5`3^m675%N8 zYgXbU)cZ(~vttSC7I~qOnsC8owiewQ;H93%VUXzlwiI1wr3yZ=R5xbxX{Oo#IBg|u zu2>P}!6%jQChSZSgx7UEe(u^Zd>O7ha3*cN6Vw`l&Ok$6ci+|csZGGjQuj`EBiREh z%lYvuo4)jWA?#j-<$4E$mv@B9)s>`dHbI{Z|H@YofIj!8GgdSd>#v zK2hZ<&|%~$d}+>#>&n+)g)BdI#J_#Ni!z8U{IPY>%XYInHPr{I2(|A!bBBjPWe(#m z8z1atZXf^D;7zVJkSue{cMMtX_2#|z8BYhwL7YLq%?l{~b5l<95+8?ZX1beP{Xqg; z-Ej_X@~U$0cnY&8yK}Xr&y3mS0=Cvn7VSlYZ@$y%Fa}uruO8;)Vix5-Sj(LByb4K8 zrA>0Z4F>1MfSg$gE|}&>%8`&sg%Fx+f#Hqa65B(4+2p6Y2l$=ZEG?kSM^lp3@Uev4J!yd53&<2@a6`rcW87J zO+EL!@l7>2sQ42Y&Q(KqNqVCyXmWm078Z`m8uN6yVMC zIXA$RlX$HfM-XAi31>kQU4wCqrR0D};=bl6v!F6lUW)|7or<8d`;DTgSO7gL7nVLk zbq*g6uM3?$Jy<-loh{!e?kPg3kJaDgr)lWoYM4Ven4j*bwdBzSKzcrwj@Zwh8DI8n zM$s`aJ1%4VG6F>i`_&v+iEsP(E-sMvA5Q?ix{G0gkQD18B!p0J!~IjfW8tFM zbepltRAE!PO;tn$wA=LTK}3YMWdCdzW5#p<2^;5YG(t)2R0HzAo>&q zE%Tq1PA<4RJ{3;}y;X4lN6eye?5TsPbkD-U@i|I?-iC!YgJmZhR~UQMfp=%8t=n>P zxet2KP&rpHY%3eGp)*xNEk-5HA+Ev6RAV%HPfxFNP42fQlhIiB!T!80+Pha5laNBZ zBUUy7yr3Tx1*keY9#vXfm>?QWSd-1-SSxvGvEhqj@1G3Y<$TwrV$*fmnbp(v)siUc z5kB-gQ7nIbCF(8#douqU&ZF6zcRR z{_B(UzDeQ7X^?;e#gs19W!9_G8RTp=42sm}4@~Dsv$iA&kG#b{yzr_=_<~_ls$5tH zM~!VqDnUF2u;h+ea5MIg_aUK(5<8*zmOHO zyXu8@6G=r1o&86C#_^KKIxQA1NzwMD`0}IA1cd@{cmmqqlGDuJOf&8iuPdK-`_iM8+i|GZ>V2wI#0x?KSkXw+fA{U_mNhu?#t^}39#l0O zMlu_lbR|22=hgl^BY0y=9J=_tu~}B=Ug#E$S4*RET%UT5*@5J$XzjgFRe@d#7d6X) zsp?HRSBkiyZgIRtv+#0E4BD$A6FcD(vYoWNXJ2Ipln7W$jsXQ-6B(s#Kg^q>P!kpT~wc1CN)+*vIod+gIj{HX{iVlIn-=$XV*!1mc(ZbD+Oz&xl%k z_Xtal1s}GrPr94Piyj8V6DXF;%o4cf-7r!#smn{~P55{S&Am|eSn-9-I%u?4uLJ~m z^r+#GPJf4)DoZLO@6qI799#T~=y{%dXK_NrcM#sQwH8ryO3Khs zl-d|@hCZ2Y%mT-;5yh4b8K84IHKfM&;;GGa386@z;W0eqn4OSBdEB*xgpef8W{jWE z8*+^9=H7fW(1;ozq@kwK4IIj1YiOn%zE>`Sb?o)gvtR3#6xgI8qe5<(wr3Jr8XkVK zgGk+&&MGTWS>~Flpy-1m%CVa4wwK)Erb{;-%sB)EmqHqs6EDqoMo(;!zl z+_%vIR_PNys5u}28OaoVB^~BZadE_)^2aNW;n}@*qTUxmPu4+laO4$IPENj=^SONl za><=@2BH*aVSVs0G$nT;i+!6vExqd~*JZa67SV|V-@()X2KGkmNHiO<++5jo zpvTU0E~&D9bFT|}uGbTN|5M-*>FHPV14(yn5B`&J*kH}0dWNQ`DH^5xmTJTp71AKjQBS7ac2cOb?GMp;Y4KuKsP9Yt-jGcA*Gh@c z4CG;_l@#(~h`UltOS^|wgSqhY5>;QsLb>*TTAiM1b%{O9Xi{CIQSe+I*``@Dzq!j$ziS4oI(YB&h>2KqAd8k> zBCCFW_7`c-u@5)!*R687s4prxn?Kn!3HM;RG|Hv#G}omX 
zBtLd8ta3gmd8BtrP|i~!Sn)zg%r(9{BpgSCFuAU%=HLcJD)OmR@+yZaQvPwD zedA3^xnYV;MaWeH(qNO7VB5zL7nnea}3Q#(|kLiTYBF-<-6#UO-Im&WI6GJ#0;;d zndp0kbFxSD-snh8Z_JINm-uDEFBR^yH4={f^*jAV=&f9H^Ouv`6t0uSD9S!3S1p}C z|MPezzib9PO-`BQp&p%w^GJl5)>{TLF_k<%Ha&s;u7TNqWmD2B9?pF^E0Gs&h|zD0q&@7=PV zzgq-PrGsRd83gR>jkMO?o0NsII+1_>o|wOth%Lv;%z7UVPB|AaNm?}=aK9Ee*m`cl zm2-wJcRt>Z0YfkDNJGacFE>zT#BLOjYELSGFcB-@H~I7#u6Mmtf#;@gjzprkn54KJ z^QjqCBmabo^ z$9Rb(%yy-g#MD=8f35}Ot7B@?2HybKQdwAn^vs~T5SMx-G38%h4@zrYhhmv0 z3jX!;5)$y-Vfg0=l8d>8ow>cMiy_R{!-&3IwR5jQ3m`}w^7dBc9+MEn&6hWU*8V@vU$mH7Ln+pjDFtUp-) zxWwN_ZNIWyh<>vEd5OP|m3~Ey6F;N=7&85{5`Uk>{L0!R`GfV3OZ;6Ke`UR+`N{g{ zCH}5QzoHIkpHY9v(myNlcPsgo)k*gU>mQf+`(LzQS>p^pS^vDm-UC)~|6IC%6Uo;O?%CySoH;mmmRx1a}C*0t9!r0FApl!Gk*lf?bj`b8a&C zp84`s%|EB8z3Hy%df00<{qnHh-&T}?gu(y=0|S6z$h1h6jm!G1JUy!g2LpovBLFjC zVlXi`Q&WWlgT#I(t?~VE^*{gvhq`=v0o2nA!K&NelQA5rq`gZ^wz67%m#GtDke0lj zrlFjWsyze-^bXx?J z>O#zhfDrB@{EAuZ-7r|uBXxwnz7~8?*8`?i1ABzNHV`^!7+kF%HPBoIT(u3_3Qbc5 zE~u-KsA!irLSGtpioDELQP?krcuL+*{i7)NYEQa9pEk@TK#(!WTwUx^%9uh-7l4O~ z*k>d z#RWPx!S5>aIjaE+=knPsUzm7fCI4dQ17^KrbESpX9d=}ZW2D(6m?cikN4~pg zlX5`~YPX^3vNoU1FVL5JgMxhe%Bfplq9a&Xh)#LCs$y}%lrW$NLp>p;`j|=q!A{Z| zGCaJ+>Z&Db^EXc7W<--0^A+!Ez)dl@;kP!;=Yg|%X?IWp{%e~|aVo=~T}c3pYvk-N zkQ^q{cr;9923lwn2lDJ`rvvkG;YSR&%MReI_!4dfX_j`uD}l2%ZIqeerKlNNqqJNb;cdyx(*GKy!SG?ME~4dM(lL>nhZ;~Z4(mt-E-hQhGK zqs)-GU680+IwwUo`=oE!rv{@jPv6xrP*Ob9IM7l|*D#P%Ocyr)mV)2vZDt0sp8f7s z`OMB$lSb|x?{@Fo+8CyYyHFhULXF>mwkX2onh>X|sL#A9qGiX~!GytQ{G71+ zK-afyPU;XDtElI#g8aByU>_o#iB)d!V8+>&{@Dinm-~hZXhFLjc0s%AaH9H+cSM}0 zZA6^+fgR6V>J=a1-ub()nOsA(RbA1#G!-0;9U8EIyvna#GUA@zkCcF90>*{9JyVY( zH8la{9x){e4ufcNC`AY1@8J z>Km(qn$>4MF3MW_5hUPYdaqNQAL|=D(nIoH$1W~|h9s}o)`bzeG8rG)_B(Cs3D=JV zK#{L}7VDhL+XTd4#e%pyF?Xs(^$R8F?g@>d-s1x5NiG7WSI`2F_S@@iCA{1lh` zE?{+f5!hbXN7{QE(&+I~Z{taZfb{w;$!=frRc^dwX}^T))O*t~gb%-untVg~%G*)K z>G`^g*vbn2I^W~wNP5!Q%yEBPZwAXK<8z;-`q&!Oy;+O8Yjz&`_YxD3$!Ze~RnO-g zUtTVUi;<%tlvId{#;WH9^flmSuN2RP^0O_>Wq$z+7YGU3kvN-CvS*8E*ijgVmoAsZ zu_*2uu=mSYUrtVcbtdesnOa;p?X9LqQ|2hJ(5v()I#1pCLcNOkjp@7uQKQv_Qn8&r zFI=P+o8@?oIHwky$+#_L?J?eMs82+#Zxpi*^=I%X)WL<}8zefL6AQo5T)pUP%UNFj zqV|)I@85cEubVnEyuGog8G56JQ+v}(?=_Q?Tq?ZqI+DFKL)xRsrDrnF-L!qY1--{e z@FBCSG={iVM1-$f2)@AQLONd3uP?V<;R4|41Muue5hVKh(Tf0ZiHdwfGb>7H7LBk* zZX4Oe*_T+b&wpt52+RQ1HkLPb*gXu_KMHjyzhADKk(;WWG`tN+B^sC4o&sLPi3vW( zvBn|r^gM&_<1@>{Pi$B6ek^_kZQFa;V7rz)VAq?PiE=1nI=T8a1EpE~3#-}qjaQK) za{1s|UPjzY{s*+y%Y@VDg!y=Wt&R0`M!8$RkQ}>P1son@@B57vxLRZCg<{WlZzad= zGdV&n%2#SZMYX$@w$G{n*5aQ|E;pQja+xa;B>oK57^!d&S~ccg*!Tb*?%m|k7Iuxz zKq2gl@ceBV9-<&larjpXjcjC40VCMhJ$p^SD+aE)bXy9lPu~K+meYL6>Xo~&iqdV( z@2wTDm4YBZ{-ii%yHlYY`b ztBGu^|I5)Taf#_?Zz(DpVqVM=zE{cZD6!6NU=q+!ztChl(RB~qJ&4k9Z#cgaGS zTb1AExBO%v*mOJKw$_#F=RBL)w$UZ?2>q)uoM_jm5kIMt<?Oti!e z={5ifQ|5_P=EBcqBH~oECY^v0tPTE6+2M0Y0r|LlsZN0|SjptuSSNUCDXA$bINB*X zcsklF)!q0z6a{%z5QXcqfPxeyLPBMV8}ULKyVOE%nr^!r@y0L;uf`jiLS6Z?4wBOj z@Uk)nST@Rb{}4>krNKi+@L$_|9r=xb?@2;MpANu}_8KrV*jT##j~!n9Q&{7*lJ?>< zb^m{+`Zfg7@RP2ekjWVW47CHj?+xx>bZ{z`N@elsC;wZPng3x~?*F$eLx&}Pm*qTP ztqb6Qa@IR5KzCItpO85x09J;m$Oa>$5#z z_!>pLjU2tmV3lgh0lc9FPD*fyBIljrJ+IPzf&t<)Y+k5DL3kI{C%+~eM|QOYq-870 zP}qrDn2X2cqX8tpp{PzODDDW!#ofSTUPwB>0vFSqfwpSB1dr@z%(BA8aHUW9%Nuh@ zAHjnS00WEu)0Drr=I>1zSIY*!*VX)LGY$hK7Ry!~Yl}=tnUlh-zbYJ%Z7RW(9^tQw z9T3rj??gc_8;jiNLQV{=Ig9iT%=USnFu|t(YmkKPqZm;mDhB(V5)@4MOh7Dz__G?| z-qeFrL=F)&ysNppTjOS##e5&u&Vkd)BIkKjD(5V|!1Wttg@afxVVW0Yg}#ZwWQksl zDT(a3s|mN=-n?JRSb2{c9q$z~BYpQNPKe7|=Swomr`T%wG;a0PrW366^_@&&hZYp_ z8Iap9IE5zL6gIzvRS;HtNE9*7z`M|Pz>Y|N6Z6G1_MsU(~*2_?80K*!rF^&z$M_D z5g|$SbgIs68|jYM%JgQ2#Uo`#(^(>xfGj&~?x^*R&|Ty-w|s@reB_;SJJO}QJy2$w 
zyyEE&)84T|A&a3~m_ps4%&g>)XPjfG#M%xk^q8ToJESY%7a(Mu06ND0nsImfNj1PU zF`%!GDha*&-nh=uN0uObcyC^ibUdRRc=yRUr72(!t(?^+s|m!p&==oeo}T+Kgi#LE0WvyUDnZCR?*os?l*@LOYD==DZ~5u-|Phwzn2E9jZ}6Wa>M;^OKpc z;IPPF?ByuxI=UDAET}nwauA;fTsx6fQ|h}Er7y-hi6zKFwwtvV@>+(CWZ1y!$_Mw| zKn*o7nV!b8VdfG*qcG%KN9m#9XBL2YV<-hNay{y1{QJT|E z+BI~|1uRhHiJTCnZpF`gqvQpZ_ciN+8h9+e%Urno?HYm0>-D!tWCX5Hkl>-8X1&4B z-iX~R3&XyGghE7R6G{dbfdSwX8~c9*Z))^JL#vYPgB~Y^>c}^?G4E94jQOxK8zTre zoJ~3J(2j&Mcanv4T(w4mPguA144<%eZP}RC($Ghueb4B8?t-J*uQ6&KpD=mCJ|&aI zMT!Kgl;!nnv?vm(=V0TWQ8E2YB!Wqjna($`h=E6^goJunOu?1ZaD={lT^JThTv!>G z=!Gt;sCWruwf^M7zBX@9g+MRMIGiq+3-`euK%}bOZkhGv{7D{grADlEvTkcs5n3RGdnG}OaIen zED(WdtxAnWXH7k10U|mNQErivTFdV(;jQW_=J9soc)k`ahqe3OLobK<=Ii9Mw`5~n zR_odV%Pbq6lt}79437YzsuW-R$4?scU&Cj&0*)78C7dPPn}}SaVe@`>x|^Swa|`*W3V-wg9BFAmyluydWQ9TuD4WW z_cbw4(k-Ku!_{IrMAwhF@<-$a-5<1N;wFvb2ub2itrg;YiBw@Z_nUnoN`&`gQpjKS zpsWduE&Dm5f(U!}CNw&_A4uX3Dd(B`C8X-!nC6l%%2ZVruXeS_n$|yTpw2(|%i_Iv zxcPFbjxfxmW${2U*PT~Z+9CyVtq(!V{~%upWn_|{|I&dSdG*o>^VxG-)uWadm(H9; z4)c+Fw^NaOg8LFky3V<7%MQ5`di#_NyHn$m2r`0iUat0@z~8|?ydms2bRXRIU4QSl z*j~?nzZLul@vH9vMtSpye%5JbhJR3}5&x@n8baE<&O@|yv&I;ZH~{}4oE3L#jYKWZ zAR%o%DK#x#`5?(4Azd>kIXyFOU~3bP0F8)Xb#Kq!JL`XL0)kHkW_) zS(bm;XKB9stl}2s_vx=0&A+C*5RxE#6`U3I8|tYsR1CSN5whZq?8kgR>NAQ|MVZD461E7iG#_xW zcjW{{pVPz{QMO+`$SKN{3a`9|YYR>Tfh)tY>tDZ6HNzeoeiG$aukWHfog0>j>c~y@ zk@7PnUc7qmx3@s%vCkJ?K?VU% zBrab^-MdhkQm&Ed0d`C4e)TMo9Gl~rtHLWi?|OOLLP2?o8fZ*_<%@6;lHh!xG6{ts zAdFUclc**7HwtN$>sqBg#>Jw;F25~fbvnEflcatQ1kt{hQQ6nEt!D_+#%Qz&IVy$()n$| zjd*PnVY7{sKw3}wappZ%MGC4ih7+O_>?YSH)TVr&UQ9E|S0VWoy2s&#TZ7s`Yu+#4 z3|ii5esjrF$w!|xkv~sRFZ&oAmcwK4QJz{Ya+t_X*1a?YZtgN0;aXdkG}T4N>3Drw z1LcmZT;KOHT@q*opQmoX9j8oqJ2*(?7sE*IrL<~qTS)J0B`$|qTnJqeKaebwrahso zfLYs^kVDN^bm2atxB=&alsu5voM_diDVvx8eTlddyx={0nY+_vcocy>*0_R5UpH>- zkM+6+C#RcaVu+Fv^f1&N6y zB;ogUep-&N$rV;@c6Sa8r6^6g;eirB;uJ}d?)H$!(Hu&ZGE5y=K6~{qJRVDHF|Q^_S_xd@_3hk4+qO~pSGI8? zHmXKy&jHp4%rFWp9w8mhh;3b~$2fQX9VoahxYlK<98Ao~S6_{ADy}we&oD+A;Tvp2z~%Qh3*yEQZ7>GirM$syfJGRF;_d~kgwchV`mL)zcx@u56&z-#@E z018b^VyRjaDQW_R2*Jn$Z#Kp_vA_{YVIvib+ln?u>4ca;lCz&~ImY6`_s*IR@42{B z$-_zv%;#vSgtSW(crTmoNrwS@ag8SbX)VvhyHuRvw~->6u-RDsrc&nk{fIF`#Qlh| zLrncPRH@Gz1uiS?on8FUK^?fn#Lw16;BJ~fgVtv$yozv$&Ha%fK}a!7LIvPWh-0?? zgplOH+n%fVAIr>aPbdz#hW4$Q z^ZVU{>%dKAD5Kr&$a7uGZPbrAoXqS^zC`8@%YwoAL z*Q^|64a)*9k2{N(&a}_XMXL_8SuQ{9>&+hKxIcbbGf`zba>CXA3L#}hqxm`@*Y?=w zQq=SF^&o6CR-Hf;cwgoH?-l{!Io(w@#=xaJxXx&6oeu{7G@qWZVm;5VGr6*5wRx@P++PiCTE7N`aYe0;S5T&J=PN#vBwy zKK6}l`{yFfnsb6}C4~-BhB99%RrIukr&yDF12P&kdQZp1n3l=>FHwTA^ zxQX5-gdaqv0$5qq$}BEmpJZ>hEi!)LZ5y|OEaTBPUjw<THwdO zDde6*SF)di6>4qU$XBI@`D$&in`pG2-3?upAAJ>uiwmX z+Pw5Q-)-)9ooH6yH|!Qr|KfHR!QOqoU7=NP>H*xb%u5jV|4w!VLr6#R4V~z5^7=~F~PHxOd|g! 
zM5O2A$~YjyK% z>q;GF=g6y}gY>Vjo)Qd0i=LAYdQA*1*)ggtK;vcL7(PtNB3D74zuY?zSD{(6gyfs!3{bmvpssW@#8B8#v!a&n5M zai!r9=7c3O1^}nC|Riz13Q`Y(|?+x_b51zz>CET>`M6l#?v7cfxcv zr^gFF-#3$D(+cocYn1PuXtps^!>ht6DPqip%HJ{=-!J<1R>7X!qpX5v-PxjuysI;p z)YvzIcIxuba?+LaKpJ2bUsn61)~nH}Eae@=HR>zujz>bu$bHT510&OWNDJ+wUz1ddh4w)>6}+J$Ubxb&X%H(BJmUf7kXnoE%^1vbMLTCmA;|-PIgzo^TEuJy7*-$efKL7m0>jd>AEux?OZSgf#1c z7Vm>TF&WB>`JoIafx@Ew06$oVTkM#Z7*n_D;8*PcO2K0@#fl*~SprMz0`#)FAvhaV zHg5KKGU~j7SS`FYVBUoBi&eDJGO>sRU_ALvDgAu&tp%p7bmU50;_y)8r76r|23*Q9 z2c3vIT?!jo(XpD7?3iw_-Uq=dw^`yrC}*uJW$8p)&L(Ts+wG-Oni%+=Db1}CW@_IvcVM|X3Tdc)vUXl z>o=<`Hcx12A#g!Pt1O0b!tp@LtMgq_vX5k;4}Kl!w*}H}rfl7Y^rZoKA&)6eY`QOL zwCTZGSj&JKr-nwF{e}IWrnh?_A)Lp_86LEBuB&=|+<=|!T%CHGf~pR?{cUFby;E7G z54cw#X+Q&)a4zl@B@OUBPJ}d=1+l`0F1vI8XRhr?Ded8^}g38I>K6SaAe%%F1k8nHs$xHW>iMC3uK?| zZOm^Uqg*%|O9cz4*{+7&i=?Z%U@fqmE8}B>uX@aJoKu3Q(zFB3p zHAXZ%p7PZLJ3`5ps`b)lM!@yNWA$P&pX!4G@?(y6uqxW;afWE8DGfSJRYBC&WF>K# zZ|t`A$S3VsLJ+CyDva$26uo5TkH07p9>qIFMMA!zlAh~}M19SQ5ollw4uJz}@LC@( zZIaZArkBpFU)ioA0YBJWnz0e>pe5j#;##)r=64RGv~=&@RM6QtGcOk)8R{09p%uXgqd$4v^?_Bu!3O!!G$;8v!bnQV(>b z4zF1qs?>=i5vJ^^Su>!>b5vQ7@Wa55YyE2y~J?|1N1SzbbVyx3fn+U<< zE7)F8@JK6kxDsRVE<<&qJaZi!^`ET7yqFd-IAvW`Ete%3h~`-FTbE3|Vl;M6;+S=` zd`Fo407||okonwAqx*45FjcCWo0_)A_3{#(12J@icXWG|jaI-&hTmG!HF{BrAKgv$ zn1r`z6svv@o7Za9|2g{yYn@tV?r#u$$b$X`T?T9IJ#0ZF#k<$mRDx9xs0CMW)z8_q zuHYXEr9l3=k&ecZ`m#0no}EJxUTrgDcA+)#zPoG*Zs%`ltyoIS-@|Wuu9WBtM!z|y z{lKXvWnWemzd}%iI;;^I1w;FdD;#%rq4?XEh4#yqu8&7dV!T0x0s=ZJ*?aTwYR%%! z>PJ^3na}Sr4WKvo^K$vo&knVbXL7W8Z|bdIYzh{}Jc@iY{Falm{MqB+q#aH@^g)}% zt2?5_ty80AGPvKsSbHNYeI_YGP5;4j7H|Kn!MMeS->+uqom^q}iy5;1lV%A2rx}`? z{THMM((rJYp3HFn4>SBe^!(qLp^>_RKGa_X2q`0TrpF*?}{} zyf3l>Rk8v*s44R{sYJIjMYqn!Yyz8txG`w`?tX++IYiebLh%1ml(3FP`PfrIbWiEr ze;4()R`(-vvi@fv;?D$coMv3#{~O=kMZ6N4W?`D{_&K!^)=i4#u-PNH$2&IY`%zO9>pLk7H_OS9X~3BVLu{ zsOO0N3}sq|3eshTtZ?Q(pE!%C#2Wy`zRT2zOVco>>;KeuLIj^;3aK3Ni3jp5LWbph zB4XKpM22o$22zEf0<|Nrq=Ai|zB+K+AKbht41-=)^6I)cB&L8k8i99flj`LgjJ*7i z4-leLtc(=35Y)p#N6ri>^rtYywnKIuBtm8-h;N66qEq~12mC1qCSbAHzXmTPzqPrU zO$|09z6_+U3}aOzvF3L>8?=g1*U~LS+=d)7?EwLtVlh!!J;?AapLNusn0_E1lG3~m z#x_g(fYrE3W#Wvf?9W6)%|cUMQB~>e;N*CIa(0RUfQO3+4G)u%0!k0|4fK;O=Hs5q z5xiShgj}IuZ444>c>P)vubHgF9M!=bl8lv#{3UZYGcj{2Ic_Q;Gb>AA^>cs>Gs|Tq z@o6^0vFMe=rPi@o-(7!?q|9G_AN&Y}1Af^uVf!c75ad7B5LEO3iX{Y}xw=%+PnV$)e_<~Gs*v#?mB!?Y4v(qLj0dTaqv$8Bxkdiwq|xN&IYXiQv_*5>0jY4 z<)No!*Y;nNUHukAJ)io(!0&{H;5&s#@;x9W*lOdse1P#P=gPx~S$o3Ya2|8PD-cy$ zNScoN9hUoO9d|fHX8D6LmV0)Z3gU7Wq4d7Gth=%jjHTQ`;PD{7;V3Qp*PaIa)AfS= zNc;bI@W<)<)w?@cx|lK8*qd1Y-PJ3~fI|>M{-=8eaQ+Sb5b*uspLY;I|LYEdzuZ0W zSJtly`=42aPaXcjdRohVkM(Qz?q`-F+z;0Gt;63#{hBlT8CCw&O#`MsQ)jY%Km> ziNEf={mjC{{)6?~CH^{V`;LCH^{B`WZDz{Dk^r%Jh3B{yK{JnRP(&2kW;> z{8bu%W<}BbVEulHzY5aNs6*N()E~O^dnNwrB|oz|U;M%P?Gk_eoAxtng8m2V_e=bB v0sk4r#0d7wa{hN^{--7NXWr2h@8<>fKk=YpzAu|-PhWb_U|=jv-=F;-4WVV3 literal 0 HcmV?d00001 From e4210b29a83f676583d1db327d2a3d1e10bfb9ad Mon Sep 17 00:00:00 2001 From: Erick Date: Sat, 28 May 2016 11:20:05 -0700 Subject: [PATCH 14/19] SOLR-9136: Separate out the error statistics into server-side error vs client-side error --- solr/CHANGES.txt | 3 ++ .../solr/handler/RequestHandlerBase.java | 29 ++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index ef3e46215d2..50ca449aec8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -306,6 +306,9 @@ Other Changes * SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283) (yonik) +* SOLR-9136: Separate out the error statistics into server-side error vs 
client-side error + (Jessica Cheng Mallet via Erick Erickson) + ================== 6.0.1 ================== (No Changes) diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java index cf3aa706dfa..98f68967384 100644 --- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java @@ -56,7 +56,8 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo // Statistics private final AtomicLong numRequests = new AtomicLong(); - private final AtomicLong numErrors = new AtomicLong(); + private final AtomicLong numServerErrors = new AtomicLong(); + private final AtomicLong numClientErrors = new AtomicLong(); private final AtomicLong numTimeouts = new AtomicLong(); private final Timer requestTimes = new Timer(); @@ -164,23 +165,33 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo } } } catch (Exception e) { + boolean incrementErrors = true; + boolean isServerError = true; if (e instanceof SolrException) { SolrException se = (SolrException)e; if (se.code() == SolrException.ErrorCode.CONFLICT.code) { - // TODO: should we allow this to be counted as an error (numErrors++)? - - } else { - SolrException.log(log, e); + incrementErrors = false; + } else if (se.code() >= 400 && se.code() < 500) { + isServerError = false; } } else { - SolrException.log(log, e); if (e instanceof SyntaxError) { + isServerError = false; e = new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } } rsp.setException(e); - numErrors.incrementAndGet(); + + if (incrementErrors) { + SolrException.log(log, e); + + if (isServerError) { + numServerErrors.incrementAndGet(); + } else { + numClientErrors.incrementAndGet(); + } + } } finally { timer.stop(); @@ -263,7 +274,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo Snapshot snapshot = requestTimes.getSnapshot(); lst.add("handlerStart",handlerStart); lst.add("requests", numRequests.longValue()); - lst.add("errors", numErrors.longValue()); + lst.add("errors", numServerErrors.longValue() + numClientErrors.longValue()); + lst.add("serverErrors", numServerErrors.longValue()); + lst.add("clientErrors", numClientErrors.longValue()); lst.add("timeouts", numTimeouts.longValue()); lst.add("totalTime", requestTimes.getSum()); lst.add("avgRequestsPerSecond", requestTimes.getMeanRate()); From fa840526826d24856c21a1a2b32b5646be273a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B8ydahl?= Date: Sun, 29 May 2016 00:34:32 +0200 Subject: [PATCH 15/19] SOLR-8583: Apply highlighting to hl.alternateField by default. (cherry picked from commit e37e49e) --- solr/CHANGES.txt | 3 + .../highlight/DefaultSolrHighlighter.java | 103 ++++++++++++------ .../solr/highlight/HighlighterTest.java | 82 +++++++++++++- .../solr/common/params/HighlightParams.java | 1 + 4 files changed, 155 insertions(+), 34 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 50ca449aec8..70d4b123feb 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -116,6 +116,9 @@ New Features * SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode. (Keith Laban, Dennis Gove) +* SOLR-8583: Apply highlighting to hl.alternateField by default for Default and FastVectorHighlighter. 
+ Turn off with hl.highlightAlternate=false (janhoy, David Smiley) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 821af5c9a05..08ae03769fe 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; @@ -61,6 +60,7 @@ import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; import org.apache.lucene.util.AttributeSource.State; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.HighlightParams; +import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -389,8 +389,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf preFetchFieldNames.add(keyField.getName()); } - FastVectorHighlighter fvh = null; // lazy - FieldQuery fvhFieldQuery = null; // lazy + FvhContainer fvhContainer = new FvhContainer(); // Lazy container for fvh and fieldQuery IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getLeafReader()); // SOLR-5855 @@ -408,30 +407,10 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf SchemaField schemaField = schema.getFieldOrNull(fieldName); Object fieldHighlights; // object type allows flexibility for subclassers - if (schemaField == null) { - fieldHighlights = null; - } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) { - // TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)! 
- fieldHighlights = null; - } else if (useFastVectorHighlighter(params, schemaField)) { - if (fvhFieldQuery == null) { - fvh = new FastVectorHighlighter( - // FVH cannot process hl.usePhraseHighlighter parameter per-field basis - params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), - // FVH cannot process hl.requireFieldMatch parameter per-field basis - params.getBool(HighlightParams.FIELD_MATCH, false)); - fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT)); - fvhFieldQuery = fvh.getFieldQuery(query, reader); - } - fieldHighlights = - doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvh, fvhFieldQuery, reader, req); - } else { // standard/default highlighter - fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); - } + fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params); if (fieldHighlights == null) { - // no summaries made; copy text from alternate field - fieldHighlights = alternateField(doc, fieldName, req); + fieldHighlights = alternateField(doc, docId, fieldName, fvhContainer, query, reader, req); } if (fieldHighlights != null) { @@ -443,6 +422,34 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf return fragments; } + private Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField, + FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req, + SolrParams params) throws IOException { + Object fieldHighlights; + if (schemaField == null) { + fieldHighlights = null; + } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) { + // TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)! + fieldHighlights = null; + } else if (useFastVectorHighlighter(params, schemaField)) { + if (fvhContainer.fieldQuery == null) { + FastVectorHighlighter fvh = new FastVectorHighlighter( + // FVH cannot process hl.usePhraseHighlighter parameter per-field basis + params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), + // FVH cannot process hl.requireFieldMatch parameter per-field basis + params.getBool(HighlightParams.FIELD_MATCH, false)); + fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT)); + fvhContainer.fvh = fvh; + fvhContainer.fieldQuery = fvh.getFieldQuery(query, reader); + } + fieldHighlights = + doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req); + } else { // standard/default highlighter + fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); + } + return fieldHighlights; + } + /** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}. * Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid * the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */ @@ -469,14 +476,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. 
*/ @SuppressWarnings("unchecked") protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId, - SchemaField schemaField, FastVectorHighlighter highlighter, - FieldQuery fieldQuery, + SchemaField schemaField, FvhContainer fvhContainer, IndexReader reader, SolrQueryRequest req) throws IOException { SolrParams params = req.getParams(); String fieldName = schemaField.getName(); SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params); - String[] snippets = highlighter.getBestFragments( fieldQuery, reader, docId, fieldName, + String[] snippets = fvhContainer.fvh.getBestFragments( fvhContainer.fieldQuery, reader, docId, fieldName, params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ), params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ), getFragListBuilder( fieldName, params ), @@ -497,12 +503,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf final String fieldName = schemaField.getName(); final int mvToExamine = - req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, + params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, (schemaField.multiValued()) ? Integer.MAX_VALUE : 1); // Technically this is the max *fragments* (snippets), not max values: int mvToMatch = - req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE); + params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE); if (mvToExamine <= 0 || mvToMatch <= 0) { return null; } @@ -557,7 +563,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf } Highlighter highlighter; - if (req.getParams().getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) { + if (params.getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) { // We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream // needs to implement reset() efficiently. @@ -662,12 +668,38 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */ @SuppressWarnings("unchecked") - protected Object alternateField(Document doc, String fieldName, SolrQueryRequest req) { + protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query, + IndexReader reader, SolrQueryRequest req) throws IOException { + IndexSchema schema = req.getSearcher().getSchema(); SolrParams params = req.getParams(); String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD); + int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0); if (alternateField == null || alternateField.length() == 0) { return null; } + + if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true)) { + // Try to highlight alternate field + Object fieldHighlights = null; + SchemaField schemaField = schema.getFieldOrNull(alternateField); + if (schemaField != null) { + HashMap invariants = new HashMap<>(); + invariants.put("f." + alternateField + "." + HighlightParams.SNIPPETS, "1"); + // Enforce maxAlternateFieldLength by FRAGSIZE. Minimum 18 due to FVH limitations + invariants.put("f." + alternateField + "." + HighlightParams.FRAGSIZE, + alternateFieldLen > 0 ? 
String.valueOf(Math.max(18, alternateFieldLen)) : String.valueOf(Integer.MAX_VALUE)); + SolrParams origParams = req.getParams(); + req.setParams(SolrParams.wrapDefaults(new MapSolrParams(invariants), origParams)); + fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params); + req.setParams(origParams); + if (fieldHighlights != null) { + return fieldHighlights; + } + } + } + + + // Fallback to static non-highlighted IndexableField[] docFields = doc.getFields(alternateField); if (docFields.length == 0) { // The alternate field did not exist, treat the original field as fallback instead @@ -685,7 +717,6 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf String[] altTexts = listFields.toArray(new String[listFields.size()]); Encoder encoder = getEncoder(fieldName, params); - int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0); List altList = new ArrayList<>(); int len = 0; for( String altText: altTexts ){ @@ -707,6 +738,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf final TokenStream tStream = schemaField.getType().getIndexAnalyzer().tokenStream(schemaField.getName(), docText); return new TokenOrderingFilter(tStream, 10); } + + // Wraps FVH to allow pass-by-reference + private class FvhContainer { + private FastVectorHighlighter fvh; + private FieldQuery fieldQuery; + } } /** Orders Tokens in a window first by their startOffset ascending. diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java index 1a432db20f5..2cc74abe6fc 100644 --- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java +++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java @@ -703,7 +703,87 @@ public class HighlighterTest extends SolrTestCaseJ4 { "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='a piece of text']" ); } - + + @Test + public void testAlternateSummaryWithHighlighting() { + //long document + assertU(adoc("tv_text", "keyword is only here, tv_text alternate field", + "t_text", "a piece of text to be substituted", + "other_t", "keyword", + "id", "1", + "foo_t","hi")); + assertU(commit()); + assertU(optimize()); + + // Prove that hl.highlightAlternate is default true and respects maxAlternateFieldLength + HashMap args = new HashMap<>(); + args.put("hl", "true"); + args.put("hl.fragsize","0"); + args.put("hl.fl", "t_text"); + args.put("hl.simple.pre", ""); + args.put("hl.simple.post", ""); + args.put("hl.alternateField", "tv_text"); + args.put("hl.maxAlternateFieldLength", "39"); + TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( + "standard", 0, 200, args); + assertQ("Alternate summarization with highlighting", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text']" + ); + + // Query on other field than hl or alternate. 
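The alternate-field path above leans on SolrParams.wrapDefaults to force per-field settings on top of whatever the client sent; a self-contained sketch of that trick, with a made-up class name and arbitrary field name and values:

import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;

public class WrapDefaultsSketch {
  public static void main(String[] args) {
    ModifiableSolrParams user = new ModifiableSolrParams();
    user.set("hl.fragsize", "100");                  // what the client asked for

    Map<String, String> invariants = new HashMap<>();
    invariants.put("f.tv_text.hl.snippets", "1");    // force a single snippet for this field
    invariants.put("f.tv_text.hl.fragsize", "39");   // and a fixed fragment size

    // The first argument wins; the original request params only act as a fallback.
    SolrParams merged = SolrParams.wrapDefaults(new MapSolrParams(invariants), user);
    System.out.println(merged.getFieldInt("tv_text", "hl.fragsize", 100)); // prints 39
    System.out.println(merged.getInt("hl.fragsize", -1));                  // prints 100
  }
}
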
Still we get the hightlighted snippet from alternate + assertQ("Alternate summarization with highlighting, query other field", + sumLRF.makeRequest("other_t:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text']" + ); + + // With hl.requireFieldMatch, will not highlight but fall back to plain-text alternate + args.put("hl.requireFieldMatch", "true"); + sumLRF = h.getRequestFactory( + "standard", 0, 200, args); + assertQ("Alternate summarization with highlighting, requireFieldMatch", + sumLRF.makeRequest("other_t:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text alternate']" + ); + args.put("hl.requireFieldMatch", "false"); + + + // Works with field specific params, overriding maxAlternateFieldLength to return everything + args.remove("hl.alternateField"); + args.put("f.t_text.hl.alternateField", "tv_text"); + args.put("f.t_text.hl.maxAlternateFieldLength", "0"); + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("Alternate summarization with highlighting", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text alternate field']" + ); + + // Prove fallback highlighting works also with FVH + args.put("hl.useFastVectorHighlighter", "true"); + args.put("hl.tag.pre", ""); + args.put("hl.tag.post", ""); + args.put("f.t_text.hl.maxAlternateFieldLength", "18"); + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("Alternate summarization with highlighting using FVH", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here']" + ); + + // Prove it is possible to turn off highlighting of alternate field + args.put("hl.highlightAlternate", "false"); + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("Alternate summarization without highlighting", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only he']" + ); + } + @Test public void testPhraseHighlighter() { HashMap args = new HashMap<>(); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java index 21528a94caa..c0d40aaf83e 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java @@ -46,6 +46,7 @@ public interface HighlightParams { public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary"; public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField"; public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength"; + public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate"; public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine"; public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch"; From 3a74daab20736c6345d3118b2750ff365417268e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: 
Mon, 30 May 2016 07:51:41 +0200 Subject: [PATCH 16/19] LUCENE-7289: Fix test bug, 65520 gets rounded to +Infinity too. --- .../src/test/org/apache/lucene/document/TestHalfFloatPoint.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java index d280944fba6..a24d99279b3 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java @@ -105,7 +105,7 @@ public class TestHalfFloatPoint extends LuceneTestCase { assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(rounded), 0f); } else if (Float.isFinite(rounded) == false) { assertFalse(Float.isNaN(rounded)); - assertTrue(Math.abs(f) > 65520); + assertTrue(Math.abs(f) >= 65520); } else { int index = Arrays.binarySearch(values, f); float closest; From a460addd2f9511432da7684bd2ee6598025389ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B8ydahl?= Date: Mon, 30 May 2016 10:24:23 +0200 Subject: [PATCH 17/19] SOLR-8583: Do not attempt highlight of alternate if it is same as original hl field (cherry picked from commit 05ce40a) --- .../java/org/apache/solr/highlight/DefaultSolrHighlighter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 08ae03769fe..9941b16ea60 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -678,7 +678,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf return null; } - if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true)) { + if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true) && !alternateField.equals(fieldName)) { // Try to highlight alternate field Object fieldHighlights = null; SchemaField schemaField = schema.getFieldOrNull(alternateField); From 358d6f7e6be0b546c4c4a08e0277969d623fbbc0 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 27 May 2016 12:55:27 +0200 Subject: [PATCH 18/19] LUCENE-7306: Speed up points indexing/merging with radix sort. --- lucene/CHANGES.txt | 5 +- .../lucene/util/BytesRefComparator.java | 55 +++++ .../lucene/util/FixedLengthBytesRefArray.java | 29 +++ .../apache/lucene/util/MSBRadixSorter.java | 219 ++++++++++++++++++ .../lucene/util/StringMSBRadixSorter.java | 160 ++----------- .../org/apache/lucene/util/bkd/BKDWriter.java | 113 ++++----- .../lucene/util/TestMSBRadixSorter.java | 117 ++++++++++ 7 files changed, 488 insertions(+), 210 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/TestMSBRadixSorter.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a29ca4479be..fb60dbabd5b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -86,7 +86,10 @@ Optimizations (which is used by TermsQuery, multi-term queries and several point queries). (Adrien Grand, Jeff Wartes, David Smiley) -* LUCENE-7299: Speed up BytesRefHash.sort(). (Adrien Grand) +* LUCENE-7299: Speed up BytesRefHash.sort() using radix sort. 
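Why the TestHalfFloatPoint assertion changes from > to >=: in half precision the largest finite value is 65504, the next encoding corresponds to 65536 and overflows to +Infinity, and round-to-nearest-even sends the midpoint 65520 upward as well. A small arithmetic check of those boundaries, using no Lucene APIs (class name is made up):

public class HalfFloatBoundaryCheck {
  public static void main(String[] args) {
    // Largest finite half float: (2 - 2^-10) * 2^15 = 65504
    float maxFinite = (2f - (float) Math.pow(2, -10)) * (float) Math.pow(2, 15);
    // The next half-float encoding would be 2^16 = 65536, which is out of range, i.e. +Infinity.
    float overflow = 65536f;
    // Ties round to the even significand, which is the upper value here,
    // so every |f| >= 65520 rounds to +Infinity.
    float midpoint = (maxFinite + overflow) / 2;
    System.out.println(maxFinite + " " + midpoint); // 65504.0 65520.0
  }
}
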
(Adrien Grand) + +* LUCENE-7306: Speed up points indexing and merging using radix sort. + (Adrien Grand) Bug Fixes diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java new file mode 100644 index 00000000000..821b8f2ae94 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import java.util.Comparator; + +/** Specialized {@link BytesRef} comparator that + * {@link FixedLengthBytesRefArray#iterator(Comparator)} has optimizations + * for. + * @lucene.internal */ +public abstract class BytesRefComparator implements Comparator { + + final int comparedBytesCount; + + /** Sole constructor. + * @param comparedBytesCount the maximum number of bytes to compare. */ + protected BytesRefComparator(int comparedBytesCount) { + this.comparedBytesCount = comparedBytesCount; + } + + /** Return the unsigned byte to use for comparison at index {@code i}, or + * {@code -1} if all bytes that are useful for comparisons are exhausted. + * This may only be called with a value of {@code i} between {@code 0} + * included and {@code comparedBytesCount} excluded. 
*/ + protected abstract int byteAt(BytesRef ref, int i); + + @Override + public int compare(BytesRef o1, BytesRef o2) { + for (int i = 0; i < comparedBytesCount; ++i) { + final int b1 = byteAt(o1, i); + final int b2 = byteAt(o2, i); + if (b1 != b2) { + return b1 - b2; + } else if (b1 == -1) { + break; + } + } + return 0; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java b/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java index 346b908bc6f..4fc41088074 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java +++ b/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java @@ -105,6 +105,35 @@ final class FixedLengthBytesRefArray implements SortableBytesRefArray { orderedEntries[i] = i; } + if (comp instanceof BytesRefComparator) { + BytesRefComparator bComp = (BytesRefComparator) comp; + new MSBRadixSorter(bComp.comparedBytesCount) { + + BytesRef scratch; + + { + scratch = new BytesRef(); + scratch.length = valueLength; + } + + @Override + protected void swap(int i, int j) { + int o = orderedEntries[i]; + orderedEntries[i] = orderedEntries[j]; + orderedEntries[j] = o; + } + + @Override + protected int byteAt(int i, int k) { + int index1 = orderedEntries[i]; + scratch.bytes = blocks[index1 / valuesPerBlock]; + scratch.offset = (index1 % valuesPerBlock) * valueLength; + return bComp.byteAt(scratch, k); + } + }.sort(0, size()); + return orderedEntries; + } + final BytesRef pivot = new BytesRef(); final BytesRef scratch1 = new BytesRef(); final BytesRef scratch2 = new BytesRef(); diff --git a/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java new file mode 100644 index 00000000000..33f20b63922 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import java.util.Arrays; + +/** Radix sorter for variable-length strings. This class sorts based on the most + * significant byte first and falls back to {@link IntroSorter} when the size + * of the buckets to sort becomes small. It is NOT stable. + * Worst-case memory usage is about {@code 2.3 KB}. 
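To illustrate the byteAt contract, a hypothetical concrete subclass (not part of the patch) that orders values by their first four bytes; returning -1 once a value runs out of bytes makes shorter values sort before longer ones sharing the same prefix, and passing such a comparator to FixedLengthBytesRefArray is what routes sorting to the radix sorter added in this patch:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefComparator;

public class FirstFourBytesComparator extends BytesRefComparator {
  public FirstFourBytesComparator() {
    super(4); // comparedBytesCount: at most the first four bytes are ever inspected
  }

  @Override
  protected int byteAt(BytesRef ref, int i) {
    if (i >= ref.length) {
      return -1; // no more useful bytes for this value
    }
    return ref.bytes[ref.offset + i] & 0xff; // unsigned byte at position i
  }
}
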
+ * @lucene.internal */ +public abstract class MSBRadixSorter extends Sorter { + + // after that many levels of recursion we fall back to introsort anyway + // this is used as a protection against the fact that radix sort performs + // worse when there are long common prefixes (probably because of cache + // locality) + private static final int LEVEL_THRESHOLD = 8; + // size of histograms: 256 + 1 to indicate that the string is finished + private static final int HISTOGRAM_SIZE = 257; + // buckets below this size will be sorted with introsort + private static final int LENGTH_THRESHOLD = 100; + + // we store one histogram per recursion level + private final int[][] histograms = new int[LEVEL_THRESHOLD][]; + private final int[] endOffsets = new int[HISTOGRAM_SIZE]; + + private final int maxLength; + + /** + * Sole constructor. + * @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown. + */ + protected MSBRadixSorter(int maxLength) { + this.maxLength = maxLength; + } + + /** Return the k-th byte of the entry at index {@code i}, or {@code -1} if + * its length is less than or equal to {@code k}. This may only be called + * with a value of {@code i} between {@code 0} included and + * {@code maxLength} excluded. */ + protected abstract int byteAt(int i, int k); + + /** Get a fall-back sorter which may assume that the first k bytes of all compared strings are equal. */ + protected Sorter getFallbackSorter(int k) { + return new IntroSorter() { + @Override + protected void swap(int i, int j) { + MSBRadixSorter.this.swap(i, j); + } + + @Override + protected int compare(int i, int j) { + for (int o = k; o < maxLength; ++o) { + final int b1 = byteAt(i, o); + final int b2 = byteAt(j, o); + if (b1 != b2) { + return b1 - b2; + } else if (b1 == -1) { + break; + } + } + return 0; + } + + @Override + protected void setPivot(int i) { + pivot.setLength(0); + for (int o = k; o < maxLength; ++o) { + final int b = byteAt(i, o); + if (b == -1) { + break; + } + pivot.append((byte) b); + } + } + + @Override + protected int comparePivot(int j) { + for (int o = 0; o < pivot.length(); ++o) { + final int b1 = pivot.byteAt(o) & 0xff; + final int b2 = byteAt(j, k + o); + if (b1 != b2) { + return b1 - b2; + } + } + if (k + pivot.length() == maxLength) { + return 0; + } + return -1 - byteAt(j, k + pivot.length()); + } + + private final BytesRefBuilder pivot = new BytesRefBuilder(); + }; + } + + @Override + protected final int compare(int i, int j) { + throw new UnsupportedOperationException("unused: not a comparison-based sort"); + } + + @Override + public void sort(int from, int to) { + checkRange(from, to); + sort(from, to, 0); + } + + private void sort(int from, int to, int k) { + if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) { + introSort(from, to, k); + } else { + radixSort(from, to, k); + } + } + + private void introSort(int from, int to, int k) { + getFallbackSorter(k).sort(from, to); + } + + private void radixSort(int from, int to, int k) { + int[] histogram = histograms[k]; + if (histogram == null) { + histogram = histograms[k] = new int[HISTOGRAM_SIZE]; + } else { + Arrays.fill(histogram, 0); + } + + buildHistogram(from, to, k, histogram); + + // short-circuit: if all keys have the same byte at offset k, then recurse directly + for (int i = 0; i < HISTOGRAM_SIZE; ++i) { + if (histogram[i] == to - from) { + // everything is in the same bucket, recurse + if (i > 0) { + sort(from, to, k + 1); + } + return; + } else if (histogram[i] != 0) { + break; + } + } + + int[] 
startOffsets = histogram; + int[] endOffsets = this.endOffsets; + sumHistogram(histogram, endOffsets); + reorder(from, to, startOffsets, endOffsets, k); + endOffsets = startOffsets; + + if (k + 1 < maxLength) { + // recurse on all but the first bucket since all keys are equals in this + // bucket (we already compared all bytes) + for (int prev = endOffsets[0], i = 1; i < HISTOGRAM_SIZE; ++i) { + int h = endOffsets[i]; + final int bucketLen = h - prev; + if (bucketLen > 1) { + sort(from + prev, from + h, k + 1); + } + prev = h; + } + } + } + + /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */ + private int getBucket(int i, int k) { + return byteAt(i, k) + 1; + } + + /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */ + private int[] buildHistogram(int from, int to, int k, int[] histogram) { + for (int i = from; i < to; ++i) { + histogram[getBucket(i, k)]++; + } + return histogram; + } + + /** Accumulate values of the histogram so that it does not store counts but + * start offsets. {@code endOffsets} will store the end offsets. */ + private static void sumHistogram(int[] histogram, int[] endOffsets) { + int accum = 0; + for (int i = 0; i < HISTOGRAM_SIZE; ++i) { + final int count = histogram[i]; + histogram[i] = accum; + accum += count; + endOffsets[i] = accum; + } + } + + /** + * Reorder based on start/end offsets for each bucket. When this method + * returns, startOffsets and endOffsets are equal. + * @param startOffsets start offsets per bucket + * @param endOffsets end offsets per bucket + */ + private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) { + // reorder in place, like the dutch flag problem + for (int i = 0; i < HISTOGRAM_SIZE; ++i) { + final int limit = endOffsets[i]; + for (int h1 = startOffsets[i]; h1 < limit; h1 = startOffsets[i]) { + final int b = getBucket(from + h1, k); + final int h2 = startOffsets[b]++; + swap(from + h1, from + h2); + } + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java index 84bd0749b5c..67cba2b85e5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/StringMSBRadixSorter.java @@ -16,61 +16,36 @@ */ package org.apache.lucene.util; -import java.util.Arrays; +abstract class StringMSBRadixSorter extends MSBRadixSorter { -/** Radix sorter for variable-length strings. This class sorts based on the most - * significant byte first and falls back to {@link IntroSorter} when the size - * of the buckets to sort becomes small. It is NOT stable. - * Worst-case memory usage is about {@code 2.3 KB}. 
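For context, a minimal standalone use of the new sorter along the same lines as TestMSBRadixSorter below: supply byteAt and swap over an in-memory array of keys (class name and sample keys are arbitrary):

import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.MSBRadixSorter;

public class MSBRadixSorterExample {
  public static void main(String[] args) {
    byte[][] keys = {
        "lucene".getBytes(StandardCharsets.UTF_8),
        "solr".getBytes(StandardCharsets.UTF_8),
        "luc".getBytes(StandardCharsets.UTF_8),
    };
    new MSBRadixSorter(Integer.MAX_VALUE) { // maximum key length unknown
      @Override
      protected int byteAt(int i, int k) {
        // -1 signals that key i has fewer than k+1 bytes, so shorter keys sort first
        return k < keys[i].length ? keys[i][k] & 0xff : -1;
      }

      @Override
      protected void swap(int i, int j) {
        byte[] tmp = keys[i];
        keys[i] = keys[j];
        keys[j] = tmp;
      }
    }.sort(0, keys.length);
    for (byte[] key : keys) {
      System.out.println(new String(key, StandardCharsets.UTF_8)); // luc, lucene, solr
    }
  }
}
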
*/ -abstract class StringMSBRadixSorter extends Sorter { - - // after that many levels of recursion we fall back to introsort anyway - // this is used as a protection against the fact that radix sort performs - // worse when there are long common prefixes (probably because of cache - // locality) - private static final int LEVEL_THRESHOLD = 8; - // size of histograms: 256 + 1 to indicate that the string is finished - private static final int HISTOGRAM_SIZE = 257; - // buckets below this size will be sorted with introsort - private static final int LENGTH_THRESHOLD = 100; - - // we store one histogram per recursion level - private final int[][] histograms = new int[LEVEL_THRESHOLD][]; - private final int[] endOffsets = new int[HISTOGRAM_SIZE]; + StringMSBRadixSorter() { + super(Integer.MAX_VALUE); + } /** Get a {@link BytesRef} for the given index. */ protected abstract BytesRef get(int i); - /** Store bytes for the given index into {@code dest}, without the first k bytes. */ - private void get(int i, int k, BytesRef dest) { + @Override + protected int byteAt(int i, int k) { BytesRef ref = get(i); - assert ref.length >= k; - dest.bytes = ref.bytes; - dest.offset = ref.offset + k; - dest.length = ref.length - k; - } - - @Override - protected final int compare(int i, int j) { - throw new UnsupportedOperationException("unused: not a comparison-based sort"); - } - - @Override - public void sort(int from, int to) { - checkRange(from, to); - sort(from, to, 0); - } - - private void sort(int from, int to, int k) { - if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) { - introSort(from, to, k); - } else { - radixSort(from, to, k); + if (ref.length <= k) { + return -1; } + return ref.bytes[ref.offset + k] & 0xff; } - private void introSort(int from, int to, int k) { - new IntroSorter() { + @Override + protected Sorter getFallbackSorter(int k) { + return new IntroSorter() { + + private void get(int i, int k, BytesRef scratch) { + BytesRef ref = StringMSBRadixSorter.this.get(i); + assert ref.length >= k; + scratch.bytes = ref.bytes; + scratch.offset = ref.offset + k; + scratch.length = ref.length - k; + } + @Override protected void swap(int i, int j) { StringMSBRadixSorter.this.swap(i, j); @@ -95,96 +70,7 @@ abstract class StringMSBRadixSorter extends Sorter { } private final BytesRef pivot = new BytesRef(), - scratch1 = new BytesRef(), scratch2 = new BytesRef(); - }.sort(from, to); - } - - private void radixSort(int from, int to, int k) { - int[] histogram = histograms[k]; - if (histogram == null) { - histogram = histograms[k] = new int[HISTOGRAM_SIZE]; - } else { - Arrays.fill(histogram, 0); - } - - buildHistogram(from, to, k, histogram); - - // short-circuit: if all keys have the same byte at offset k, then recurse directly - for (int i = 0; i < HISTOGRAM_SIZE; ++i) { - if (histogram[i] == to - from) { - // everything is in the same bucket, recurse - if (i > 0) { - sort(from, to, k + 1); - } - return; - } else if (histogram[i] != 0) { - break; - } - } - - int[] startOffsets = histogram; - int[] endOffsets = this.endOffsets; - sumHistogram(histogram, endOffsets); - reorder(from, to, startOffsets, endOffsets, k); - endOffsets = startOffsets; - - // recurse on all but the first bucket since all keys are equals in this - // bucket (we already compared all bytes) - for (int prev = endOffsets[0], i = 1; i < HISTOGRAM_SIZE; ++i) { - int h = endOffsets[i]; - final int bucketLen = h - prev; - if (bucketLen > 1) { - sort(from + prev, from + h, k + 1); - } - prev = h; - } - } - - /** Return a number 
for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */ - private int getBucket(int id, int k) { - BytesRef ref = get(id); - if (ref.length <= k) { - return 0; - } - final int b = ref.bytes[ref.offset + k] & 0xff; - return b + 1; - } - - /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */ - private int[] buildHistogram(int from, int to, int k, int[] histogram) { - for (int i = from; i < to; ++i) { - histogram[getBucket(i, k)]++; - } - return histogram; - } - - /** Accumulate values of the histogram so that it does not store counts but - * start offsets. {@code endOffsets} will store the end offsets. */ - private static void sumHistogram(int[] histogram, int[] endOffsets) { - int accum = 0; - for (int i = 0; i < HISTOGRAM_SIZE; ++i) { - final int count = histogram[i]; - histogram[i] = accum; - accum += count; - endOffsets[i] = accum; - } - } - - /** - * Reorder based on start/end offsets for each bucket. When this method - * returns, startOffsets and endOffsets are equal. - * @param startOffsets start offsets per bucket - * @param endOffsets end offsets per bucket - */ - private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) { - // reorder in place, like the dutch flag problem - for (int i = 0; i < HISTOGRAM_SIZE; ++i) { - final int limit = endOffsets[i]; - for (int h1 = startOffsets[i]; h1 < limit; h1 = startOffsets[i]) { - final int b = getBucket(from + h1, k); - final int h2 = startOffsets[b]++; - swap(from + h1, from + h2); - } - } + scratch1 = new BytesRef(), scratch2 = new BytesRef(); + }; } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 288ece4c51d..e13e5cffd40 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -25,7 +25,6 @@ import java.util.List; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.MergeState; -import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -33,10 +32,11 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.TrackingDirectoryWrapper; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefComparator; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.IntroSorter; import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.MSBRadixSorter; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.OfflineSorter; import org.apache.lucene.util.PriorityQueue; @@ -604,39 +604,26 @@ public class BKDWriter implements Closeable { /** Sort the heap writer by the specified dim */ private void sortHeapPointWriter(final HeapPointWriter writer, int dim) { + final int pointCount = Math.toIntExact(this.pointCount); + // Tie-break by docID: - assert pointCount < Integer.MAX_VALUE; - //int[] swapCount = new int[1]; - //int[] cmpCount = new int[1]; - - // System.out.println("SORT length=" + length); - - // All buffered points are still in heap; just do in-place sort: - new IntroSorter() { - private final byte[] pivotPackedValue = new byte[bytesPerDim]; - private int pivotDocID; + // No need to tie break on ord, for the case where the same doc has the same value in a given 
dimension indexed more than once: it + // can't matter at search time since we don't write ords into the index: + new MSBRadixSorter(bytesPerDim + Integer.BYTES) { @Override - protected void setPivot(int i) { - pivotDocID = writer.docIDs[i]; - int block = i / writer.valuesPerBlock; - int index = i % writer.valuesPerBlock; - System.arraycopy(writer.blocks.get(block), index*packedBytesLength+dim*bytesPerDim, pivotPackedValue, 0, bytesPerDim); - } - - @Override - protected int comparePivot(int j) { - //cmpCount[0]++; - int block = j / writer.valuesPerBlock; - int index = j % writer.valuesPerBlock; - assert index >= 0: "index=" + index + " j=" + j; - int cmp = StringHelper.compare(bytesPerDim, pivotPackedValue, 0, writer.blocks.get(block), bytesPerDim*(index*numDims+dim)); - if (cmp != 0) { - return cmp; + protected int byteAt(int i, int k) { + assert k >= 0; + if (k < bytesPerDim) { + // dim bytes + int block = i / writer.valuesPerBlock; + int index = i % writer.valuesPerBlock; + return writer.blocks.get(block)[index * packedBytesLength + dim * bytesPerDim + k] & 0xff; + } else { + // doc id + int s = 3 - (k - bytesPerDim); + return (writer.docIDs[i] >>> (s * 8)) & 0xff; } - - // Tie-break - return Integer.compare(pivotDocID, writer.docIDs[j]); } @Override @@ -670,26 +657,7 @@ public class BKDWriter implements Closeable { System.arraycopy(scratch1, 0, blockJ, indexJ, packedBytesLength); } - @Override - protected int compare(int i, int j) { - //cmpCount[0]++; - int blockI = i / writer.valuesPerBlock; - int dimI = i % writer.valuesPerBlock; - int blockJ = j / writer.valuesPerBlock; - int dimJ = j % writer.valuesPerBlock; - int cmp = StringHelper.compare(bytesPerDim, writer.blocks.get(blockI), bytesPerDim*(dimI*numDims+dim), writer.blocks.get(blockJ), bytesPerDim*(dimJ*numDims+dim)); - if (cmp != 0) { - return cmp; - } - - // Tie-break by docID: - - // No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it - // can't matter at search time since we don't write ords into the index: - return Integer.compare(writer.docIDs[i], writer.docIDs[j]); - } - }.sort(0, Math.toIntExact(pointCount)); - //System.out.println("LEN=" + length + " SWAP=" + swapCount[0] + " CMP=" + cmpCount[0]); + }.sort(0, pointCount); } private PointWriter sort(int dim) throws IOException { @@ -724,28 +692,28 @@ public class BKDWriter implements Closeable { final int offset = bytesPerDim * dim; - Comparator cmp = new Comparator() { - - final ByteArrayDataInput reader = new ByteArrayDataInput(); - - @Override - public int compare(BytesRef a, BytesRef b) { - // First compare by the requested dimension we are sorting by: - int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + offset, b.bytes, b.offset + offset); - - if (cmp != 0) { - return cmp; + Comparator cmp; + if (dim == numDims - 1) { + // in that case the bytes for the dimension and for the doc id are contiguous, + // so we don't need a branch + cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) { + @Override + protected int byteAt(BytesRef ref, int i) { + return ref.bytes[ref.offset + offset + i] & 0xff; } - - // Tie-break by docID ... 
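A quick sanity check (not part of the patch, class name made up) of the byteAt mapping above: appending the docID as four big-endian bytes, via (docID >>> (s * 8)) & 0xff with s = 3 - (k - bytesPerDim), preserves numeric order for non-negative IDs, so the byte-wise radix tie-break is equivalent to Integer.compare on docIDs:

public class DocIdByteOrderCheck {
  /** Big-endian byte k (0..3) of a non-negative docID. */
  static int byteAt(int docId, int k) {
    return (docId >>> ((3 - k) * 8)) & 0xff;
  }

  public static void main(String[] args) {
    int smaller = 123, larger = 70000; // arbitrary non-negative doc IDs
    for (int k = 0; k < 4; k++) {
      int b1 = byteAt(smaller, k), b2 = byteAt(larger, k);
      if (b1 != b2) {
        // The first differing byte decides the order, and it agrees with numeric order.
        System.out.println((b1 < b2) + " matches " + (smaller < larger)); // true matches true
        break;
      }
    }
  }
}
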
no need to tie break on ord, for the case where the same doc has - // the same value in a given dimension indexed more than once: it can't matter at search - // time since we don't write ords into the index: - - return StringHelper.compare(Integer.BYTES, - a.bytes, a.offset + packedBytesLength, - b.bytes, b.offset + packedBytesLength); - } - }; + }; + } else { + cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) { + @Override + protected int byteAt(BytesRef ref, int i) { + if (i < bytesPerDim) { + return ref.bytes[ref.offset + offset + i] & 0xff; + } else { + return ref.bytes[ref.offset + packedBytesLength + i - bytesPerDim] & 0xff; + } + } + }; + } OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) { @@ -1272,4 +1240,5 @@ public class BKDWriter implements Closeable { return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, count, singleValuePerDoc); } } + } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestMSBRadixSorter.java b/lucene/core/src/test/org/apache/lucene/util/TestMSBRadixSorter.java new file mode 100644 index 00000000000..bc5af7f23fe --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/TestMSBRadixSorter.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.util; + +import java.util.Arrays; + +public class TestMSBRadixSorter extends LuceneTestCase { + + private void test(BytesRef[] refs, int len) { + BytesRef[] expected = Arrays.copyOf(refs, len); + Arrays.sort(expected); + + int maxLength = 0; + for (int i = 0; i < len; ++i) { + BytesRef ref = refs[i]; + maxLength = Math.max(maxLength, ref.length); + } + switch (random().nextInt(3)) { + case 0: + maxLength += TestUtil.nextInt(random(), 1, 5); + break; + case 1: + maxLength = Integer.MAX_VALUE; + break; + default: + // leave unchanged + break; + } + + new MSBRadixSorter(maxLength) { + + protected int byteAt(int i, int k) { + BytesRef ref = refs[i]; + if (ref.length <= k) { + return -1; + } + return ref.bytes[ref.offset + k] & 0xff; + } + + @Override + protected void swap(int i, int j) { + BytesRef tmp = refs[i]; + refs[i] = refs[j]; + refs[j] = tmp; + } + }.sort(0, len); + BytesRef[] actual = Arrays.copyOf(refs, len); + assertArrayEquals(expected, actual); + } + + public void testEmpty() { + test(new BytesRef[random().nextInt(5)], 0); + } + + public void testOneValue() { + BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random())); + test(new BytesRef[] { bytes }, 1); + } + + public void testTwoValues() { + BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random())); + BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random())); + test(new BytesRef[] { bytes1, bytes2 }, 2); + } + + private void testRandom(int commonPrefixLen, int maxLen) { + byte[] commonPrefix = new byte[commonPrefixLen]; + random().nextBytes(commonPrefix); + final int len = random().nextInt(100000); + BytesRef[] bytes = new BytesRef[len + random().nextInt(50)]; + for (int i = 0; i < len; ++i) { + byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)]; + random().nextBytes(b); + System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen); + bytes[i] = new BytesRef(b); + } + test(bytes, len); + } + + public void testRandom() { + for (int iter = 0; iter < 10; ++iter) { + testRandom(0, 10); + } + } + + public void testRandomWithLotsOfDuplicates() { + for (int iter = 0; iter < 10; ++iter) { + testRandom(0, 2); + } + } + + public void testRandomWithSharedPrefix() { + for (int iter = 0; iter < 10; ++iter) { + testRandom(TestUtil.nextInt(random(), 1, 30), 10); + } + } + + public void testRandomWithSharedPrefixAndLotsOfDuplicates() { + for (int iter = 0; iter < 10; ++iter) { + testRandom(TestUtil.nextInt(random(), 1, 30), 2); + } + } +} From 979af27209a10b41857cbf6c7439472c3eca5983 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Mon, 30 May 2016 06:37:19 -0400 Subject: [PATCH 19/19] LUCENE-7300: fix test bug to ensure the newly created file is in fact written through to the underlying filesystem even if NRTCachingDirectory is used --- .../apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java index 819511b9c68..388b2f08f13 100644 --- a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java +++ b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.attribute.BasicFileAttributes; +import java.util.Collections; import org.apache.lucene.codecs.CodecUtil; import 
org.apache.lucene.util.IOUtils; @@ -51,6 +52,8 @@ public class TestHardLinkCopyDirectoryWrapper extends BaseDirectoryTestCase { output.writeString("hey man, nice shot!"); CodecUtil.writeFooter(output); } + // In case luceneDir_1 has an NRTCachingDirectory + luceneDir_1.sync(Collections.singleton("foo.bar")); try { Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar")); BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class);