diff --git a/dev-tools/scripts/addVersion.py b/dev-tools/scripts/addVersion.py index 112da45adb9..630e86f6cb3 100644 --- a/dev-tools/scripts/addVersion.py +++ b/dev-tools/scripts/addVersion.py @@ -48,28 +48,36 @@ def add_constant(new_version, deprecate): last = buffer[-1] if last.strip() != '@Deprecated': spaces = ' ' * (len(last) - len(last.lstrip()) - 1) - buffer[-1] = spaces + (' * @deprecated (%s) Use latest\n' % new_version) - buffer.append(spaces + ' */\n') - buffer.append(spaces + '@Deprecated\n') + del buffer[-1] # Remove comment closer line + if (len(buffer) >= 4 and re.search('for Lucene.\s*$', buffer[-1]) != None): + del buffer[-3:] # drop the trailing lines '
<p>
/ Use this to get the latest ... / ... for Lucene.' + buffer.append(( '{0} * @deprecated ({1}) Use latest\n' + + '{0} */\n' + + '{0}@Deprecated\n').format(spaces, new_version)) def buffer_constant(buffer, line): spaces = ' ' * (len(line) - len(line.lstrip())) - buffer.append('\n' + spaces + '/**\n') - buffer.append(spaces + ' * Match settings and bugs in Lucene\'s %s release.\n' % new_version) + buffer.append(( '\n{0}/**\n' + + '{0} * Match settings and bugs in Lucene\'s {1} release.\n') + .format(spaces, new_version)) if deprecate: - buffer.append(spaces + ' * @deprecated Use latest\n') - buffer.append(spaces + ' */\n') + buffer.append('%s * @deprecated Use latest\n' % spaces) + else: + buffer.append(( '{0} *
<p>
\n' + + '{0} * Use this to get the latest & greatest settings, bug\n' + + '{0} * fixes, etc, for Lucene.\n').format(spaces)) + buffer.append('%s */\n' % spaces) if deprecate: - buffer.append(spaces + '@Deprecated\n') - buffer.append(spaces + 'public static final Version %s = new Version(%d, %d, %d);\n' % - (new_version.constant, new_version.major, new_version.minor, new_version.bugfix)) + buffer.append('%s@Deprecated\n' % spaces) + buffer.append('{0}public static final Version {1} = new Version({2}, {3}, {4});\n'.format + (spaces, new_version.constant, new_version.major, new_version.minor, new_version.bugfix)) class Edit(object): found = -1 def __call__(self, buffer, match, line): if new_version.constant in line: return None # constant already exists - # outter match is just to find lines declaring version constants + # outer match is just to find lines declaring version constants match = prev_matcher.search(line) if match is not None: ensure_deprecated(buffer) # old version should be deprecated @@ -166,38 +174,26 @@ def check_solr_version_tests(): def read_config(): parser = argparse.ArgumentParser(description='Add a new version') parser.add_argument('version', type=Version.parse) - parser.add_argument('-c', '--changeid', type=str, help='Git ChangeId (commit hash) for downstream version change to merge') c = parser.parse_args() c.branch_type = find_branch_type() c.matching_branch = c.version.is_bugfix_release() and c.branch_type == BranchType.release or \ c.version.is_minor_release() and c.branch_type == BranchType.stable or \ - c.version.is_major_release() and c.branch_type == BranchType.major + c.version.is_major_release() and c.branch_type == BranchType.unstable print ("branch_type is %s " % c.branch_type) - if c.changeid and c.version.is_major_release(): - parser.error('Cannot use --changeid for major release') - if c.changeid and c.matching_branch: - parser.error('Cannot use --changeid on branch that new version will originate on') - if c.version.is_bugfix_release() and c.branch_type in [BranchType.major, BranchType.stable] and not c.changeid: - parser.error('Adding bugfix release on master or stable branch requires --changeid') - if c.version.is_minor_release() and c.branch_type in [BranchType.major] and not c.changeid: - parser.error('Adding minor release on master branch requires --changeid') return c def main(): - c = read_config() - - if c.changeid: - cherry_pick_change(c.changeid) + c = read_config() print('\nAdding new version %s' % c.version) update_changes('lucene/CHANGES.txt', c.version) update_changes('solr/CHANGES.txt', c.version) add_constant(c.version, not c.matching_branch) - if not c.changeid: + if c.matching_branch: print('\nUpdating latest version') update_build_version(c.version) update_latest_constant(c.version) diff --git a/dev-tools/scripts/scriptutil.py b/dev-tools/scripts/scriptutil.py index f10bf971b6f..84c39cd9516 100644 --- a/dev-tools/scripts/scriptutil.py +++ b/dev-tools/scripts/scriptutil.py @@ -94,11 +94,11 @@ def update_file(filename, line_re, edit): f.write(''.join(buffer)) return True -# branch types are "release", "stable" and "major" +# branch types are "release", "stable" and "unstable" class BranchType(Enum): - major = 1 - stable = 2 - release = 3 + unstable = 1 + stable = 2 + release = 3 def find_branch_type(): output = subprocess.check_output('git status', shell=True) @@ -110,22 +110,17 @@ def find_branch_type(): raise Exception('git status missing branch name') if branchName == b'master': - return BranchType.major + return BranchType.unstable 
if re.match(r'branch_(\d+)x', branchName.decode('UTF-8')): return BranchType.stable if re.match(r'branch_(\d+)_(\d+)', branchName.decode('UTF-8')): return BranchType.release - raise Exception('Cannot run bumpVersion.py on feature branch') + raise Exception('Cannot run %s on feature branch' % sys.argv[0].rsplit('/', 1)[-1]) version_prop_re = re.compile('version\.base=(.*)') def find_current_version(): return version_prop_re.search(open('lucene/version.properties').read()).group(1) -def cherry_pick_change(changeid): - print('\nCherry-picking downstream change %s...' % changeid, end='') - run('git cherry-pick %s' % changeid) - print('done') - if __name__ == '__main__': print('This is only a support module, it cannot be run') sys.exit(1) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 791adadbf68..97d5b966731 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -22,6 +22,10 @@ New Features * LUCENE-7234: Added InetAddressPoint.nextDown/nextUp to easily generate range queries with excluded bounds. (Adrien Grand) +* LUCENE-7278: Spatial-extras DateRangePrefixTree's Calendar is now configurable, to + e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to + DateTimeFormatter.ISO_INSTANT. (David Smiley) + API Changes * LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils @@ -133,6 +137,9 @@ Other * LUCENE-7263: Make queryparser/xml/CoreParser's SpanQueryBuilderFactory accessible to deriving classes. (Daniel Collins via Christine Poerschke) +======================= Lucene 6.0.1 ======================= +(No Changes) + ======================= Lucene 6.0.0 ======================= System Requirements diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java index b0c292b0348..f359369a0c9 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.codecs; - /** Remove this file when adding back compat codecs */ public class Placeholder { diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java index 001439ce14d..19d6e3bbe90 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java @@ -108,7 +108,7 @@ public class Lucene50Codec extends Codec { } @Override - public final SegmentInfoFormat segmentInfoFormat() { + public SegmentInfoFormat segmentInfoFormat() { return segmentInfosFormat; } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java new file mode 100644 index 00000000000..9c5453f65b5 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene50; + + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexWriter; // javadocs +import org.apache.lucene.index.SegmentInfo; // javadocs +import org.apache.lucene.index.SegmentInfos; // javadocs +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataOutput; // javadocs +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.Version; + +/** + * Lucene 5.0 Segment info format. + * @deprecated Only for reading old 5.0-6.0 segments + */ +@Deprecated +public class Lucene50SegmentInfoFormat extends SegmentInfoFormat { + + /** Sole constructor. */ + public Lucene50SegmentInfoFormat() { + } + + @Override + public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION); + try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) { + Throwable priorE = null; + SegmentInfo si = null; + try { + int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME, + Lucene50SegmentInfoFormat.VERSION_START, + Lucene50SegmentInfoFormat.VERSION_CURRENT, + segmentID, ""); + final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); + + final int docCount = input.readInt(); + if (docCount < 0) { + throw new CorruptIndexException("invalid docCount: " + docCount, input); + } + final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; + + final Map diagnostics; + final Set files; + final Map attributes; + + if (format >= VERSION_SAFE_MAPS) { + diagnostics = input.readMapOfStrings(); + files = input.readSetOfStrings(); + attributes = input.readMapOfStrings(); + } else { + diagnostics = Collections.unmodifiableMap(input.readStringStringMap()); + files = Collections.unmodifiableSet(input.readStringSet()); + attributes = Collections.unmodifiableMap(input.readStringStringMap()); + } + + si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null); + si.setFiles(files); + } catch (Throwable exception) { + priorE = exception; + } finally { + CodecUtil.checkFooter(input, priorE); + } + return si; + } + } + + @Override + public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { + throw new UnsupportedOperationException("this codec can only be used for reading"); + } + + /** File extension used to store {@link SegmentInfo}. 
*/ + public final static String SI_EXTENSION = "si"; + static final String CODEC_NAME = "Lucene50SegmentInfo"; + static final int VERSION_START = 0; + static final int VERSION_SAFE_MAPS = 1; + static final int VERSION_CURRENT = VERSION_SAFE_MAPS; +} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java index 2dde0cf6d90..d982d3b9ce0 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java @@ -51,7 +51,9 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; * * @see org.apache.lucene.codecs.lucene54 package documentation for file format details. * @lucene.experimental + * @deprecated Only for 5.x back compat */ +@Deprecated public class Lucene54Codec extends Codec { private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); private final FieldInfosFormat fieldInfosFormat = new Lucene50FieldInfosFormat(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java similarity index 98% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java index 9f0d546740d..32c17527deb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.codecs.lucene60; - import java.util.Objects; import org.apache.lucene.codecs.Codec; @@ -51,7 +50,9 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; * @see org.apache.lucene.codecs.lucene60 package documentation for file format details. * * @lucene.experimental + * @deprecated Only for 6.0 back compat */ +@Deprecated public class Lucene60Codec extends Codec { private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat(); @@ -149,7 +150,7 @@ public class Lucene60Codec extends Codec { /** Returns the docvalues format that should be used for writing * new segments of field. * - * The default implementation always returns "Lucene50". + * The default implementation always returns "Lucene54". *
<p>
* WARNING: if you subclass, you are responsible for index * backwards compatibility: future version of Lucene are only diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html new file mode 100644 index 00000000000..6b4e234826d --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html @@ -0,0 +1,25 @@ + + + + + + + +Lucene 6.0 file format. + + diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 7f66de899e7..71aa938e21e 100644 --- a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -16,3 +16,4 @@ org.apache.lucene.codecs.lucene50.Lucene50Codec org.apache.lucene.codecs.lucene53.Lucene53Codec org.apache.lucene.codecs.lucene54.Lucene54Codec +org.apache.lucene.codecs.lucene60.Lucene60Codec diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java index 359e2ec3d22..8fdeb2041d2 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java @@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene50; import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; /** * Codec for testing 5.0 index format @@ -26,9 +27,15 @@ import org.apache.lucene.codecs.NormsFormat; @Deprecated final class Lucene50RWCodec extends Lucene50Codec { private final NormsFormat normsFormat = new Lucene50RWNormsFormat(); + private final SegmentInfoFormat segmentInfoFormat = new Lucene50RWSegmentInfoFormat(); @Override public NormsFormat normsFormat() { return normsFormat; } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return segmentInfoFormat; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java similarity index 76% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java index 68aacc62db8..0a373b1dc76 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java @@ -37,43 +37,14 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Version; /** - * Lucene 5.0 Segment info format. - *
<p>
- * Files: - *

- * Data types: - * - * Field Descriptions: - * - * - * @see SegmentInfos - * @lucene.experimental + * Read-write version of 5.0 SegmentInfoFormat for testing + * @deprecated for test purposes only */ -public class Lucene50SegmentInfoFormat extends SegmentInfoFormat { +@Deprecated +public class Lucene50RWSegmentInfoFormat extends Lucene50SegmentInfoFormat { /** Sole constructor. */ - public Lucene50SegmentInfoFormat() { + public Lucene50RWSegmentInfoFormat() { } @Override @@ -109,7 +80,7 @@ public class Lucene50SegmentInfoFormat extends SegmentInfoFormat { attributes = Collections.unmodifiableMap(input.readStringStringMap()); } - si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes); + si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null); si.setFiles(files); } catch (Throwable exception) { priorE = exception; @@ -124,6 +95,8 @@ public class Lucene50SegmentInfoFormat extends SegmentInfoFormat { public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION); + assert si.getIndexSort() == null; + try (IndexOutput output = dir.createOutput(fileName, ioContext)) { // Only add the file once we've successfully created it, else IFD assert can trip: si.addFile(fileName); @@ -153,6 +126,7 @@ public class Lucene50SegmentInfoFormat extends SegmentInfoFormat { } output.writeSetOfStrings(files); output.writeMapOfStrings(si.getAttributes()); + CodecUtil.writeFooter(output); } } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java index 74486d6c38c..df8a1b49404 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java @@ -29,7 +29,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexDeletionPolicy; @@ -139,7 +139,7 @@ public class CreateIndexTask extends PerfTask { if (defaultCodec == null && postingsFormat != null) { try { final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat); - iwConf.setCodec(new Lucene60Codec() { + iwConf.setCodec(new Lucene62Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postingsFormatChosen; diff --git a/lucene/classification/build.xml b/lucene/classification/build.xml index 3ddb9bd26e6..704cae8973d 100644 --- a/lucene/classification/build.xml +++ b/lucene/classification/build.xml @@ -28,7 +28,6 @@ - @@ -37,17 +36,16 @@ - + - - diff --git a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java index fce786bf1e9..c1c8ad19ee6 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java +++ 
b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -38,7 +39,6 @@ import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.GroupingSearch; import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.store.Directory; -import org.apache.lucene.uninverting.UninvertingReader; /** * Utility class for creating training / test / cross validation indexes from the original index. @@ -68,7 +68,7 @@ public class DatasetSplitter { * @param crossValidationIndex a {@link Directory} used to write the cross validation index * @param analyzer {@link Analyzer} used to create the new docs * @param termVectors {@code true} if term vectors should be kept - * @param classFieldName names of the field used as the label for classification + * @param classFieldName name of the field used as the label for classification; this must be indexed with sorted doc values * @param fieldNames names of fields that need to be put in the new indexes or null if all should be used * @throws IOException if any writing operation fails on any of the indexes */ @@ -80,30 +80,23 @@ public class DatasetSplitter { IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer)); IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer)); - // try to get the exact no. of existing classes - Terms terms = originalIndex.terms(classFieldName); - long noOfClasses = -1; - if (terms != null) { - noOfClasses = terms.size(); - - } - if (noOfClasses == -1) { - noOfClasses = 10000; // fallback + // get the exact no. 
of existing classes + SortedDocValues classValues = originalIndex.getSortedDocValues(classFieldName); + if (classValues == null) { + throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values"); } - HashMap mapping = new HashMap<>(); - mapping.put(classFieldName, UninvertingReader.Type.SORTED); - UninvertingReader uninvertingReader = new UninvertingReader(originalIndex, mapping); + int noOfClasses = classValues.getValueCount(); try { - IndexSearcher indexSearcher = new IndexSearcher(uninvertingReader); + IndexSearcher indexSearcher = new IndexSearcher(originalIndex); GroupingSearch gs = new GroupingSearch(classFieldName); gs.setGroupSort(Sort.INDEXORDER); gs.setSortWithinGroup(Sort.INDEXORDER); gs.setAllGroups(true); gs.setGroupDocsLimit(originalIndex.maxDoc()); - TopGroups topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, (int) noOfClasses); + TopGroups topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, noOfClasses); // set the type to be indexed, stored, with term vectors FieldType ft = new FieldType(TextField.TYPE_STORED); @@ -156,7 +149,7 @@ public class DatasetSplitter { testWriter.close(); cvWriter.close(); trainingWriter.close(); - uninvertingReader.close(); + originalIndex.close(); } } diff --git a/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java b/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java index 0b6f077cdfd..fdd4b0bb4a8 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java @@ -70,7 +70,9 @@ public class DataSplitterTest extends LuceneTestCase { doc = new Document(); doc.add(new Field(idFieldName, "id" + Integer.toString(i), ft)); doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft)); - doc.add(new Field(classFieldName, Integer.toString(rnd.nextInt(10)), ft)); + String className = Integer.toString(rnd.nextInt(10)); + doc.add(new Field(classFieldName, className, ft)); + doc.add(new SortedDocValuesField(classFieldName, new BytesRef(className))); indexWriter.addDocument(doc); } @@ -89,13 +91,11 @@ public class DataSplitterTest extends LuceneTestCase { super.tearDown(); } - @Test public void testSplitOnAllFields() throws Exception { assertSplit(originalIndex, 0.1, 0.1); } - @Test public void testSplitOnSomeFields() throws Exception { assertSplit(originalIndex, 0.2, 0.35, idFieldName, textFieldName); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java index 3b026bedacd..20235528dca 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java @@ -36,6 +36,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer { private IndexOutput out; private final BytesRefBuilder scratch = new BytesRefBuilder(); private final SegmentWriteState writeState; + final String segment; final static BytesRef END = new BytesRef("END"); final static BytesRef FIELD = new BytesRef("field "); @@ -49,6 +50,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer { public SimpleTextFieldsWriter(SegmentWriteState writeState) throws IOException { final String fileName = 
SimpleTextPostingsFormat.getPostingsFileName(writeState.segmentInfo.name, writeState.segmentSuffix); + segment = writeState.segmentInfo.name; out = writeState.directory.createOutput(fileName, writeState.context); this.writeState = writeState; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java index 0823a888040..146e92a6a29 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java @@ -31,6 +31,8 @@ import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -59,6 +61,11 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { final static BytesRef SI_NUM_FILES = new BytesRef(" files "); final static BytesRef SI_FILE = new BytesRef(" file "); final static BytesRef SI_ID = new BytesRef(" id "); + final static BytesRef SI_SORT = new BytesRef(" sort "); + final static BytesRef SI_SORT_FIELD = new BytesRef(" field "); + final static BytesRef SI_SORT_TYPE = new BytesRef(" type "); + final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse "); + final static BytesRef SI_SORT_MISSING = new BytesRef(" missing "); public static final String SI_EXTENSION = "si"; @@ -137,10 +144,119 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { + ", got: " + StringHelper.idToString(id), input); } + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), SI_SORT); + final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch)); + SortField[] sortField = new SortField[numSortFields]; + for (int i = 0; i < numSortFields; ++i) { + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD); + final String field = readString(SI_SORT_FIELD.length, scratch); + + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE); + final String typeAsString = readString(SI_SORT_TYPE.length, scratch); + + final SortField.Type type; + switch (typeAsString) { + case "string": + type = SortField.Type.STRING; + break; + case "long": + type = SortField.Type.LONG; + break; + case "int": + type = SortField.Type.INT; + break; + case "double": + type = SortField.Type.DOUBLE; + break; + case "float": + type = SortField.Type.FLOAT; + break; + default: + throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input); + } + + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE); + final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch)); + + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING); + final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch); + final Object missingValue; + switch (type) { + case STRING: + switch (missingLastAsString) { + case "null": + missingValue = null; + break; + case "first": + missingValue = 
SortField.STRING_FIRST; + break; + case "last": + missingValue = SortField.STRING_LAST; + break; + default: + throw new CorruptIndexException("unable to parse missing string: " + typeAsString, input); + } + break; + case LONG: + switch (missingLastAsString) { + case "null": + missingValue = null; + break; + default: + missingValue = Long.parseLong(missingLastAsString); + break; + } + break; + case INT: + switch (missingLastAsString) { + case "null": + missingValue = null; + break; + default: + missingValue = Integer.parseInt(missingLastAsString); + break; + } + break; + case DOUBLE: + switch (missingLastAsString) { + case "null": + missingValue = null; + break; + default: + missingValue = Double.parseDouble(missingLastAsString); + break; + } + break; + case FLOAT: + switch (missingLastAsString) { + case "null": + missingValue = null; + break; + default: + missingValue = Float.parseFloat(missingLastAsString); + break; + } + break; + default: + throw new AssertionError(); + } + sortField[i] = new SortField(field, type, reverse); + if (missingValue != null) { + sortField[i].setMissingValue(missingValue); + } + } + Sort indexSort = sortField.length == 0 ? null : new Sort(sortField); + SimpleTextUtil.checkFooter(input); SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, - isCompoundFile, null, Collections.unmodifiableMap(diagnostics), id, Collections.unmodifiableMap(attributes)); + isCompoundFile, null, Collections.unmodifiableMap(diagnostics), + id, Collections.unmodifiableMap(attributes), indexSort); info.setFiles(files); return info; } @@ -223,6 +339,62 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { SimpleTextUtil.write(output, new BytesRef(si.getId())); SimpleTextUtil.writeNewline(output); + Sort indexSort = si.getIndexSort(); + SimpleTextUtil.write(output, SI_SORT); + final int numSortFields = indexSort == null ? 
0 : indexSort.getSort().length; + SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch); + SimpleTextUtil.writeNewline(output); + for (int i = 0; i < numSortFields; ++i) { + final SortField sortField = indexSort.getSort()[i]; + + SimpleTextUtil.write(output, SI_SORT_FIELD); + SimpleTextUtil.write(output, sortField.getField(), scratch); + SimpleTextUtil.writeNewline(output); + + SimpleTextUtil.write(output, SI_SORT_TYPE); + final String sortType; + switch (sortField.getType()) { + case STRING: + sortType = "string"; + break; + case LONG: + sortType = "long"; + break; + case INT: + sortType = "int"; + break; + case DOUBLE: + sortType = "double"; + break; + case FLOAT: + sortType = "float"; + break; + default: + throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); + } + SimpleTextUtil.write(output, sortType, scratch); + SimpleTextUtil.writeNewline(output); + + SimpleTextUtil.write(output, SI_SORT_REVERSE); + SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch); + SimpleTextUtil.writeNewline(output); + + SimpleTextUtil.write(output, SI_SORT_MISSING); + final Object missingValue = sortField.getMissingValue(); + final String missing; + if (missingValue == null) { + missing = "null"; + } else if (missingValue == SortField.STRING_FIRST) { + missing = "first"; + } else if (missingValue == SortField.STRING_LAST) { + missing = "last"; + } else { + missing = missingValue.toString(); + } + SimpleTextUtil.write(output, missing, scratch); + SimpleTextUtil.writeNewline(output); + } + SimpleTextUtil.writeChecksum(output, scratch); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java index 00259b89711..b59114a65e2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java @@ -143,7 +143,6 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter { } else { write(TYPE_STRING); newLine(); - write(VALUE); write(field.stringValue()); newLine(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java index 5d704ca017d..442445c2237 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java @@ -57,7 +57,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { } // TODO: should we use this, or maybe a system property is better? 
- static Codec defaultCodec = LOADER.lookup("Lucene60"); + static Codec defaultCodec = LOADER.lookup("Lucene62"); } private final String name; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 90abf2ad44b..427b520aa4a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.codecs; - import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -25,12 +24,13 @@ import java.util.List; import java.util.NoSuchElementException; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocIDMerger; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.MultiDocValues.OrdinalMap; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentWriteState; // javadocs import org.apache.lucene.index.SortedDocValues; @@ -44,6 +44,8 @@ import org.apache.lucene.util.LongBitSet; import org.apache.lucene.util.LongValues; import org.apache.lucene.util.packed.PackedInts; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + /** * Abstract API that consumes numeric, binary and * sorted docvalues. Concrete implementations of this @@ -240,6 +242,32 @@ public abstract class DocValuesConsumer implements Closeable { } } } + + /** Tracks state of one numeric sub-reader that we are merging */ + private static class NumericDocValuesSub extends DocIDMerger.Sub { + + private final NumericDocValues values; + private final Bits docsWithField; + private int docID = -1; + private final int maxDoc; + + public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, Bits docsWithField, int maxDoc) { + super(docMap); + this.values = values; + this.docsWithField = docsWithField; + this.maxDoc = maxDoc; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + } /** * Merges the numeric docvalues from toMerge. @@ -248,20 +276,23 @@ public abstract class DocValuesConsumer implements Closeable { * an Iterable that merges and filters deleted documents on the fly. 
*/ public void mergeNumericField(final FieldInfo fieldInfo, final MergeState mergeState, final List toMerge, final List docsWithField) throws IOException { - addNumericField(fieldInfo, new Iterable() { @Override public Iterator iterator() { + + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + return new Iterator() { - int readerUpto = -1; - int docIDUpto; long nextValue; boolean nextHasValue; - int currentMaxDoc; - NumericDocValues currentValues; - Bits currentLiveDocs; - Bits currentDocsWithField; boolean nextIsSet; @Override @@ -276,7 +307,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public Number next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } assert nextIsSet; @@ -285,43 +316,46 @@ public abstract class DocValuesConsumer implements Closeable { } private boolean setNext() { - while (true) { - if (readerUpto == toMerge.size()) { - return false; - } - - if (docIDUpto == currentMaxDoc) { - readerUpto++; - if (readerUpto < toMerge.size()) { - currentValues = toMerge.get(readerUpto); - currentDocsWithField = docsWithField.get(readerUpto); - currentLiveDocs = mergeState.liveDocs[readerUpto]; - currentMaxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; - } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - nextIsSet = true; - nextValue = currentValues.get(docIDUpto); - if (nextValue == 0 && currentDocsWithField.get(docIDUpto) == false) { - nextHasValue = false; - } else { - nextHasValue = true; - } - docIDUpto++; - return true; - } - - docIDUpto++; + NumericDocValuesSub sub = docIDMerger.next(); + if (sub == null) { + return false; } + nextIsSet = true; + nextValue = sub.values.get(sub.docID); + nextHasValue = nextValue != 0 || sub.docsWithField.get(sub.docID); + return true; } }; } }); } + /** Tracks state of one binary sub-reader that we are merging */ + private static class BinaryDocValuesSub extends DocIDMerger.Sub { + + private final BinaryDocValues values; + private final Bits docsWithField; + private int docID = -1; + private final int maxDoc; + + public BinaryDocValuesSub(MergeState.DocMap docMap, BinaryDocValues values, Bits docsWithField, int maxDoc) { + super(docMap); + this.values = values; + this.docsWithField = docsWithField; + this.maxDoc = maxDoc; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + } + /** * Merges the binary docvalues from toMerge. *
<p>
@@ -329,20 +363,23 @@ public abstract class DocValuesConsumer implements Closeable { * an Iterable that merges and filters deleted documents on the fly. */ public void mergeBinaryField(FieldInfo fieldInfo, final MergeState mergeState, final List toMerge, final List docsWithField) throws IOException { - addBinaryField(fieldInfo, new Iterable() { @Override public Iterator iterator() { + + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + return new Iterator() { - int readerUpto = -1; - int docIDUpto; BytesRef nextValue; BytesRef nextPointer; // points to null if missing, or nextValue - int currentMaxDoc; - BinaryDocValues currentValues; - Bits currentLiveDocs; - Bits currentDocsWithField; boolean nextIsSet; @Override @@ -357,7 +394,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public BytesRef next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } assert nextIsSet; @@ -367,42 +404,49 @@ public abstract class DocValuesConsumer implements Closeable { private boolean setNext() { while (true) { - if (readerUpto == toMerge.size()) { - return false; + BinaryDocValuesSub sub = docIDMerger.next(); + if (sub == null) { + return false; + } + nextIsSet = true; + if (sub.docsWithField.get(sub.docID)) { + nextPointer = nextValue = sub.values.get(sub.docID); + } else { + nextPointer = null; + } + return true; } - - if (docIDUpto == currentMaxDoc) { - readerUpto++; - if (readerUpto < toMerge.size()) { - currentValues = toMerge.get(readerUpto); - currentDocsWithField = docsWithField.get(readerUpto); - currentLiveDocs = mergeState.liveDocs[readerUpto]; - currentMaxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; - } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - nextIsSet = true; - if (currentDocsWithField.get(docIDUpto)) { - nextValue = currentValues.get(docIDUpto); - nextPointer = nextValue; - } else { - nextPointer = null; - } - docIDUpto++; - return true; - } - - docIDUpto++; } - } }; } }); } + /** Tracks state of one sorted numeric sub-reader that we are merging */ + private static class SortedNumericDocValuesSub extends DocIDMerger.Sub { + + private final SortedNumericDocValues values; + private int docID = -1; + private final int maxDoc; + + public SortedNumericDocValuesSub(MergeState.DocMap docMap, SortedNumericDocValues values, int maxDoc) { + super(docMap); + this.values = values; + this.maxDoc = maxDoc; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + values.setDocument(docID); + return docID; + } + } + } + /** * Merges the sorted docvalues from toMerge. *
<p>
@@ -410,21 +454,24 @@ public abstract class DocValuesConsumer implements Closeable { * iterables that filter deleted documents. */ public void mergeSortedNumericField(FieldInfo fieldInfo, final MergeState mergeState, List toMerge) throws IOException { - final int numReaders = toMerge.size(); - final SortedNumericDocValues dvs[] = toMerge.toArray(new SortedNumericDocValues[numReaders]); - // step 3: add field addSortedNumericField(fieldInfo, // doc -> value count new Iterable() { @Override public Iterator iterator() { + + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + return new Iterator() { - int readerUpto = -1; - int docIDUpto; int nextValue; - int currentMaxDoc; - Bits currentLiveDocs; boolean nextIsSet; @Override @@ -439,7 +486,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public Number next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } assert nextIsSet; @@ -449,30 +496,13 @@ public abstract class DocValuesConsumer implements Closeable { private boolean setNext() { while (true) { - if (readerUpto == numReaders) { + SortedNumericDocValuesSub sub = docIDMerger.next(); + if (sub == null) { return false; } - - if (docIDUpto == currentMaxDoc) { - readerUpto++; - if (readerUpto < numReaders) { - currentLiveDocs = mergeState.liveDocs[readerUpto]; - currentMaxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; - } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - nextIsSet = true; - SortedNumericDocValues dv = dvs[readerUpto]; - dv.setDocument(docIDUpto); - nextValue = dv.count(); - docIDUpto++; - return true; - } - - docIDUpto++; + nextIsSet = true; + nextValue = sub.values.count(); + return true; } } }; @@ -482,15 +512,21 @@ public abstract class DocValuesConsumer implements Closeable { new Iterable() { @Override public Iterator iterator() { + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + return new Iterator() { - int readerUpto = -1; - int docIDUpto; long nextValue; - int currentMaxDoc; - Bits currentLiveDocs; boolean nextIsSet; int valueUpto; int valueLength; + SortedNumericDocValuesSub current; @Override public boolean hasNext() { @@ -504,7 +540,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public Number next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } assert nextIsSet; @@ -514,38 +550,21 @@ public abstract class DocValuesConsumer implements Closeable { private boolean setNext() { while (true) { - if (readerUpto == numReaders) { - return false; - } if (valueUpto < valueLength) { - nextValue = dvs[readerUpto].valueAt(valueUpto); + nextValue = current.values.valueAt(valueUpto); valueUpto++; nextIsSet = true; return true; } - if (docIDUpto == currentMaxDoc) { - readerUpto++; - if (readerUpto < numReaders) { - currentLiveDocs = mergeState.liveDocs[readerUpto]; - currentMaxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; + current = docIDMerger.next(); + if (current == null) { + return false; } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - assert docIDUpto < 
currentMaxDoc; - SortedNumericDocValues dv = dvs[readerUpto]; - dv.setDocument(docIDUpto); - valueUpto = 0; - valueLength = dv.count(); - docIDUpto++; - continue; - } - - docIDUpto++; + valueUpto = 0; + valueLength = current.values.count(); + continue; } } }; @@ -554,6 +573,32 @@ public abstract class DocValuesConsumer implements Closeable { ); } + /** Tracks state of one sorted sub-reader that we are merging */ + private static class SortedDocValuesSub extends DocIDMerger.Sub { + + private final SortedDocValues values; + private int docID = -1; + private final int maxDoc; + private final LongValues map; + + public SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values, int maxDoc, LongValues map) { + super(docMap); + this.values = values; + this.maxDoc = maxDoc; + this.map = map; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + } + /** * Merges the sorted docvalues from toMerge. *
<p>
@@ -608,7 +653,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public BytesRef next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } int segmentNumber = map.getFirstSegmentNumber(currentOrd); @@ -629,13 +674,17 @@ public abstract class DocValuesConsumer implements Closeable { new Iterable() { @Override public Iterator iterator() { + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + return new Iterator() { - int readerUpto = -1; - int docIDUpto; int nextValue; - int currentMaxDoc; - Bits currentLiveDocs; - LongValues currentMap; boolean nextIsSet; @Override @@ -650,7 +699,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public Number next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } assert nextIsSet; @@ -661,30 +710,15 @@ public abstract class DocValuesConsumer implements Closeable { private boolean setNext() { while (true) { - if (readerUpto == numReaders) { + SortedDocValuesSub sub = docIDMerger.next(); + if (sub == null) { return false; } - if (docIDUpto == currentMaxDoc) { - readerUpto++; - if (readerUpto < numReaders) { - currentMap = map.getGlobalOrds(readerUpto); - currentLiveDocs = mergeState.liveDocs[readerUpto]; - currentMaxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; - } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - nextIsSet = true; - int segOrd = dvs[readerUpto].getOrd(docIDUpto); - nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd); - docIDUpto++; - return true; - } - - docIDUpto++; + nextIsSet = true; + int segOrd = sub.values.getOrd(sub.docID); + nextValue = segOrd == -1 ? -1 : (int) sub.map.get(segOrd); + return true; } } }; @@ -693,6 +727,37 @@ public abstract class DocValuesConsumer implements Closeable { ); } + /** Tracks state of one sorted set sub-reader that we are merging */ + private static class SortedSetDocValuesSub extends DocIDMerger.Sub { + + private final SortedSetDocValues values; + int docID = -1; + private final int maxDoc; + private final LongValues map; + + public SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values, int maxDoc, LongValues map) { + super(docMap); + this.values = values; + this.maxDoc = maxDoc; + this.map = map; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + + @Override + public String toString() { + return "SortedSetDocValuesSub(docID=" + docID + " mappedDocID=" + mappedDocID + " values=" + values + ")"; + } + } + /** * Merges the sortedset docvalues from toMerge. *
<p>
@@ -700,14 +765,12 @@ public abstract class DocValuesConsumer implements Closeable { * an Iterable that merges ordinals and values and filters deleted documents . */ public void mergeSortedSetField(FieldInfo fieldInfo, final MergeState mergeState, List toMerge) throws IOException { - final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]); - final int numReaders = mergeState.maxDocs.length; // step 1: iterate thru each sub and mark terms still in use - TermsEnum liveTerms[] = new TermsEnum[dvs.length]; + TermsEnum liveTerms[] = new TermsEnum[toMerge.size()]; long[] weights = new long[liveTerms.length]; for (int sub = 0; sub < liveTerms.length; sub++) { - SortedSetDocValues dv = dvs[sub]; + SortedSetDocValues dv = toMerge.get(sub); Bits liveDocs = mergeState.liveDocs[sub]; int maxDoc = mergeState.maxDocs[sub]; if (liveDocs == null) { @@ -748,12 +811,12 @@ public abstract class DocValuesConsumer implements Closeable { @Override public BytesRef next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } int segmentNumber = map.getFirstSegmentNumber(currentOrd); long segmentOrd = map.getFirstSegmentOrd(currentOrd); - final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd); + final BytesRef term = toMerge.get(segmentNumber).lookupOrd(segmentOrd); currentOrd++; return term; } @@ -769,12 +832,18 @@ public abstract class DocValuesConsumer implements Closeable { new Iterable() { @Override public Iterator iterator() { + + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + return new Iterator() { - int readerUpto = -1; - int docIDUpto; int nextValue; - int currentMaxDoc; - Bits currentLiveDocs; boolean nextIsSet; @Override @@ -789,7 +858,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public Number next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } assert nextIsSet; @@ -800,33 +869,18 @@ public abstract class DocValuesConsumer implements Closeable { private boolean setNext() { while (true) { - if (readerUpto == numReaders) { + SortedSetDocValuesSub sub = docIDMerger.next(); + if (sub == null) { return false; } - - if (docIDUpto == currentMaxDoc) { - readerUpto++; - if (readerUpto < numReaders) { - currentLiveDocs = mergeState.liveDocs[readerUpto]; - currentMaxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; + sub.values.setDocument(sub.docID); + nextValue = 0; + while (sub.values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { + nextValue++; } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - nextIsSet = true; - SortedSetDocValues dv = dvs[readerUpto]; - dv.setDocument(docIDUpto); - nextValue = 0; - while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { - nextValue++; - } - docIDUpto++; - return true; - } - - docIDUpto++; + //System.out.println(" doc " + sub + " -> ord count = " + nextValue); + nextIsSet = true; + return true; } } }; @@ -836,13 +890,18 @@ public abstract class DocValuesConsumer implements Closeable { new Iterable() { @Override public Iterator iterator() { + + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + 
return new Iterator() { - int readerUpto = -1; - int docIDUpto; long nextValue; - int currentMaxDoc; - Bits currentLiveDocs; - LongValues currentMap; boolean nextIsSet; long ords[] = new long[8]; int ordUpto; @@ -860,7 +919,7 @@ public abstract class DocValuesConsumer implements Closeable { @Override public Number next() { - if (!hasNext()) { + if (hasNext() == false) { throw new NoSuchElementException(); } assert nextIsSet; @@ -871,10 +930,6 @@ public abstract class DocValuesConsumer implements Closeable { private boolean setNext() { while (true) { - if (readerUpto == numReaders) { - return false; - } - if (ordUpto < ordLength) { nextValue = ords[ordUpto]; ordUpto++; @@ -882,35 +937,22 @@ public abstract class DocValuesConsumer implements Closeable { return true; } - if (docIDUpto == currentMaxDoc) { - readerUpto++; - if (readerUpto < numReaders) { - currentMap = map.getGlobalOrds(readerUpto); - currentLiveDocs = mergeState.liveDocs[readerUpto]; - currentMaxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; - } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - assert docIDUpto < currentMaxDoc; - SortedSetDocValues dv = dvs[readerUpto]; - dv.setDocument(docIDUpto); - ordUpto = ordLength = 0; - long ord; - while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - if (ordLength == ords.length) { - ords = ArrayUtil.grow(ords, ordLength+1); - } - ords[ordLength] = currentMap.get(ord); - ordLength++; - } - docIDUpto++; - continue; + SortedSetDocValuesSub sub = docIDMerger.next(); + if (sub == null) { + return false; } + sub.values.setDocument(sub.docID); - docIDUpto++; + ordUpto = ordLength = 0; + long ord; + while ((ord = sub.values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + if (ordLength == ords.length) { + ords = ArrayUtil.grow(ords, ordLength+1); + } + ords[ordLength] = sub.map.get(ord); + ordLength++; + } + continue; } } }; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java index b771aabf43a..39d39022a75 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.codecs; - import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -24,6 +23,7 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import org.apache.lucene.index.DocIDMerger; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.MergeState; @@ -31,6 +31,8 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.Bits; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + /** * Abstract API that consumes normalization values. 
* Concrete implementations of this @@ -98,6 +100,30 @@ public abstract class NormsConsumer implements Closeable { } } + /** Tracks state of one numeric sub-reader that we are merging */ + private static class NumericDocValuesSub extends DocIDMerger.Sub { + + private final NumericDocValues values; + private int docID = -1; + private final int maxDoc; + + public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, int maxDoc) { + super(docMap); + this.values = values; + this.maxDoc = maxDoc; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + } + /** * Merges the norms from toMerge. *

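Note (illustrative, not part of the patch): the merge rewrites in this change, the NormsConsumer hunk below and the doc values, stored fields and term vectors writers elsewhere, all drive the new DocIDMerger the same way: each sub-reader is wrapped in a DocIDMerger.Sub, and the merger hands subs back in merged order, concatenated when there is no index sort and ordered by mapped doc ID when there is. A minimal sketch of that pattern; SimpleSub and visitMergedDocs are made-up names:

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.search.DocIdSetIterator;

// Illustrative sketch only, not code from the patch.
class DocIDMergerSketch {

  /** A Sub that simply walks doc IDs 0..maxDoc-1 of one segment being merged. */
  static class SimpleSub extends DocIDMerger.Sub {
    final int maxDoc;
    int docID = -1;

    SimpleSub(MergeState.DocMap docMap, int maxDoc) {
      super(docMap);
      this.maxDoc = maxDoc;
    }

    @Override
    public int nextDoc() {
      docID++;
      return docID == maxDoc ? DocIdSetIterator.NO_MORE_DOCS : docID;
    }
  }

  static void visitMergedDocs(MergeState mergeState) {
    List<SimpleSub> subs = new ArrayList<>();
    for (int i = 0; i < mergeState.docMaps.length; i++) {
      subs.add(new SimpleSub(mergeState.docMaps[i], mergeState.maxDocs[i]));
    }
    // Concatenates the subs when there is no index sort, otherwise merges them by mapped doc ID.
    DocIDMerger<SimpleSub> merger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
    SimpleSub sub;
    while ((sub = merger.next()) != null) {
      // sub.docID is the doc in its original segment; sub.mappedDocID is its ID in the merged segment.
      // Deleted documents are skipped by the merger (their docMap entry is -1).
    }
  }
}

Each real Sub in the patch additionally carries whatever per-segment state its consumer needs (a NumericDocValues, a StoredFieldsReader, a TermVectorsReader, and so on).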
@@ -111,13 +137,18 @@ public abstract class NormsConsumer implements Closeable { new Iterable() { @Override public Iterator iterator() { + + // We must make a new DocIDMerger for each iterator: + List subs = new ArrayList<>(); + assert mergeState.docMaps.length == toMerge.size(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + return new Iterator() { - int readerUpto = -1; - int docIDUpto; long nextValue; - int maxDoc; - NumericDocValues currentValues; - Bits currentLiveDocs; boolean nextIsSet; @Override @@ -141,31 +172,13 @@ public abstract class NormsConsumer implements Closeable { } private boolean setNext() { - while (true) { - if (readerUpto == toMerge.size()) { - return false; - } - - if (currentValues == null || docIDUpto == maxDoc) { - readerUpto++; - if (readerUpto < toMerge.size()) { - currentValues = toMerge.get(readerUpto); - currentLiveDocs = mergeState.liveDocs[readerUpto]; - maxDoc = mergeState.maxDocs[readerUpto]; - } - docIDUpto = 0; - continue; - } - - if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { - nextIsSet = true; - nextValue = currentValues.get(docIDUpto); - docIDUpto++; - return true; - } - - docIDUpto++; + NumericDocValuesSub sub = docIDMerger.next(); + if (sub == null) { + return false; } + nextIsSet = true; + nextValue = sub.values.get(sub.docID); + return true; } }; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java index 43b4416fedb..05084db6ca1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java @@ -76,7 +76,6 @@ public abstract class PointsWriter implements Closeable { } MergeState.DocMap docMap = mergeState.docMaps[i]; - int docBase = mergeState.docBase[i]; pointsReader.intersect(fieldInfo.name, new IntersectVisitor() { @Override @@ -90,7 +89,7 @@ public abstract class PointsWriter implements Closeable { int newDocID = docMap.get(docID); if (newDocID != -1) { // Not deleted: - mergedVisitor.visit(docBase + newDocID, packedValue); + mergedVisitor.visit(newDocID, packedValue); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java index b8cff117e5f..26652aa8231 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java @@ -20,10 +20,13 @@ import java.io.Closeable; import java.io.IOException; import java.io.Reader; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocIDMerger; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexableField; @@ -33,6 +36,8 @@ import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + /** * Codec API for writing stored fields: *

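Note on the PointsWriter hunk above (illustrative sketch, not part of the patch): the per-reader docBase can be dropped because the MergeState.DocMap used throughout this change maps a segment-local doc ID directly to its ID in the merged segment, or to -1 when the document is deleted. Assuming a MergeState and a reader index in scope, the contract looks roughly like this (the method name is made up):

// Illustrative only: remap one segment's doc IDs into merged-segment doc ID space.
static void remapOneSegment(MergeState mergeState, int readerIndex) {
  MergeState.DocMap docMap = mergeState.docMaps[readerIndex];
  int maxDoc = mergeState.maxDocs[readerIndex];
  for (int segDocID = 0; segDocID < maxDoc; segDocID++) {
    int mergedDocID = docMap.get(segDocID);
    if (mergedDocID == -1) {
      continue; // deleted during this merge
    }
    // mergedDocID is already absolute in the merged segment; no per-reader docBase is added.
  }
}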
    @@ -73,6 +78,30 @@ public abstract class StoredFieldsWriter implements Closeable { * check that this is the case to detect the JRE bug described * in LUCENE-1282. */ public abstract void finish(FieldInfos fis, int numDocs) throws IOException; + + private static class StoredFieldsMergeSub extends DocIDMerger.Sub { + private final StoredFieldsReader reader; + private final int maxDoc; + private final MergeVisitor visitor; + int docID = -1; + + public StoredFieldsMergeSub(MergeVisitor visitor, MergeState.DocMap docMap, StoredFieldsReader reader, int maxDoc) { + super(docMap); + this.maxDoc = maxDoc; + this.reader = reader; + this.visitor = visitor; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + } /** Merges in the stored fields from the readers in * mergeState. The default implementation skips @@ -82,23 +111,26 @@ public abstract class StoredFieldsWriter implements Closeable { * Implementations can override this method for more sophisticated * merging (bulk-byte copying, etc). */ public int merge(MergeState mergeState) throws IOException { - int docCount = 0; - for (int i=0;i subs = new ArrayList<>(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + + int docCount = 0; + while (true) { + StoredFieldsMergeSub sub = docIDMerger.next(); + if (sub == null) { + break; } + assert sub.mappedDocID == docCount; + startDocument(); + sub.reader.visitDocument(sub.docID, sub.visitor); + finishDocument(); + docCount++; } finish(mergeState.mergeFieldInfos, docCount); return docCount; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java index 1aff7379d37..5756d5beb87 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java @@ -16,16 +16,18 @@ */ package org.apache.lucene.codecs; - import java.io.Closeable; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; -import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.DocIDMerger; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; @@ -34,6 +36,8 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + /** * Codec API for writing term vectors: *
      @@ -160,6 +164,28 @@ public abstract class TermVectorsWriter implements Closeable { } } + private static class TermVectorsMergeSub extends DocIDMerger.Sub { + private final TermVectorsReader reader; + private final int maxDoc; + int docID = -1; + + public TermVectorsMergeSub(MergeState.DocMap docMap, TermVectorsReader reader, int maxDoc) { + super(docMap); + this.maxDoc = maxDoc; + this.reader = reader; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + } + /** Merges in the term vectors from the readers in * mergeState. The default implementation skips * over deleted documents, and uses {@link #startDocument(int)}, @@ -170,32 +196,35 @@ public abstract class TermVectorsWriter implements Closeable { * Implementations can override this method for more sophisticated * merging (bulk-byte copying, etc). */ public int merge(MergeState mergeState) throws IOException { + + List subs = new ArrayList<>(); + for(int i=0;i docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null); + int docCount = 0; - int numReaders = mergeState.maxDocs.length; - for (int i = 0; i < numReaders; i++) { - int maxDoc = mergeState.maxDocs[i]; - Bits liveDocs = mergeState.liveDocs[i]; - TermVectorsReader termVectorsReader = mergeState.termVectorsReaders[i]; - if (termVectorsReader != null) { - termVectorsReader.checkIntegrity(); + while (true) { + TermVectorsMergeSub sub = docIDMerger.next(); + if (sub == null) { + break; } - for (int docID=0;docIDEach segment index maintains the following:

      *
        *
      • - * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}. + * {@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment info}. * This contains metadata about a segment, such as the number of documents, * what files it uses, *
      • @@ -235,7 +235,7 @@ * file. * * - * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info} + * {@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment Info} * .si * Stores metadata about a segment * diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java index 2b1e13dfedf..63308c422b3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java @@ -123,6 +123,13 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable { @Override public void merge(MergeState mergeState) throws IOException { + if (mergeState.segmentInfo.getIndexSort() != null) { + // TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub + // being copied over...? + super.merge(mergeState); + return; + } + for(PointsReader reader : mergeState.pointsReaders) { if (reader instanceof Lucene60PointsReader == false) { // We can only bulk merge when all to-be-merged segments use our format: @@ -171,7 +178,6 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable { singleValuePerDoc)) { List bkdReaders = new ArrayList<>(); List docMaps = new ArrayList<>(); - List docIDBases = new ArrayList<>(); for(int i=0;iApache Lucene - Index File Formats - * - * - *

        Introduction

        - *
        - *

        This document defines the index file formats used in this version of Lucene. - * If you are using a different version of Lucene, please consult the copy of - * docs/ that was distributed with - * the version you are using.

        - *

        Apache Lucene is written in Java, but several efforts are underway to write - * versions of - * Lucene in other programming languages. If these versions are to remain - * compatible with Apache Lucene, then a language-independent definition of the - * Lucene index format is required. This document thus attempts to provide a - * complete and independent definition of the Apache Lucene file formats.

        - *

        As Lucene evolves, this document should evolve. Versions of Lucene in - * different programming languages should endeavor to agree on file formats, and - * generate new versions of this document.

        - *
        - * - *

        Definitions

        - *
        - *

        The fundamental concepts in Lucene are index, document, field and term.

        - *

        An index contains a sequence of documents.

        - *
          - *
        • A document is a sequence of fields.
        • - *
        • A field is a named sequence of terms.
        • - *
        • A term is a sequence of bytes.
        • - *
        - *

        The same sequence of bytes in two different fields is considered a different - * term. Thus terms are represented as a pair: the string naming the field, and the - * bytes within the field.

        - * - *

        Inverted Indexing

        - *

        The index stores statistics about terms in order to make term-based search - * more efficient. Lucene's index falls into the family of indexes known as an - * inverted index. This is because it can list, for a term, the documents - * that contain it. This is the inverse of the natural relationship, in which - * documents list terms.

        - * - *

        Types of Fields

        - *

        In Lucene, fields may be stored, in which case their text is stored - * in the index literally, in a non-inverted manner. Fields that are inverted are - * called indexed. A field may be both stored and indexed.

        - *

        The text of a field may be tokenized into terms to be indexed, or the - * text of a field may be used literally as a term to be indexed. Most fields are - * tokenized, but sometimes it is useful for certain identifier fields to be - * indexed literally.

        - *

        See the {@link org.apache.lucene.document.Field Field} - * java docs for more information on Fields.

        - * - *

        Segments

        - *

        Lucene indexes may be composed of multiple sub-indexes, or segments. - * Each segment is a fully independent index, which could be searched separately. - * Indexes evolve by:

        - *
          - *
        1. Creating new segments for newly added documents.
        2. - *
        3. Merging existing segments.
        4. - *
        - *

        Searches may involve multiple segments and/or multiple indexes, each index - * potentially composed of a set of segments.

        - * - *

        Document Numbers

        - *

        Internally, Lucene refers to documents by an integer document number. - * The first document added to an index is numbered zero, and each subsequent - * document added gets a number one greater than the previous.

        - *

        Note that a document's number may change, so caution should be taken when - * storing these numbers outside of Lucene. In particular, numbers may change in - * the following situations:

        - *
          - *
        • - *

          The numbers stored in each segment are unique only within the segment, and - * must be converted before they can be used in a larger context. The standard - * technique is to allocate each segment a range of values, based on the range of - * numbers used in that segment. To convert a document number from a segment to an - * external value, the segment's base document number is added. To convert - * an external value back to a segment-specific value, the segment is identified - * by the range that the external value is in, and the segment's base value is - * subtracted. For example two five document segments might be combined, so that - * the first segment has a base value of zero, and the second of five. Document - * three from the second segment would have an external value of eight.

          - *
        • - *
        • - *

          When documents are deleted, gaps are created in the numbering. These are - * eventually removed as the index evolves through merging. Deleted documents are - * dropped when segments are merged. A freshly-merged segment thus has no gaps in - * its numbering.

          - *
        • - *
        - *
        - * - *

        Index Structure Overview

        - *
        - *

        Each segment index maintains the following:

        - *
          - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}. - * This contains metadata about a segment, such as the number of documents, - * what files it uses, - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Field names}. - * This contains the set of field names used in the index. - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Stored Field values}. - * This contains, for each document, a list of attribute-value pairs, where the attributes - * are field names. These are used to store auxiliary information about the document, such as - * its title, url, or an identifier to access a database. The set of stored fields are what is - * returned for each hit when searching. This is keyed by document number. - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term dictionary}. - * A dictionary containing all of the terms used in all of the - * indexed fields of all of the documents. The dictionary also contains the number - * of documents which contain the term, and pointers to the term's frequency and - * proximity data. - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Frequency data}. - * For each term in the dictionary, the numbers of all the - * documents that contain that term, and the frequency of the term in that - * document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Proximity data}. - * For each term in the dictionary, the positions that the - * term occurs in each document. Note that this will not exist if all fields in - * all documents omit position data. - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Normalization factors}. - * For each field in each document, a value is stored - * that is multiplied into the score for hits on that field. - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vectors}. - * For each field in each document, the term vector (sometimes - * called document vector) may be stored. A term vector consists of term text and - * term frequency. To add Term Vectors to your index see the - * {@link org.apache.lucene.document.Field Field} constructors - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-document values}. - * Like stored values, these are also keyed by document - * number, but are generally intended to be loaded into main memory for fast - * access. Whereas stored values are generally intended for summary results from - * searches, per-document values are useful for things like scoring factors. - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. - * An optional file indicating which documents are live. - *
        • - *
        • - * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}. - * Optional pair of files, recording dimensionally indexed fields, to enable fast - * numeric range filtering and large numeric values like BigInteger and BigDecimal (1D) - * and geographic shape intersection (2D, 3D). - *
        • - *
        - *

        Details on each of these are provided in their linked pages.

        - *
        - * - *

        File Naming

        - *
        - *

        All files belonging to a segment have the same name with varying extensions. - * The extensions correspond to the different file formats described below. When - * using the Compound File format (default in 1.4 and greater) these files (except - * for the Segment info file, the Lock file, and Deleted documents file) are collapsed - * into a single .cfs file (see below for details)

        - *

        Typically, all segments in an index are stored in a single directory, - * although this is not required.

        - *

        As of version 2.1 (lock-less commits), file names are never re-used. - * That is, when any file is saved - * to the Directory it is given a never before used filename. This is achieved - * using a simple generations approach. For example, the first segments file is - * segments_1, then segments_2, etc. The generation is a sequential long integer - * represented in alpha-numeric (base 36) form.

        - *
        - * - *

        Summary of File Extensions

        - *
        - *

        The following table summarizes the names and extensions of the files in - * Lucene:

Name | Extension | Brief Description
{@link org.apache.lucene.index.SegmentInfos Segments File} | segments_N | Stores information about a commit point
Lock File | write.lock | The Write lock prevents multiple IndexWriters from writing to the same file.
{@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info} | .si | Stores metadata about a segment
{@link org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat Compound File} | .cfs, .cfe | An optional "virtual" file consisting of all the other index files for systems that frequently run out of file handles.
{@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Fields} | .fnm | Stores information about the fields
{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Index} | .fdx | Contains pointers to field data
{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Data} | .fdt | The stored fields for documents
{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Dictionary} | .tim | The term dictionary, stores term info
{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Index} | .tip | The index into the Term Dictionary
{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Frequencies} | .doc | Contains the list of docs which contain each term along with frequency
{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Positions} | .pos | Stores position information about where a term occurs in the index
{@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Payloads} | .pay | Stores additional per-position metadata information such as character offsets and user payloads
{@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Norms} | .nvd, .nvm | Encodes length and boost factors for docs and fields
{@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-Document Values} | .dvd, .dvm | Encodes additional scoring factors or other per-document information.
{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Index} | .tvx | Stores offset into the document data file
{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Documents} | .tvd | Contains information about each document that has term vectors
{@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Fields} | .tvf | The field level info about term vectors
{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents} | .liv | Info about what files are live
{@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values} | .dii, .dim | Holds indexed points, if any

        Lock File

        - * The write lock, which is stored in the index directory by default, is named - * "write.lock". If the lock directory is different from the index directory then - * the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix - * derived from the full path to the index directory. When this file is present, a - * writer is currently modifying the index (adding or removing documents). This - * lock file ensures that only one writer is modifying the index at a time. - * - *

        History

        - *

        Compatibility notes are provided in this document, describing how file - * formats have changed from prior versions:

        - *
          - *
        • In version 2.1, the file format was changed to allow lock-less commits (ie, - * no more commit lock). The change is fully backwards compatible: you can open a - * pre-2.1 index for searching or adding/deleting of docs. When the new segments - * file is saved (committed), it will be written in the new file format (meaning - * no specific "upgrade" process is needed). But note that once a commit has - * occurred, pre-2.1 Lucene will not be able to read the index.
        • - *
        • In version 2.3, the file format was changed to allow segments to share a - * single set of doc store (vectors & stored fields) files. This allows for - * faster indexing in certain cases. The change is fully backwards compatible (in - * the same way as the lock-less commits change in 2.1).
        • - *
        • In version 2.4, Strings are now written as true UTF-8 byte sequence, not - * Java's modified UTF-8. See - * LUCENE-510 for details.
        • - *
        • In version 2.9, an optional opaque Map<String,String> CommitUserData - * may be passed to IndexWriter's commit methods (and later retrieved), which is - * recorded in the segments_N file. See - * LUCENE-1382 for details. Also, - * diagnostics were added to each segment written recording details about why it - * was written (due to flush, merge; which OS/JRE was used; etc.). See issue - * LUCENE-1654 for details.
        • - *
        • In version 3.0, compressed fields are no longer written to the index (they - * can still be read, but on merge the new segment will write them, uncompressed). - * See issue LUCENE-1960 - * for details.
        • - *
        • In version 3.1, segments records the code version that created them. See - * LUCENE-2720 for details. - * Additionally segments track explicitly whether or not they have term vectors. - * See LUCENE-2811 - * for details.
        • - *
        • In version 3.2, numeric fields are written as natively to stored fields - * file, previously they were stored in text format only.
        • - *
        • In version 3.4, fields can omit position data while still indexing term - * frequencies.
        • - *
        • In version 4.0, the format of the inverted index became extensible via - * the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage - * ({@code DocValues}) was introduced. Normalization factors need no longer be a - * single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. - * Terms need not be unicode strings, they can be any byte sequence. Term offsets - * can optionally be indexed into the postings lists. Payloads can be stored in the - * term vectors.
        • - *
        • In version 4.1, the format of the postings list changed to use either - * of FOR compression or variable-byte encoding, depending upon the frequency - * of the term. Terms appearing only once were changed to inline directly into - * the term dictionary. Stored fields are compressed by default.
        • - *
        • In version 4.2, term vectors are compressed by default. DocValues has - * a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining - * on multi-valued fields.
        • - *
        • In version 4.5, DocValues were extended to explicitly represent missing values.
        • - *
        • In version 4.6, FieldInfos were extended to support per-field DocValues generation, to - * allow updating NumericDocValues fields.
        • - *
        • In version 4.8, checksum footers were added to the end of each index file - * for improved data integrity. Specifically, the last 8 bytes of every index file - * contain the zlib-crc32 checksum of the file.
        • - *
        • In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric) - * that is suitable for faceting/sorting/analytics. - *
        • In version 5.4, DocValues have been improved to store more information on disk: - * addresses for binary fields and ord indexes for multi-valued fields. - *
        • In version 6.0, Points were added, for multi-dimensional range/distance search. - *
        • - *
        - * - *

        Limitations

        - *
        - *

        Lucene uses a Java int to refer to - * document numbers, and the index file format uses an Int32 - * on-disk to store document numbers. This is a limitation - * of both the index file format and the current implementation. Eventually these - * should be replaced with either UInt64 values, or - * better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

        - *
        + * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene62} + * for an overview of the index format. */ package org.apache.lucene.codecs.lucene60; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java new file mode 100644 index 00000000000..50710752694 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene62; + +import java.util.Objects; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; +import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat; +import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat; +import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat; +import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; + +/** + * Implements the Lucene 6.2 index format, with configurable per-field postings + * and docvalues formats. + *

        + * If you want to reuse functionality of this codec in another codec, extend + * {@link FilterCodec}. + * + * @see org.apache.lucene.codecs.lucene60 package documentation for file format details. + * + * @lucene.experimental + */ +public class Lucene62Codec extends Codec { + private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat(); + private final SegmentInfoFormat segmentInfosFormat = new Lucene62SegmentInfoFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); + private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); + + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return Lucene62Codec.this.getPostingsFormatForField(field); + } + }; + + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return Lucene62Codec.this.getDocValuesFormatForField(field); + } + }; + + private final StoredFieldsFormat storedFieldsFormat; + + /** + * Instantiates a new codec. + */ + public Lucene62Codec() { + this(Mode.BEST_SPEED); + } + + /** + * Instantiates a new codec, specifying the stored fields compression + * mode to use. + * @param mode stored fields compression mode to use for newly + * flushed/merged segments. + */ + public Lucene62Codec(Mode mode) { + super("Lucene62"); + this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode)); + } + + @Override + public final StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + + @Override + public final TermVectorsFormat termVectorsFormat() { + return vectorsFormat; + } + + @Override + public final PostingsFormat postingsFormat() { + return postingsFormat; + } + + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public final SegmentInfoFormat segmentInfoFormat() { + return segmentInfosFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + @Override + public final CompoundFormat compoundFormat() { + return compoundFormat; + } + + @Override + public final PointsFormat pointsFormat() { + return new Lucene60PointsFormat(); + } + + /** Returns the postings format that should be used for writing + * new segments of field. + * + * The default implementation always returns "Lucene50". + *

        + * WARNING: if you subclass, you are responsible for index + * backwards compatibility: future version of Lucene are only + * guaranteed to be able to read the default implementation. + */ + public PostingsFormat getPostingsFormatForField(String field) { + return defaultFormat; + } + + /** Returns the docvalues format that should be used for writing + * new segments of field. + * + * The default implementation always returns "Lucene54". + *

        + * WARNING: if you subclass, you are responsible for index + * backwards compatibility: future version of Lucene are only + * guaranteed to be able to read the default implementation. + */ + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } + + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50"); + private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54"); + + private final NormsFormat normsFormat = new Lucene53NormsFormat(); + + @Override + public final NormsFormat normsFormat() { + return normsFormat; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java new file mode 100644 index 00000000000..fe78572680c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene62; + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexWriter; // javadocs +import org.apache.lucene.index.SegmentInfo; // javadocs +import org.apache.lucene.index.SegmentInfos; // javadocs +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataOutput; // javadocs +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.Version; + +/** + * Lucene 6.2 Segment info format. + *

        + * Files: + *

          + *
        • .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, IndexSort, Footer + *
        + * Data types: + *
          + *
        • Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
        • + *
        • SegSize --> {@link DataOutput#writeInt Int32}
        • + *
        • SegVersion --> {@link DataOutput#writeString String}
        • + *
        • Files --> {@link DataOutput#writeSetOfStrings Set<String>}
        • + *
        • Diagnostics,Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}
        • + *
        • IsCompoundFile --> {@link DataOutput#writeByte Int8}
        • + *
        • IndexSort --> {@link DataOutput#writeVInt Int32} count, followed by {@code count} SortField
        • + *
        • SortField --> {@link DataOutput#writeString String} field name, followed by {@link DataOutput#writeVInt Int32} sort type ID, + * followed by {@link DataOutput#writeByte Int8} indicating reversed sort, followed by a type-specific encoding of the optional missing value
        • Footer --> {@link CodecUtil#writeFooter CodecFooter}
        • + *
        + * Field Descriptions: + *
          + *
        • SegVersion is the code version that created the segment.
        • + *
        • SegSize is the number of documents contained in the segment index.
        • + *
        • IsCompoundFile records whether the segment is written as a compound file or + * not. If this is -1, the segment is not a compound file. If it is 1, the segment + * is a compound file.
        • + *
        • The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid, + * for each segment it creates. It includes metadata like the current Lucene + * version, OS, Java version, why the segment was created (merge, flush, + * addIndexes), etc.
        • + *
        • Files is a list of files referred to by this segment.
        • + *
        + * + * @see SegmentInfos + * @lucene.experimental + */ +public class Lucene62SegmentInfoFormat extends SegmentInfoFormat { + + /** Sole constructor. */ + public Lucene62SegmentInfoFormat() { + } + + @Override + public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION); + try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) { + Throwable priorE = null; + SegmentInfo si = null; + try { + int format = CodecUtil.checkIndexHeader(input, Lucene62SegmentInfoFormat.CODEC_NAME, + Lucene62SegmentInfoFormat.VERSION_START, + Lucene62SegmentInfoFormat.VERSION_CURRENT, + segmentID, ""); + final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); + + final int docCount = input.readInt(); + if (docCount < 0) { + throw new CorruptIndexException("invalid docCount: " + docCount, input); + } + final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; + + final Map diagnostics = input.readMapOfStrings(); + final Set files = input.readSetOfStrings(); + final Map attributes = input.readMapOfStrings(); + + int numSortFields = input.readVInt(); + Sort indexSort; + if (numSortFields > 0) { + SortField[] sortFields = new SortField[numSortFields]; + for(int i=0;i= 5 but got: " + version.major + " segment=" + si); + } + // Write the Lucene version that created this segment, since 3.1 + output.writeInt(version.major); + output.writeInt(version.minor); + output.writeInt(version.bugfix); + assert version.prerelease == 0; + output.writeInt(si.maxDoc()); + + output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); + output.writeMapOfStrings(si.getDiagnostics()); + Set files = si.files(); + for (String file : files) { + if (!IndexFileNames.parseSegmentName(file).equals(si.name)) { + throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files); + } + } + output.writeSetOfStrings(files); + output.writeMapOfStrings(si.getAttributes()); + + Sort indexSort = si.getIndexSort(); + int numSortFields = indexSort == null ? 0 : indexSort.getSort().length; + output.writeVInt(numSortFields); + for (int i = 0; i < numSortFields; ++i) { + SortField sortField = indexSort.getSort()[i]; + output.writeString(sortField.getField()); + int sortTypeID; + switch (sortField.getType()) { + case STRING: + sortTypeID = 0; + break; + case LONG: + sortTypeID = 1; + break; + case INT: + sortTypeID = 2; + break; + case DOUBLE: + sortTypeID = 3; + break; + case FLOAT: + sortTypeID = 4; + break; + default: + throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); + } + output.writeVInt(sortTypeID); + output.writeByte((byte) (sortField.getReverse() ? 
0 : 1)); + + // write missing value + Object missingValue = sortField.getMissingValue(); + if (missingValue == null) { + output.writeByte((byte) 0); + } else { + switch(sortField.getType()) { + case STRING: + if (missingValue == SortField.STRING_LAST) { + output.writeByte((byte) 1); + } else if (missingValue == SortField.STRING_FIRST) { + output.writeByte((byte) 2); + } else { + throw new AssertionError("unrecognized missing value for STRING field \"" + sortField.getField() + "\": " + missingValue); + } + break; + case LONG: + output.writeByte((byte) 1); + output.writeLong(((Long) missingValue).longValue()); + break; + case INT: + output.writeByte((byte) 1); + output.writeInt(((Integer) missingValue).intValue()); + break; + case DOUBLE: + output.writeByte((byte) 1); + output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue())); + break; + case FLOAT: + output.writeByte((byte) 1); + output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue())); + break; + default: + throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); + } + } + } + + CodecUtil.writeFooter(output); + } + } + + /** File extension used to store {@link SegmentInfo}. */ + public final static String SI_EXTENSION = "si"; + static final String CODEC_NAME = "Lucene62SegmentInfo"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java new file mode 100644 index 00000000000..2fe2dc74b4a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Components from the Lucene 6.2 index format + * See {@link org.apache.lucene.codecs.lucene62} for an overview + * of the index format. 
+ */ + +package org.apache.lucene.codecs.lucene62; diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 89b36efa2d3..9dee2d14e1c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -43,6 +43,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DocumentStoredFieldVisitor; import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.LeafFieldComparator; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -217,6 +220,9 @@ public final class CheckIndex implements Closeable { /** Status for testing of PointValues (null if PointValues could not be tested). */ public PointsStatus pointsStatus; + + /** Status of index sort */ + public IndexSortStatus indexSortStatus; } /** @@ -374,6 +380,18 @@ public final class CheckIndex implements Closeable { /** Exception thrown during doc values test (null on success) */ public Throwable error = null; } + + /** + * Status from testing index sort + */ + public static final class IndexSortStatus { + IndexSortStatus() { + } + + /** Exception thrown during term index test (null on success) */ + public Throwable error = null; + } + } /** Create a new CheckIndex on the directory. */ @@ -632,6 +650,7 @@ public final class CheckIndex implements Closeable { int toLoseDocCount = info.info.maxDoc(); SegmentReader reader = null; + Sort previousIndexSort = null; try { msg(infoStream, " version=" + (version == null ? "3.0" : version)); @@ -642,6 +661,17 @@ public final class CheckIndex implements Closeable { msg(infoStream, " compound=" + info.info.getUseCompoundFile()); segInfoStat.compound = info.info.getUseCompoundFile(); msg(infoStream, " numFiles=" + info.files().size()); + Sort indexSort = info.info.getIndexSort(); + if (indexSort != null) { + msg(infoStream, " sort=" + indexSort); + if (previousIndexSort != null) { + if (previousIndexSort.equals(indexSort) == false) { + throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort); + } + } else { + previousIndexSort = indexSort; + } + } segInfoStat.numFiles = info.files().size(); segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.); msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB)); @@ -722,6 +752,9 @@ public final class CheckIndex implements Closeable { // Test PointValues segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast); + // Test index sort + segInfoStat.indexSortStatus = testSort(reader, indexSort, infoStream, failFast); + // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.liveDocStatus.error != null) { @@ -790,6 +823,72 @@ public final class CheckIndex implements Closeable { return result; } + + /** + * Tests index sort order. + * @lucene.experimental + */ + public static Status.IndexSortStatus testSort(CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException { + // This segment claims its documents are sorted according to the incoming sort ... 
let's make sure: + + long startNS = System.nanoTime(); + + Status.IndexSortStatus status = new Status.IndexSortStatus(); + + if (sort != null) { + if (infoStream != null) { + infoStream.print(" test: index sort.........."); + } + + SortField fields[] = sort.getSort(); + final int reverseMul[] = new int[fields.length]; + final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length]; + + LeafReaderContext readerContext = new LeafReaderContext(reader); + + for (int i = 0; i < fields.length; i++) { + reverseMul[i] = fields[i].getReverse() ? -1 : 1; + comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext); + } + + int maxDoc = reader.maxDoc(); + + try { + + for(int docID=1;docID < maxDoc;docID++) { + + int cmp = 0; + + for (int i = 0; i < comparators.length; i++) { + // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co, + // the segments are always the same here... + comparators[i].copy(0, docID-1); + comparators[i].setBottom(0); + cmp = reverseMul[i] * comparators[i].compareBottom(docID); + if (cmp != 0) { + break; + } + } + + if (cmp > 0) { + throw new RuntimeException("segment has indexSort=" + sort + " but docID=" + (docID-1) + " sorts after docID=" + docID); + } + } + msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime()-startNS))); + } catch (Throwable e) { + if (failFast) { + IOUtils.reThrow(e); + } + msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]"); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + } + + return status; + } /** * Test live docs. diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java new file mode 100644 index 00000000000..07c9e725270 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.search.DocIdSetIterator; // javadocs +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.PriorityQueue; + +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + +/** Utility class to help merging documents from sub-readers according to either simple + * concatenated (unsorted) order, or by a specified index-time sort, skipping + * deleted documents and remapping non-deleted documents. 
*/ + +public class DocIDMerger { + + private final List subs; + + // Used when indexSort != null: + private final PriorityQueue queue; + private boolean first; + + // Used when indexIsSorted + private T current; + private int nextIndex; + + /** Represents one sub-reader being merged */ + public static abstract class Sub { + /** Mapped doc ID */ + public int mappedDocID; + + final MergeState.DocMap docMap; + + /** Sole constructor */ + public Sub(MergeState.DocMap docMap) { + this.docMap = docMap; + } + + /** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */ + public abstract int nextDoc(); + } + + /** Construct this from the provided subs, specifying the maximum sub count */ + public DocIDMerger(List subs, int maxCount, boolean indexIsSorted) { + this.subs = subs; + + if (indexIsSorted && maxCount > 1) { + queue = new PriorityQueue(maxCount) { + @Override + protected boolean lessThan(Sub a, Sub b) { + assert a.mappedDocID != b.mappedDocID; + return a.mappedDocID < b.mappedDocID; + } + }; + } else { + // We simply concatentate + queue = null; + } + + reset(); + } + + /** Construct this from the provided subs */ + public DocIDMerger(List subs, boolean indexIsSorted) { + this(subs, subs.size(), indexIsSorted); + } + + /** Reuse API, currently only used by postings during merge */ + public void reset() { + if (queue != null) { + // caller may not have fully consumed the queue: + queue.clear(); + for(T sub : subs) { + while (true) { + int docID = sub.nextDoc(); + if (docID == NO_MORE_DOCS) { + // all docs in this sub were deleted; do not add it to the queue! + break; + } + + int mappedDocID = sub.docMap.get(docID); + if (mappedDocID == -1) { + // doc was deleted + continue; + } else { + sub.mappedDocID = mappedDocID; + queue.add(sub); + break; + } + } + } + first = true; + } else if (subs.size() > 0) { + current = subs.get(0); + nextIndex = 1; + } else { + current = null; + nextIndex = 0; + } + } + + /** Returns null when done */ + public T next() { + // Loop until we find a non-deleted document + if (queue != null) { + T top = queue.top(); + if (top == null) { + // NOTE: it's annoying that caller is allowed to call us again even after we returned null before + return null; + } + + if (first == false) { + while (true) { + int docID = top.nextDoc(); + if (docID == NO_MORE_DOCS) { + queue.pop(); + top = queue.top(); + break; + } + int mappedDocID = top.docMap.get(docID); + if (mappedDocID == -1) { + // doc was deleted + continue; + } else { + top.mappedDocID = mappedDocID; + top = queue.updateTop(); + break; + } + } + } + + first = false; + + return top; + + } else { + while (true) { + if (current == null) { + // NOTE: it's annoying that caller is allowed to call us again even after we returned null before + return null; + } + int docID = current.nextDoc(); + if (docID == NO_MORE_DOCS) { + if (nextIndex == subs.size()) { + current = null; + return null; + } + current = subs.get(nextIndex); + nextIndex++; + continue; + } + int mappedDocID = current.docMap.get(docID); + if (mappedDocID == -1) { + // doc is deleted + continue; + } + + current.mappedDocID = mappedDocID; + return current; + } + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValues.java b/lucene/core/src/java/org/apache/lucene/index/DocValues.java index feceb3bd3ff..4de42387042 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValues.java @@ -210,7 +210,7 @@ public final class 
DocValues { (expected.length == 1 ? "(expected=" + expected[0] : "(expected one of " + Arrays.toString(expected)) + "). " + - "Use UninvertingReader or index with docvalues."); + "Re-index with correct docvalues type."); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 65d6a144e74..3e8a2270297 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -178,7 +178,7 @@ class DocumentsWriterPerThread { pendingUpdates.clear(); deleteSlice = deleteQueue.newSlice(); - segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); assert numDocsInRAM == 0; if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue); diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java index c35dc6719c9..13b6e8d6d4d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java @@ -25,6 +25,7 @@ import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; /** @@ -101,6 +102,11 @@ public abstract class FilterCodecReader extends CodecReader { return in.maxDoc(); } + @Override + public Sort getIndexSort() { + return in.getIndexSort(); + } + @Override public void addCoreClosedListener(CoreClosedListener listener) { in.addCoreClosedListener(listener); diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java index f273dba6fc9..886c12a6836 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java @@ -22,6 +22,7 @@ import java.util.Iterator; import java.util.Objects; import org.apache.lucene.search.QueryCache; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -471,6 +472,12 @@ public abstract class FilterLeafReader extends LeafReader { return in.getDocsWithField(field); } + @Override + public Sort getIndexSort() { + ensureOpen(); + return in.getIndexSort(); + } + @Override public void checkIntegrity() throws IOException { ensureOpen(); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 2b45b6b38ab..159f5917c42 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.index; - import java.io.Closeable; import java.io.FileNotFoundException; import java.io.IOException; @@ -32,8 +31,8 @@ import java.util.Iterator; 
import java.util.LinkedList; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Map.Entry; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -49,6 +48,7 @@ import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -937,6 +937,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { // NOTE: this is correct even for an NRT reader because we'll pull FieldInfos even for the un-committed segments: globalFieldNumberMap = getFieldNumberMap(); + validateIndexSort(); + config.getFlushPolicy().init(config); docWriter = new DocumentsWriter(this, config, directoryOrig, directory); eventQueue = docWriter.eventQueue(); @@ -1000,6 +1002,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { } } + /** Confirms that the incoming index sort (if any) matches the existing index sort (if any). This is unfortunately just best effort, + * because it could be the old index only has flushed segments. */ + private void validateIndexSort() { + Sort indexSort = config.getIndexSort(); + if (indexSort != null) { + for(SegmentCommitInfo info : segmentInfos) { + Sort segmentIndexSort = info.info.getIndexSort(); + if (segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) { + throw new IllegalArgumentException("cannot change previous indexSort=" + segmentIndexSort + " (from segment=" + info + ") to new indexSort=" + indexSort); + } + } + } + } + // reads latest field infos for the commit // this is used on IW init and addIndexes(Dir) to create/update the global field map. // TODO: fix tests abusing this method! @@ -2472,7 +2488,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * @throws IllegalArgumentException if addIndexes would cause - * the index to exceed {@link #MAX_DOCS} + * the index to exceed {@link #MAX_DOCS}, or if the indoming + * index sort does not match this index's index sort */ public void addIndexes(Directory... dirs) throws IOException { ensureOpen(); @@ -2481,6 +2498,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { List locks = acquireWriteLocks(dirs); + Sort indexSort = config.getIndexSort(); + boolean successTop = false; try { @@ -2513,6 +2532,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { for (SegmentCommitInfo info : sis) { assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name; + Sort segmentIndexSort = info.info.getIndexSort(); + + if (indexSort != null && segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) { + // TODO: we could make this smarter, e.g. 
if the incoming indexSort is congruent with our sort ("starts with") then it's OK + throw new IllegalArgumentException("cannot change index sort from " + segmentIndexSort + " to " + indexSort); + } + String newSegName = newSegmentName(); if (infoStream.isEnabled("IW")) { @@ -2609,6 +2635,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { // long so we can detect int overflow: long numDocs = 0; + Sort indexSort = config.getIndexSort(); + try { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "flush at addIndexes(CodecReader...)"); @@ -2618,6 +2646,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { String mergedName = newSegmentName(); for (CodecReader leaf : readers) { numDocs += leaf.numDocs(); + Sort leafIndexSort = leaf.getIndexSort(); + if (indexSort != null && leafIndexSort != null && indexSort.equals(leafIndexSort) == false) { + throw new IllegalArgumentException("cannot change index sort from " + leafIndexSort + " to " + indexSort); + } } // Best-effort up front check: @@ -2630,7 +2662,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); SegmentInfo info = new SegmentInfo(directoryOrig, Version.LATEST, mergedName, -1, - false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort()); SegmentMerger merger = new SegmentMerger(Arrays.asList(readers), info, infoStream, trackingDir, globalFieldNumberMap, @@ -2715,7 +2747,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { // Same SI as before but we change directory and name SegmentInfo newInfo = new SegmentInfo(directoryOrig, info.info.getVersion(), segName, info.info.maxDoc(), info.info.getUseCompoundFile(), info.info.getCodec(), - info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes()); + info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes(), info.info.getIndexSort()); SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(), info.getDocValuesGen()); @@ -3243,16 +3275,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { private static class MergedDeletesAndUpdates { ReadersAndUpdates mergedDeletesAndUpdates = null; - MergePolicy.DocMap docMap = null; boolean initializedWritableLiveDocs = false; MergedDeletesAndUpdates() {} - final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, boolean initWritableLiveDocs) throws IOException { + final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, boolean initWritableLiveDocs) throws IOException { if (mergedDeletesAndUpdates == null) { mergedDeletesAndUpdates = readerPool.get(merge.info, true); - docMap = merge.getDocMap(mergeState); - assert docMap.isConsistent(merge.info.info.maxDoc()); } if (initWritableLiveDocs && !initializedWritableLiveDocs) { mergedDeletesAndUpdates.initWritableLiveDocs(); @@ -3262,18 +3291,18 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { } - private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto, + private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, MergedDeletesAndUpdates holder, String[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates, - 
DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc) throws IOException { + DocValuesFieldUpdates.Iterator[] updatesIters, int segment, int curDoc) throws IOException { int newDoc = -1; for (int idx = 0; idx < mergingFields.length; idx++) { DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx]; if (updatesIter.doc() == curDoc) { // document has an update if (holder.mergedDeletesAndUpdates == null) { - holder.init(readerPool, merge, mergeState, false); + holder.init(readerPool, merge, false); } if (newDoc == -1) { // map once per all field updates, but only if there are any updates - newDoc = holder.docMap.map(docUpto); + newDoc = mergeState.docMaps[segment].get(curDoc); } DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx]; dvUpdates.add(newDoc, updatesIter.value()); @@ -3306,13 +3335,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { // Carefully merge deletes that occurred after we // started merging: - int docUpto = 0; long minGen = Long.MAX_VALUE; // Lazy init (only when we find a delete to carry over): final MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates(); final DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container(); - + + assert sourceSegments.size() == mergeState.docMaps.length; for (int i = 0; i < sourceSegments.size(); i++) { SegmentCommitInfo info = sourceSegments.get(i); minGen = Math.min(info.getBufferedDeletesGen(), minGen); @@ -3375,21 +3404,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { // since we started the merge, so we // must merge them: for (int j = 0; j < maxDoc; j++) { - if (!prevLiveDocs.get(j)) { - assert !currentLiveDocs.get(j); - } else { - if (!currentLiveDocs.get(j)) { - if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) { - holder.init(readerPool, merge, mergeState, true); - } - holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto)); - if (mergingFields != null) { // advance all iters beyond the deleted document - skipDeletedDoc(updatesIters, j); - } - } else if (mergingFields != null) { - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); + if (prevLiveDocs.get(j) == false) { + // if the document was deleted before, it better still be deleted! 
+ assert currentLiveDocs.get(j) == false; + } else if (currentLiveDocs.get(j) == false) { + // the document was deleted while we were merging: + if (holder.mergedDeletesAndUpdates == null || holder.initializedWritableLiveDocs == false) { + holder.init(readerPool, merge, true); } - docUpto++; + holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j))); + if (mergingFields != null) { // advance all iters beyond the deleted document + skipDeletedDoc(updatesIters, j); + } + } else if (mergingFields != null) { + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); } } } else if (mergingFields != null) { @@ -3397,50 +3425,38 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { for (int j = 0; j < maxDoc; j++) { if (prevLiveDocs.get(j)) { // document isn't deleted, check if any of the fields have an update to it - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); - // advance docUpto for every non-deleted document - docUpto++; + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); } else { // advance all iters beyond the deleted document skipDeletedDoc(updatesIters, j); } } - } else { - docUpto += info.info.maxDoc() - info.getDelCount() - rld.getPendingDeleteCount(); } } else if (currentLiveDocs != null) { assert currentLiveDocs.length() == maxDoc; // This segment had no deletes before but now it // does: for (int j = 0; j < maxDoc; j++) { - if (!currentLiveDocs.get(j)) { + if (currentLiveDocs.get(j) == false) { if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) { - holder.init(readerPool, merge, mergeState, true); + holder.init(readerPool, merge, true); } - holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto)); + holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j))); if (mergingFields != null) { // advance all iters beyond the deleted document skipDeletedDoc(updatesIters, j); } } else if (mergingFields != null) { - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); } - docUpto++; } } else if (mergingFields != null) { // no deletions before or after, but there were updates for (int j = 0; j < maxDoc; j++) { - maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); - // advance docUpto for every non-deleted document - docUpto++; + maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j); } - } else { - // No deletes or updates before or after - docUpto += info.info.maxDoc(); } } - assert docUpto == merge.info.info.maxDoc(); - if (mergedDVUpdates.any()) { // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates); boolean success = false; @@ -3881,7 +3897,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { // ConcurrentMergePolicy we keep deterministic segment // names. 
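The delete-carrying loop above replaces the old docUpto counter with two maps: mergeState.leafDocMaps[i] reorders a document within its own leaf (identity unless the leaf had to be sorted on the fly), and mergeState.docMaps[i] then places it in the merged segment, returning -1 for deleted documents. A toy illustration of that composition (not part of this patch, with made-up arrays):

    public class DocMapComposition {
      public static void main(String[] args) {
        // Within-leaf reorder: doc 0 sorts to position 2, doc 1 to position 0, doc 2 to position 1.
        int[] leafDocMap = {2, 0, 1};
        // Position in the merged segment for each within-leaf position; -1 means the doc was dropped.
        int[] segmentDocMap = {10, -1, 11};
        for (int j = 0; j < leafDocMap.length; j++) {
          int mapped = segmentDocMap[leafDocMap[j]];
          System.out.println("old doc " + j + " -> " + (mapped == -1 ? "deleted" : "merged doc " + mapped));
        }
      }
    }
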
final String mergeSegmentName = newSegmentName(); - SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort()); Map details = new HashMap<>(); details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); details.put("mergeFactor", Integer.toString(merge.segments.size())); @@ -4082,10 +4098,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { } // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); - - // we pass merge.getMergeReaders() instead of merge.readers to allow the - // OneMerge to return a view over the actual segments to merge - final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(), + + // Let the merge wrap readers + List mergeReaders = new ArrayList<>(); + for (SegmentReader reader : merge.readers) { + mergeReaders.add(merge.wrapForMerge(reader)); + } + final SegmentMerger merger = new SegmentMerger(mergeReaders, merge.info.info, infoStream, dirWrapper, globalFieldNumberMap, context); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index a90d625e305..e2957d74316 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -18,16 +18,19 @@ package org.apache.lucene.index; import java.io.PrintStream; +import java.util.EnumSet; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.PrintStreamInfoStream; -import org.apache.lucene.util.SetOnce; import org.apache.lucene.util.SetOnce.AlreadySetException; +import org.apache.lucene.util.SetOnce; /** * Holds all the configuration that is used to create an {@link IndexWriter}. @@ -439,6 +442,26 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig { return this; } + /** We only allow sorting on these types */ + private static final EnumSet ALLOWED_INDEX_SORT_TYPES = EnumSet.of(SortField.Type.STRING, + SortField.Type.LONG, + SortField.Type.INT, + SortField.Type.DOUBLE, + SortField.Type.FLOAT); + + /** + * Set the {@link Sort} order to use when merging segments. Note that newly flushed segments will remain unsorted. 
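A minimal usage sketch for the setter documented above (not part of this patch); "timestamp" is just an example field name, and only STRING, LONG, INT, DOUBLE and FLOAT sort fields pass the type check:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class IndexSortConfigExample {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        // Merged segments will be rewritten in ascending "timestamp" order;
        // newly flushed segments stay unsorted until a merge picks them up.
        iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG)));
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
          // index documents that carry a numeric docvalues field "timestamp"
        }
        dir.close();
      }
    }
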
+ */ + public IndexWriterConfig setIndexSort(Sort sort) { + for(SortField sortField : sort.getSort()) { + if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) { + throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField); + } + } + this.indexSort = sort; + return this; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(super.toString()); diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java index 9622d4e2f85..44e61e2787f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.index.IndexReader.ReaderClosedListener; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; /** {@code LeafReader} is an abstract class, providing an interface for accessing an @@ -312,4 +313,7 @@ public abstract class LeafReader extends IndexReader { * @lucene.internal */ public abstract void checkIntegrity() throws IOException; + + /** Returns null if this leaf is unsorted, or the {@link Sort} that it was sorted by */ + public abstract Sort getIndexSort(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index 1a0002c73f7..cec70c099aa 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Sort; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.InfoStream; @@ -94,6 +95,9 @@ public class LiveIndexWriterConfig { /** True if calls to {@link IndexWriter#close()} should first do a commit. */ protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE; + /** The sort order to use to write merged segments. */ + protected Sort indexSort = null; + // used by IndexWriterConfig LiveIndexWriterConfig(Analyzer analyzer) { this.analyzer = analyzer; @@ -445,6 +449,14 @@ public class LiveIndexWriterConfig { return commitOnClose; } + /** + * Set the index-time {@link Sort} order. Merged segments will be written + * in this order. 
+ */ + public Sort getIndexSort() { + return indexSort; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -467,6 +479,7 @@ public class LiveIndexWriterConfig { sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n"); sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n"); sb.append("commitOnClose=").append(getCommitOnClose()).append("\n"); + sb.append("indexSort=").append(getIndexSort()).append("\n"); return sb.toString(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java index a06c34f3c80..166878d3f8f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java @@ -18,8 +18,11 @@ package org.apache.lucene.index; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import org.apache.lucene.index.MultiPostingsEnum.EnumWithSlice; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; /** @@ -30,52 +33,66 @@ import org.apache.lucene.util.BytesRef; */ final class MappingMultiPostingsEnum extends PostingsEnum { - private MultiPostingsEnum.EnumWithSlice[] subs; - int numSubs; - int upto; - MergeState.DocMap currentMap; - PostingsEnum current; - int currentBase; - int doc = -1; - private MergeState mergeState; MultiPostingsEnum multiDocsAndPositionsEnum; final String field; + final DocIDMerger docIDMerger; + private MappingPostingsSub current; + private final MappingPostingsSub[] allSubs; + private final List subs = new ArrayList<>(); + + private static class MappingPostingsSub extends DocIDMerger.Sub { + public PostingsEnum postings; + + public MappingPostingsSub(MergeState.DocMap docMap) { + super(docMap); + } + + @Override + public int nextDoc() { + try { + return postings.nextDoc(); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + } /** Sole constructor. 
*/ - public MappingMultiPostingsEnum(String field, MergeState mergeState) { + public MappingMultiPostingsEnum(String field, MergeState mergeState) throws IOException { this.field = field; - this.mergeState = mergeState; + allSubs = new MappingPostingsSub[mergeState.fieldsProducers.length]; + for(int i=0;i(subs, allSubs.length, mergeState.segmentInfo.getIndexSort() != null); } - MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) { - this.numSubs = postingsEnum.getNumSubs(); - this.subs = postingsEnum.getSubs(); - upto = -1; - doc = -1; - current = null; + MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException { this.multiDocsAndPositionsEnum = postingsEnum; + MultiPostingsEnum.EnumWithSlice[] subsArray = postingsEnum.getSubs(); + int count = postingsEnum.getNumSubs(); + subs.clear(); + for(int i=0;i IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + doc, - mergeState.fieldsProducers[upto].toString()); + throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + current.mappedDocID, + current.postings.toString()); } return pos; } @Override public int startOffset() throws IOException { - return current.startOffset(); + return current.postings.startOffset(); } @Override public int endOffset() throws IOException { - return current.endOffset(); + return current.postings.endOffset(); } @Override public BytesRef getPayload() throws IOException { - return current.getPayload(); + return current.postings.getPayload(); } @Override public long cost() { long cost = 0; - for (EnumWithSlice enumWithSlice : subs) { - cost += enumWithSlice.postingsEnum.cost(); + for (MappingPostingsSub sub : subs) { + cost += sub.postings.cost(); } return cost; } diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index 1d67c4a0abc..c42b052d288 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -58,31 +58,6 @@ import org.apache.lucene.util.FixedBitSet; */ public abstract class MergePolicy { - /** A map of doc IDs. */ - public static abstract class DocMap { - /** Sole constructor, typically invoked from sub-classes constructors. */ - protected DocMap() {} - - /** Return the new doc ID according to its old value. */ - public abstract int map(int old); - - /** Useful from an assert. */ - boolean isConsistent(int maxDoc) { - final FixedBitSet targets = new FixedBitSet(maxDoc); - for (int i = 0; i < maxDoc; ++i) { - final int target = map(i); - if (target < 0 || target >= maxDoc) { - assert false : "out of range: " + target + " not in [0-" + maxDoc + "["; - return false; - } else if (targets.get(target)) { - assert false : target + " is already taken (" + i + ")"; - return false; - } - } - return true; - } - } - /** OneMerge provides the information necessary to perform * an individual primitive merge operation, resulting in * a single new segment. The merge spec includes the @@ -140,25 +115,11 @@ public abstract class MergePolicy { public void mergeFinished() throws IOException { } - /** Expert: Get the list of readers to merge. Note that this list does not - * necessarily match the list of segments to merge and should only be used - * to feed SegmentMerger to initialize a merge. 
When a {@link OneMerge} - * reorders doc IDs, it must override {@link #getDocMap} too so that - * deletes that happened during the merge can be applied to the newly - * merged segment. */ - public List getMergeReaders() throws IOException { - if (readers == null) { - throw new IllegalStateException("IndexWriter has not initialized readers from the segment infos yet"); - } - final List readers = new ArrayList<>(this.readers.size()); - for (SegmentReader reader : this.readers) { - if (reader.numDocs() > 0) { - readers.add(reader); - } - } - return Collections.unmodifiableList(readers); + /** Wrap the reader in order to add/remove information to the merged segment. */ + public CodecReader wrapForMerge(CodecReader reader) throws IOException { + return reader; } - + /** * Expert: Sets the {@link SegmentCommitInfo} of the merged segment. * Allows sub-classes to e.g. set diagnostics properties. @@ -175,20 +136,6 @@ public abstract class MergePolicy { return info; } - /** Expert: If {@link #getMergeReaders()} reorders document IDs, this method - * must be overridden to return a mapping from the natural doc ID - * (the doc ID that would result from a natural merge) to the actual doc - * ID. This mapping is used to apply deletions that happened during the - * merge to the new segment. */ - public DocMap getDocMap(MergeState mergeState) { - return new DocMap() { - @Override - public int map(int docID) { - return docID; - } - }; - } - /** Record that an exception occurred while executing * this merge */ synchronized void setException(Throwable error) { diff --git a/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java similarity index 95% rename from lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java rename to lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java index dba5c913f00..2401d0fa3ee 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java @@ -1,3 +1,5 @@ +package org.apache.lucene.index; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -14,7 +16,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.index; import java.io.IOException; @@ -23,18 +24,19 @@ import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; -/** this is a hack to make SortingMP fast! */ +/** This is a hack to make index sorting fast, with a {@link LeafReader} that always returns merge instances when you ask for the codec readers. 
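With OneMerge#getMergeReaders and OneMerge#getDocMap removed, wrapForMerge is the remaining hook for customizing what a merge reads. A sketch (not part of this patch) of a OneMerge that keeps the default pass-through behaviour; the factory class name is made up:

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.CodecReader;
    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.index.SegmentCommitInfo;

    class PassThroughOneMerge {
      static MergePolicy.OneMerge create(List<SegmentCommitInfo> segments) {
        return new MergePolicy.OneMerge(segments) {
          @Override
          public CodecReader wrapForMerge(CodecReader reader) throws IOException {
            // Return the reader unchanged, or wrap it (e.g. in a FilterCodecReader)
            // to add or remove information from the segment the merge will write.
            return reader;
          }
        };
      }
    }
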
*/ class MergeReaderWrapper extends LeafReader { - final SegmentReader in; + final CodecReader in; final FieldsProducer fields; final NormsProducer norms; final DocValuesProducer docValues; final StoredFieldsReader store; final TermVectorsReader vectors; - MergeReaderWrapper(SegmentReader in) throws IOException { + MergeReaderWrapper(CodecReader in) throws IOException { this.in = in; FieldsProducer fields = in.getPostingsReader(); @@ -256,4 +258,9 @@ class MergeReaderWrapper extends LeafReader { public String toString() { return "MergeReaderWrapper(" + in + ")"; } + + @Override + public Sort getIndexSort() { + return in.getIndexSort(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java index 7242785e101..3723f19476f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java @@ -18,7 +18,10 @@ package org.apache.lucene.index; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Locale; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.FieldsProducer; @@ -26,6 +29,7 @@ import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.packed.PackedInts; @@ -36,6 +40,12 @@ import org.apache.lucene.util.packed.PackedLongValues; * @lucene.experimental */ public class MergeState { + /** Maps document IDs from old segments to document IDs in the new segment */ + public final DocMap[] docMaps; + + // Only used by IW when it must remap deletes that arrived against the merging segmetns while a merge was running: + final DocMap[] leafDocMaps; + /** {@link SegmentInfo} of the newly merged segment. */ public final SegmentInfo segmentInfo; @@ -60,18 +70,12 @@ public class MergeState { /** Live docs for each reader */ public final Bits[] liveDocs; - /** Maps docIDs around deletions. */ - public final DocMap[] docMaps; - /** Postings to merge */ public final FieldsProducer[] fieldsProducers; /** Point readers to merge */ public final PointsReader[] pointsReaders; - /** New docID base per reader. */ - public final int[] docBase; - /** Max docs per reader */ public final int[] maxDocs; @@ -79,11 +83,15 @@ public class MergeState { public final InfoStream infoStream; /** Sole constructor. 
*/ - MergeState(List readers, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException { + MergeState(List originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException { + + this.infoStream = infoStream; + + final Sort indexSort = segmentInfo.getIndexSort(); + int numReaders = originalReaders.size(); + leafDocMaps = new DocMap[numReaders]; + List readers = maybeSortReaders(originalReaders, segmentInfo); - int numReaders = readers.size(); - docMaps = new DocMap[numReaders]; - docBase = new int[numReaders]; maxDocs = new int[numReaders]; fieldsProducers = new FieldsProducer[numReaders]; normsProducers = new NormsProducer[numReaders]; @@ -94,6 +102,7 @@ public class MergeState { fieldInfos = new FieldInfos[numReaders]; liveDocs = new Bits[numReaders]; + int numDocs = 0; for(int i=0;i readers) throws IOException { - final int numReaders = maxDocs.length; + private DocMap[] buildDocMaps(List readers, Sort indexSort) throws IOException { - // Remap docIDs - int docBase = 0; - for(int i=0;i 0; - } - - /** Creates a {@link DocMap} instance appropriate for - * this reader. */ - public static DocMap build(CodecReader reader) { - final int maxDoc = reader.maxDoc(); - if (!reader.hasDeletions()) { - return new NoDelDocMap(maxDoc); - } - final Bits liveDocs = reader.getLiveDocs(); - return build(maxDoc, liveDocs); - } - - static DocMap build(final int maxDoc, final Bits liveDocs) { - assert liveDocs != null; - final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); - int del = 0; - for (int i = 0; i < maxDoc; ++i) { - docMapBuilder.add(i - del); - if (!liveDocs.get(i)) { - ++del; + final PackedLongValues delDocMap; + if (liveDocs != null) { + delDocMap = removeDeletes(reader.maxDoc(), liveDocs); + } else { + delDocMap = null; } - } - final PackedLongValues docMap = docMapBuilder.build(); - final int numDeletedDocs = del; - assert docMap.size() == maxDoc; - return new DocMap() { - @Override - public int get(int docID) { - if (!liveDocs.get(docID)) { - return -1; + final int docBase = totalDocs; + docMaps[i] = new DocMap() { + @Override + public int get(int docID) { + if (liveDocs == null) { + return docBase + docID; + } else if (liveDocs.get(docID)) { + return docBase + (int) delDocMap.get(docID); + } else { + return -1; + } } - return (int) docMap.get(docID); - } + }; + totalDocs += reader.numDocs(); + } - @Override - public int maxDoc() { - return maxDoc; - } + return docMaps; - @Override - public int numDeletedDocs() { - return numDeletedDocs; - } - }; + } else { + // do a merge sort of the incoming leaves: + long t0 = System.nanoTime(); + DocMap[] result = MultiSorter.sort(indexSort, readers); + long t1 = System.nanoTime(); + if (infoStream.isEnabled("SM")) { + infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0)); + } + return result; } } - private static final class NoDelDocMap extends DocMap { + private List maybeSortReaders(List originalReaders, SegmentInfo segmentInfo) throws IOException { - private final int maxDoc; - - NoDelDocMap(int maxDoc) { - this.maxDoc = maxDoc; + // Default to identity: + for(int i=0;i readers = new ArrayList<>(originalReaders.size()); + + for (CodecReader leaf : originalReaders) { + Sort segmentSort = leaf.getIndexSort(); + + if (segmentSort == null) { + // TODO: fix IW to also sort when flushing? 
It's somewhat tricky because of stored fields and term vectors, which write "live" + // to their index files on each indexed document: + + // This segment was written by flush, so documents are not yet sorted, so we sort them now: + long t0 = System.nanoTime(); + Sorter.DocMap sortDocMap = sorter.sort(leaf); + long t1 = System.nanoTime(); + double msec = (t1-t0)/1000000.0; + + if (sortDocMap != null) { + if (infoStream.isEnabled("SM")) { + infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted; wrapping for sort %s now (%.2f msec to sort)", leaf, indexSort, msec)); + } + leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap)); + leafDocMaps[readers.size()] = new DocMap() { + @Override + public int get(int docID) { + return sortDocMap.oldToNew(docID); + } + }; + } else { + if (infoStream.isEnabled("SM")) { + infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted, but is already accidentally in sort %s order (%.2f msec to sort)", leaf, indexSort, msec)); + } + } + + } else { + if (segmentSort.equals(indexSort) == false) { + throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort + " but to-be-merged segment has sort=" + segmentSort); + } + if (infoStream.isEnabled("SM")) { + infoStream.message("SM", "segment " + leaf + " already sorted"); + } + } + + readers.add(leaf); } - @Override - public int numDeletedDocs() { - return 0; + return readers; + } + + /** A map of doc IDs. */ + public static abstract class DocMap { + /** Sole constructor */ + public DocMap() { } + + /** Return the mapped docID or -1 if the given doc is not mapped. */ + public abstract int get(int docID); + } + + static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) { + final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); + int del = 0; + for (int i = 0; i < maxDoc; ++i) { + docMapBuilder.add(i - del); + if (liveDocs.get(i) == false) { + ++del; + } + } + return docMapBuilder.build(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java index 54563254162..062fc303c09 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java @@ -57,7 +57,7 @@ public final class MultiPostingsEnum extends PostingsEnum { return this.parent == parent; } - /** Rre-use and reset this instance on the provided slices. */ + /** Re-use and reset this instance on the provided slices. 
*/ public MultiPostingsEnum reset(final EnumWithSlice[] subs, final int numSubs) { this.numSubs = numSubs; for(int i=0;i readers) throws IOException { + + // TODO: optimize if only 1 reader is incoming, though that's a rare case + + SortField fields[] = sort.getSort(); + final CrossReaderComparator[] comparators = new CrossReaderComparator[fields.length]; + for(int i=0;i queue = new PriorityQueue(leafCount) { + @Override + public boolean lessThan(LeafAndDocID a, LeafAndDocID b) { + for(int i=0;i readers, SortField sortField) throws IOException { + switch(sortField.getType()) { + + case STRING: + { + // this uses the efficient segment-local ordinal map: + MultiReader multiReader = new MultiReader(readers.toArray(new LeafReader[readers.size()])); + final SortedDocValues sorted = MultiDocValues.getSortedValues(multiReader, sortField.getField()); + final int[] docStarts = new int[readers.size()]; + List leaves = multiReader.leaves(); + for(int i=0;i values = new ArrayList<>(); + List docsWithFields = new ArrayList<>(); + for(CodecReader reader : readers) { + values.add(DocValues.getNumeric(reader, sortField.getField())); + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); + } + + final int reverseMul; + if (sortField.getReverse()) { + reverseMul = -1; + } else { + reverseMul = 1; + } + + final long missingValue; + + if (sortField.getMissingValue() != null) { + missingValue = (Long) sortField.getMissingValue(); + } else { + missingValue = 0; + } + + return new CrossReaderComparator() { + @Override + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { + long valueA; + if (docsWithFields.get(readerIndexA).get(docIDA)) { + valueA = values.get(readerIndexA).get(docIDA); + } else { + valueA = missingValue; + } + + long valueB; + if (docsWithFields.get(readerIndexB).get(docIDB)) { + valueB = values.get(readerIndexB).get(docIDB); + } else { + valueB = missingValue; + } + return reverseMul * Long.compare(valueA, valueB); + } + }; + } + + case INT: + { + List values = new ArrayList<>(); + List docsWithFields = new ArrayList<>(); + for(CodecReader reader : readers) { + values.add(DocValues.getNumeric(reader, sortField.getField())); + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); + } + + final int reverseMul; + if (sortField.getReverse()) { + reverseMul = -1; + } else { + reverseMul = 1; + } + + final int missingValue; + + if (sortField.getMissingValue() != null) { + missingValue = (Integer) sortField.getMissingValue(); + } else { + missingValue = 0; + } + + return new CrossReaderComparator() { + @Override + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { + int valueA; + if (docsWithFields.get(readerIndexA).get(docIDA)) { + valueA = (int) values.get(readerIndexA).get(docIDA); + } else { + valueA = missingValue; + } + + int valueB; + if (docsWithFields.get(readerIndexB).get(docIDB)) { + valueB = (int) values.get(readerIndexB).get(docIDB); + } else { + valueB = missingValue; + } + return reverseMul * Integer.compare(valueA, valueB); + } + }; + } + + case DOUBLE: + { + List values = new ArrayList<>(); + List docsWithFields = new ArrayList<>(); + for(CodecReader reader : readers) { + values.add(DocValues.getNumeric(reader, sortField.getField())); + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); + } + + final int reverseMul; + if (sortField.getReverse()) { + reverseMul = -1; + } else { + reverseMul = 1; + } + + final double missingValue; + + if 
(sortField.getMissingValue() != null) { + missingValue = (Double) sortField.getMissingValue(); + } else { + missingValue = 0.0; + } + + return new CrossReaderComparator() { + @Override + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { + double valueA; + if (docsWithFields.get(readerIndexA).get(docIDA)) { + valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA)); + } else { + valueA = missingValue; + } + + double valueB; + if (docsWithFields.get(readerIndexB).get(docIDB)) { + valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB)); + } else { + valueB = missingValue; + } + return reverseMul * Double.compare(valueA, valueB); + } + }; + } + + case FLOAT: + { + List values = new ArrayList<>(); + List docsWithFields = new ArrayList<>(); + for(CodecReader reader : readers) { + values.add(DocValues.getNumeric(reader, sortField.getField())); + docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField())); + } + + final int reverseMul; + if (sortField.getReverse()) { + reverseMul = -1; + } else { + reverseMul = 1; + } + + final float missingValue; + + if (sortField.getMissingValue() != null) { + missingValue = (Float) sortField.getMissingValue(); + } else { + missingValue = 0.0f; + } + + return new CrossReaderComparator() { + @Override + public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) { + float valueA; + if (docsWithFields.get(readerIndexA).get(docIDA)) { + valueA = Float.intBitsToFloat((int) values.get(readerIndexA).get(docIDA)); + } else { + valueA = missingValue; + } + + float valueB; + if (docsWithFields.get(readerIndexB).get(docIDB)) { + valueB = Float.intBitsToFloat((int) values.get(readerIndexB).get(docIDB)); + } else { + valueB = missingValue; + } + return reverseMul * Float.compare(valueA, valueB); + } + }; + } + + default: + throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType()); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java index 532265f59c7..d85ff2d0fa2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java @@ -26,6 +26,7 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; /** An {@link LeafReader} which reads multiple, parallel indexes. Each index @@ -55,6 +56,7 @@ public class ParallelLeafReader extends LeafReader { private final boolean closeSubReaders; private final int maxDoc, numDocs; private final boolean hasDeletions; + private final Sort indexSort; private final SortedMap fieldToReader = new TreeMap<>(); private final SortedMap tvFieldToReader = new TreeMap<>(); @@ -100,8 +102,18 @@ public class ParallelLeafReader extends LeafReader { // TODO: make this read-only in a cleaner way? 
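MultiSorter above does a k-way merge of segments that are already sorted: a priority queue ordered by the per-type cross-reader comparators decides which (reader, doc) pair comes next, and that order assigns the merged doc IDs. A self-contained toy version (not part of this patch) using plain long sort keys instead of doc values:

    import java.util.PriorityQueue;

    public class KWayMergeDemo {
      public static void main(String[] args) {
        long[][] keys = { {1L, 5L, 9L}, {2L, 3L, 10L} };   // per-reader sort keys, each ascending
        int[] upto = new int[keys.length];                  // next docID to push per reader
        PriorityQueue<int[]> queue = new PriorityQueue<>(
            (a, b) -> Long.compare(keys[a[0]][a[1]], keys[b[0]][b[1]]));
        for (int reader = 0; reader < keys.length; reader++) {
          queue.add(new int[] {reader, 0});
        }
        int mergedDocID = 0;
        while (!queue.isEmpty()) {
          int[] top = queue.poll();
          System.out.println("reader " + top[0] + " doc " + top[1] + " -> merged doc " + mergedDocID++);
          if (++upto[top[0]] < keys[top[0]].length) {
            queue.add(new int[] {top[0], upto[top[0]]});
          }
        }
      }
    }
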
FieldInfos.Builder builder = new FieldInfos.Builder(); + + Sort indexSort = null; + // build FieldInfos and fieldToReader map: for (final LeafReader reader : this.parallelReaders) { + Sort leafIndexSort = reader.getIndexSort(); + if (indexSort == null) { + indexSort = leafIndexSort; + } else if (leafIndexSort != null && indexSort.equals(leafIndexSort) == false) { + throw new IllegalArgumentException("cannot combine LeafReaders that have different index sorts: saw both sort=" + indexSort + " and " + leafIndexSort); + } + final FieldInfos readerFieldInfos = reader.getFieldInfos(); for (FieldInfo fieldInfo : readerFieldInfos) { // NOTE: first reader having a given field "wins": @@ -115,6 +127,7 @@ public class ParallelLeafReader extends LeafReader { } } fieldInfos = builder.finish(); + this.indexSort = indexSort; // build Fields instance for (final LeafReader reader : this.parallelReaders) { @@ -423,4 +436,10 @@ public class ParallelLeafReader extends LeafReader { ensureOpen(); return parallelReaders; } + + @Override + public Sort getIndexSort() { + return indexSort; + } + } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java index bed84589576..ec12365e958 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java @@ -28,6 +28,7 @@ import java.util.Set; import java.util.regex.Matcher; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.search.Sort; import org.apache.lucene.store.Directory; import org.apache.lucene.store.TrackingDirectoryWrapper; import org.apache.lucene.util.StringHelper; @@ -69,6 +70,8 @@ public final class SegmentInfo { private final Map attributes; + private final Sort indexSort; + // Tracks the Lucene version this segment was created with, since 3.1. Null // indicates an older than 3.0 index, and it's used to detect a too old index. // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and @@ -93,7 +96,7 @@ public final class SegmentInfo { */ public SegmentInfo(Directory dir, Version version, String name, int maxDoc, boolean isCompoundFile, Codec codec, Map diagnostics, - byte[] id, Map attributes) { + byte[] id, Map attributes, Sort indexSort) { assert !(dir instanceof TrackingDirectoryWrapper); this.dir = Objects.requireNonNull(dir); this.version = Objects.requireNonNull(version); @@ -107,6 +110,7 @@ public final class SegmentInfo { throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); } this.attributes = Objects.requireNonNull(attributes); + this.indexSort = indexSort; } /** @@ -194,13 +198,9 @@ public final class SegmentInfo { s.append('/').append(delCount); } - final String sorter_key = "sorter"; // SortingMergePolicy.SORTER_ID_PROP; // TODO: use this once we can import SortingMergePolicy (currently located in 'misc' instead of 'core') - final String sorter_val = diagnostics.get(sorter_key); - if (sorter_val != null) { - s.append(":["); - s.append(sorter_key); - s.append('='); - s.append(sorter_val); + if (indexSort != null) { + s.append(":[indexSort="); + s.append(indexSort); s.append(']'); } @@ -311,5 +311,10 @@ public final class SegmentInfo { public Map getAttributes() { return attributes; } + + /** Return the sort order of this segment, or null if the index has no sort. 
*/ + public Sort getIndexSort() { + return indexSort; + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java index b0d9bcff50b..d23f01024d8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java @@ -59,6 +59,11 @@ final class SegmentMerger { this.codec = segmentInfo.getCodec(); this.context = context; this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers); + if (mergeState.infoStream.isEnabled("SM")) { + if (segmentInfo.getIndexSort() != null) { + mergeState.infoStream.message("SM", "index sort during merge: " + segmentInfo.getIndexSort()); + } + } } /** True if any merging should happen */ diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index 8ed93e376c3..e68f8186272 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -28,6 +28,7 @@ import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.search.Sort; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; @@ -303,4 +304,9 @@ public final class SegmentReader extends CodecReader { ensureOpen(); core.removeCoreClosedListener(listener); } + + @Override + public Sort getIndexSort() { + return si.info.getIndexSort(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java index 3a7370138a6..2742247381a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java @@ -26,6 +26,7 @@ import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; /** @@ -125,6 +126,16 @@ public final class SlowCodecReaderWrapper { public void removeCoreClosedListener(CoreClosedListener listener) { reader.removeCoreClosedListener(listener); } + + @Override + public String toString() { + return "SlowCodecReaderWrapper(" + reader + ")"; + } + + @Override + public Sort getIndexSort() { + return reader.getIndexSort(); + } }; } } diff --git a/lucene/misc/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java similarity index 99% rename from lucene/misc/src/java/org/apache/lucene/index/Sorter.java rename to lucene/core/src/java/org/apache/lucene/index/Sorter.java index 7e4e475a248..cf75c18f6f0 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/Sorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java @@ -168,6 +168,7 @@ final class Sorter { } final PackedLongValues newToOld = newToOldBuilder.build(); + // invert the docs mapping: for (int i = 0; i < maxDoc; ++i) { docs[(int) newToOld.get(i)] = i; } // docs is now the oldToNew mapping @@ -196,7 +197,7 @@ final class Sorter { } }; } - + /** * Returns a mapping from the old document ID to its new location in the * sorted index. 
Implementations can use the auxiliary diff --git a/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java similarity index 96% rename from lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java rename to lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java index 683c5c263bb..70d5d204439 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java @@ -1,3 +1,5 @@ +package org.apache.lucene.index; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -14,7 +16,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.index; import java.io.IOException; import java.util.Arrays; @@ -35,21 +36,13 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; /** * An {@link org.apache.lucene.index.LeafReader} which supports sorting documents by a given - * {@link Sort}. You can use this class to sort an index as follows: - * - *
        - * IndexWriter writer; // writer to which the sorted index will be added
        - * DirectoryReader reader; // reader on the input index
        - * Sort sort; // determines how the documents are sorted
        - * LeafReader sortingReader = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
        - * writer.addIndexes(reader);
        - * writer.close();
        - * reader.close();
- * </pre>
        + * {@link Sort}. This is package private and is only used by Lucene when it needs to merge + * a newly flushed (unsorted) segment. * * @lucene.experimental */ -public class SortingLeafReader extends FilterLeafReader { + +class SortingLeafReader extends FilterLeafReader { private static class SortingFields extends FilterFields { @@ -111,25 +104,6 @@ public class SortingLeafReader extends FilterLeafReader { this.hasPositions = hasPositions; } - Bits newToOld(final Bits liveDocs) { - if (liveDocs == null) { - return null; - } - return new Bits() { - - @Override - public boolean get(int index) { - return liveDocs.get(docMap.oldToNew(index)); - } - - @Override - public int length() { - return liveDocs.length(); - } - - }; - } - @Override public PostingsEnum postings( PostingsEnum reuse, final int flags) throws IOException { @@ -368,6 +342,7 @@ public class SortingLeafReader extends FilterLeafReader { @Override public void setDocument(int docID) { + //System.out.println(" slr.sssdv.setDocument docID=" + docID + " this=" + this); in.setDocument(docMap.newToOld(docID)); } @@ -865,7 +840,6 @@ public class SortingLeafReader extends FilterLeafReader { if (inPointValues == null) { return null; } else { - // TODO: this is untested! return new SortingPointValues(inPointValues, docMap); } } diff --git a/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java b/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java similarity index 77% rename from lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java rename to lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java index 5d82be41450..1af1b9f35e2 100644 --- a/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java @@ -20,14 +20,14 @@ import java.io.IOException; import java.util.Arrays; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.SortingMergePolicy; -import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.Collector; -import org.apache.lucene.search.FilterLeafCollector; import org.apache.lucene.search.FilterCollector; +import org.apache.lucene.search.FilterLeafCollector; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TotalHitCountCollector; @@ -39,8 +39,7 @@ import org.apache.lucene.search.TotalHitCountCollector; * *
 * NOTE: the {@code Collector} detects segments sorted according to a
- * {@link SortingMergePolicy}'s {@link Sort} and so it's best used in conjunction
- * with a {@link SortingMergePolicy}. Also,it collects up to a specified
+ * {@link Sort} set via {@link IndexWriterConfig#setIndexSort}. Also, it collects up to a specified
 * {@code numDocsToCollect} from each segment, and therefore is mostly suitable
 * for use in conjunction with collectors such as {@link TopDocsCollector}, and
 * not e.g. {@link TotalHitCountCollector}.
 * NOTE: If you wrap a {@code TopDocsCollector} that sorts in the same
 * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
 * will be correct. However the total of {@link TopDocsCollector#getTotalHits()
- * hit count} will be underestimated since not all matching documents will have
+ * hit count} will be vastly underestimated since not all matching documents will have
 * been collected.
- *
- * NOTE: This {@code Collector} uses {@link Sort#toString()} to detect
- * whether a segment was sorted with the same {@code Sort}. This has
- * two implications:
- * <ul>
- * <li>if a custom comparator is not implemented correctly and returns
- * different identifiers for equivalent instances, this collector will not
- * detect sorted segments,</li>
- * <li>if you suddenly change the {@link IndexWriter}'s
- * {@code SortingMergePolicy} to sort according to another criterion and if both
- * the old and the new {@code Sort}s have the same identifier, this
- * {@code Collector} will incorrectly detect sorted segments.</li>
- * </ul>
        * * @lucene.experimental */ + public class EarlyTerminatingSortingCollector extends FilterCollector { /** Returns whether collection can be early-terminated if it sorts with the @@ -85,7 +72,6 @@ public class EarlyTerminatingSortingCollector extends FilterCollector { protected final Sort sort; /** Number of documents to collect in each segment */ protected final int numDocsToCollect; - private final Sort mergePolicySort; private final AtomicBoolean terminatedEarly = new AtomicBoolean(false); /** @@ -99,27 +85,26 @@ public class EarlyTerminatingSortingCollector extends FilterCollector { * the number of documents to collect on each segment. When wrapping * a {@link TopDocsCollector}, this number should be the number of * hits. - * @param mergePolicySort - * the sort your {@link SortingMergePolicy} uses * @throws IllegalArgumentException if the sort order doesn't allow for early * termination with the given merge policy. */ - public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, Sort mergePolicySort) { + public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) { super(in); if (numDocsToCollect <= 0) { throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect); } - if (canEarlyTerminate(sort, mergePolicySort) == false) { - throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicySort); - } this.sort = sort; this.numDocsToCollect = numDocsToCollect; - this.mergePolicySort = mergePolicySort; } @Override public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - if (SortingMergePolicy.isSorted(context.reader(), mergePolicySort)) { + Sort segmentSort = context.reader().getIndexSort(); + if (segmentSort != null && canEarlyTerminate(sort, segmentSort) == false) { + throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + segmentSort); + } + + if (segmentSort != null) { // segment is sorted, can early-terminate return new FilterLeafCollector(super.getLeafCollector(context)) { private int numCollected; @@ -142,5 +127,4 @@ public class EarlyTerminatingSortingCollector extends FilterCollector { public boolean terminatedEarly() { return terminatedEarly.get(); } - } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 3ac64c84c5d..b81b8079a1c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -819,6 +819,7 @@ public class IndexSearcher { sumTotalTermFreq = terms.getSumTotalTermFreq(); sumDocFreq = terms.getSumDocFreq(); } + return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/Sort.java b/lucene/core/src/java/org/apache/lucene/search/Sort.java index 7493e9b6a89..77585a2a674 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Sort.java +++ b/lucene/core/src/java/org/apache/lucene/search/Sort.java @@ -147,6 +147,9 @@ public class Sort { * etc. Finally, if there is still a tie after all SortFields * are checked, the internal Lucene docid is used to break it. */ public void setSort(SortField... 
fields) { + if (fields.length == 0) { + throw new IllegalArgumentException("There must be at least 1 sort field"); + } this.fields = fields; } diff --git a/lucene/core/src/java/org/apache/lucene/search/SortField.java b/lucene/core/src/java/org/apache/lucene/search/SortField.java index 880697bd605..412a50ab4b4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/core/src/java/org/apache/lucene/search/SortField.java @@ -77,9 +77,6 @@ public class SortField { * uses ordinals to do the sorting. */ STRING_VAL, - /** Sort use byte[] index values. */ - BYTES, - /** Force rewriting of SortField using {@link SortField#rewrite(IndexSearcher)} * before it can be used for sorting */ REWRITEABLE diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java index d8873ccf65f..d5640d8a7a4 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Version.java +++ b/lucene/core/src/java/org/apache/lucene/util/Version.java @@ -33,14 +33,18 @@ import java.util.Locale; public final class Version { /** Match settings and bugs in Lucene's 6.0 release. - *
<p>
        - * Use this to get the latest & greatest settings, bug - * fixes, etc, for Lucene. * @deprecated (7.0.0) Use latest */ @Deprecated public static final Version LUCENE_6_0_0 = new Version(6, 0, 0); + /** + * Match settings and bugs in Lucene's 6.0.1 release. + * @deprecated Use latest + */ + @Deprecated + public static final Version LUCENE_6_0_1 = new Version(6, 0, 1); + /** * Match settings and bugs in Lucene's 6.1.0 release. * @deprecated Use latest @@ -50,6 +54,9 @@ public final class Version { /** * Match settings and bugs in Lucene's 7.0.0 release. + *
<p>
        + * Use this to get the latest & greatest settings, bug + * fixes, etc, for Lucene. */ public static final Version LUCENE_7_0_0 = new Version(7, 0, 0); diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 288ece4c51d..09eef266b6d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -299,9 +299,6 @@ public class BKDWriter implements Closeable { final BKDReader.IntersectState state; final MergeState.DocMap docMap; - /** Base offset for all our docIDs */ - final int docIDBase; - /** Current doc ID */ public int docID; @@ -314,7 +311,7 @@ public class BKDWriter implements Closeable { /** Which leaf block we are up to */ private int blockID; - public MergeReader(BKDReader bkd, MergeState.DocMap docMap, int docIDBase) throws IOException { + public MergeReader(BKDReader bkd, MergeState.DocMap docMap) throws IOException { this.bkd = bkd; state = new BKDReader.IntersectState(bkd.in.clone(), bkd.numDims, @@ -322,7 +319,6 @@ public class BKDWriter implements Closeable { bkd.maxPointsInLeafNode, null); this.docMap = docMap; - this.docIDBase = docIDBase; long minFP = Long.MAX_VALUE; //System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length); for(long fp : bkd.leafBlockFPs) { @@ -396,14 +392,14 @@ public class BKDWriter implements Closeable { } // Tie break by sorting smaller docIDs earlier: - return a.docIDBase < b.docIDBase; + return a.docID < b.docID; } } /** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already * sorted values and currently only works when numDims==1. This returns -1 if all documents containing * dimensional values were deleted. */ - public long merge(IndexOutput out, List docMaps, List readers, List docIDBases) throws IOException { + public long merge(IndexOutput out, List docMaps, List readers) throws IOException { if (numDims != 1) { throw new UnsupportedOperationException("numDims must be 1 but got " + numDims); } @@ -411,8 +407,6 @@ public class BKDWriter implements Closeable { throw new IllegalStateException("cannot mix add and merge"); } - //System.out.println("BKDW.merge segs=" + readers.size()); - // Catch user silliness: if (heapPointWriter == null && tempInput == null) { throw new IllegalStateException("already finished"); @@ -433,7 +427,7 @@ public class BKDWriter implements Closeable { } else { docMap = docMaps.get(i); } - MergeReader reader = new MergeReader(bkd, docMap, docIDBases.get(i)); + MergeReader reader = new MergeReader(bkd, docMap); if (reader.next()) { queue.add(reader); } @@ -468,7 +462,7 @@ public class BKDWriter implements Closeable { // System.out.println("iter reader=" + reader); // NOTE: doesn't work with subclasses (e.g. SimpleText!) 
- int docID = reader.docIDBase + reader.docID; + int docID = reader.docID; leafBlockDocIDs[leafCount] = docID; System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength); docsSeen.set(docID); diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 875aba527e2..548f8d09244 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -org.apache.lucene.codecs.lucene60.Lucene60Codec +org.apache.lucene.codecs.lucene62.Lucene62Codec diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java index 59e48144d44..f945c2d0dc0 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.lucene50; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.BaseStoredFieldsFormatTestCase; @@ -33,7 +33,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase { @Override protected Codec getCodec() { - return new Lucene60Codec(Mode.BEST_COMPRESSION); + return new Lucene62Codec(Mode.BEST_COMPRESSION); } /** @@ -44,7 +44,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie Directory dir = newDirectory(); for (int i = 0; i < 10; i++) { IndexWriterConfig iwc = newIndexWriterConfig(); - iwc.setCodec(new Lucene60Codec(RandomPicks.randomFrom(random(), Mode.values()))); + iwc.setCodec(new Lucene62Codec(RandomPicks.randomFrom(random(), Mode.values()))); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig()); Document doc = new Document(); doc.add(new StoredField("field1", "value1")); @@ -71,7 +71,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie public void testInvalidOptions() throws Exception { expectThrows(NullPointerException.class, () -> { - new Lucene60Codec(null); + new Lucene62Codec(null); }); expectThrows(NullPointerException.class, () -> { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java index c915de0bfce..a0ad87fca87 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java @@ -18,14 +18,14 @@ package org.apache.lucene.codecs.lucene53; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import 
org.apache.lucene.index.BaseNormsFormatTestCase; /** * Tests Lucene53NormsFormat */ public class TestLucene53NormsFormat extends BaseNormsFormatTestCase { - private final Codec codec = new Lucene60Codec(); + private final Codec codec = new Lucene62Codec(); @Override protected Codec getCodec() { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java similarity index 89% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java rename to lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java index 81143300ee5..8c758f29e5a 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java @@ -1,3 +1,5 @@ +package org.apache.lucene.codecs.lucene62; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -14,8 +16,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.lucene50; - import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.BaseSegmentInfoFormatTestCase; @@ -23,9 +23,9 @@ import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.Version; /** - * Tests Lucene50SegmentInfoFormat + * Tests Lucene62SegmentInfoFormat */ -public class TestLucene50SegmentInfoFormat extends BaseSegmentInfoFormatTestCase { +public class TestLucene62SegmentInfoFormat extends BaseSegmentInfoFormatTestCase { @Override protected Version[] getVersions() { diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java index 2f3a3a69890..da8dbac0f5e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java +++ b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java @@ -24,8 +24,6 @@ import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.PointsFormat; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.PointsWriter; -import org.apache.lucene.codecs.lucene60.Lucene60PointsReader; -import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter; import org.apache.lucene.document.Document; import org.apache.lucene.document.LongPoint; import org.apache.lucene.search.IndexSearcher; @@ -143,6 +141,6 @@ public class Test2BPoints extends LuceneTestCase { } private static Codec getCodec() { - return Codec.forName("Lucene60"); + return Codec.forName("Lucene62"); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java b/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java index 22b3605965b..22d12346d4e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java +++ b/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java @@ -53,7 +53,7 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; // disk (but, should run successfully). 
Best to run w/ // -Dtests.codec=, and w/ plenty of RAM, eg: // -// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene60 -Dtestcase=Test2BTerms +// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene62 -Dtestcase=Test2BTerms // @SuppressCodecs({ "SimpleText", "Memory", "Direct" }) @Monster("very slow, use 5g minimum heap") diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java index d1148ef13a6..9d00c3f42d2 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -39,6 +39,8 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.Directory; @@ -1281,4 +1283,53 @@ public class TestAddIndexes extends LuceneTestCase { w2.close(); IOUtils.close(src, dest); } + + public void testIllegalIndexSortChange1() throws Exception { + Directory dir1 = newDirectory(); + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); + RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1); + w1.addDocument(new Document()); + w1.commit(); + w1.addDocument(new Document()); + w1.commit(); + // so the index sort is in fact burned into the index: + w1.forceMerge(1); + w1.close(); + + Directory dir2 = newDirectory(); + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING))); + RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2); + String message = expectThrows(IllegalArgumentException.class, () -> { + w2.addIndexes(dir1); + }).getMessage(); + assertEquals("cannot change index sort from to ", message); + IOUtils.close(dir1, w2, dir2); + } + + public void testIllegalIndexSortChange2() throws Exception { + Directory dir1 = newDirectory(); + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); + RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1); + w1.addDocument(new Document()); + w1.commit(); + w1.addDocument(new Document()); + w1.commit(); + // so the index sort is in fact burned into the index: + w1.forceMerge(1); + w1.close(); + + Directory dir2 = newDirectory(); + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING))); + RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2); + IndexReader r1 = DirectoryReader.open(dir1); + String message = expectThrows(IllegalArgumentException.class, () -> { + w2.addIndexes((SegmentReader) getOnlyLeafReader(r1)); + }).getMessage(); + assertEquals("cannot change index sort from to ", message); + IOUtils.close(r1, dir1, w2, dir2); + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 73b4622b82c..bd1e9b6e14f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ 
b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -222,7 +222,7 @@ public class TestCodecs extends LuceneTestCase { final FieldInfos fieldInfos = builder.finish(); final Directory dir = newDirectory(); Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); this.write(si, fieldInfos, dir, fields); final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()))); @@ -279,7 +279,7 @@ public class TestCodecs extends LuceneTestCase { } Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); this.write(si, fieldInfos, dir, fields); if (VERBOSE) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java index 9f3339c8c69..0dc654cb212 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java @@ -503,7 +503,7 @@ public class TestDemoParallelLeafReader extends LuceneTestCase { class ReindexingOneMerge extends OneMerge { - List parallelReaders; + final List parallelReaders = new ArrayList<>(); final long schemaGen; ReindexingOneMerge(List segments) { @@ -519,33 +519,23 @@ public class TestDemoParallelLeafReader extends LuceneTestCase { } @Override - public List getMergeReaders() throws IOException { - if (parallelReaders == null) { - parallelReaders = new ArrayList<>(); - for (CodecReader reader : super.getMergeReaders()) { - parallelReaders.add(getCurrentReader((SegmentReader)reader, schemaGen)); - } + public CodecReader wrapForMerge(CodecReader reader) throws IOException { + LeafReader wrapped = getCurrentReader((SegmentReader)reader, schemaGen); + if (wrapped instanceof ParallelLeafReader) { + parallelReaders.add((ParallelLeafReader) wrapped); } - - // TODO: fix ParallelLeafReader, if this is a good use case - List mergeReaders = new ArrayList<>(); - for (LeafReader reader : parallelReaders) { - mergeReaders.add(SlowCodecReaderWrapper.wrap(reader)); - } - return mergeReaders; + return SlowCodecReaderWrapper.wrap(wrapped); } @Override public void mergeFinished() throws IOException { Throwable th = null; - for(LeafReader r : parallelReaders) { - if (r instanceof ParallelLeafReader) { - try { - r.decRef(); - } catch (Throwable t) { - if (th == null) { - th = t; - } + for (ParallelLeafReader r : parallelReaders) { + try { + r.decRef(); + } catch (Throwable t) { + if (th == null) { + th = t; } } } @@ -561,10 +551,6 @@ public class TestDemoParallelLeafReader extends LuceneTestCase { super.setMergeInfo(info); } - @Override - public MergePolicy.DocMap getDocMap(final MergeState mergeState) { - return super.getDocMap(mergeState); - } } class ReindexingMergeSpecification extends MergeSpecification { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java 
b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index 803b1d9bc65..8b24b4d7bc5 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -218,7 +218,7 @@ public class TestDoc extends LuceneTestCase { final Codec codec = Codec.getDefault(); TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir); - final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); SegmentMerger merger = new SegmentMerger(Arrays.asList(r1, r2), si, InfoStream.getDefault(), trackingDir, diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java new file mode 100644 index 00000000000..003db9e4529 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
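The TestDocIDMerger file added just below exercises DocIDMerger, the new helper that interleaves per-segment doc-ID streams during a merge and reports each document's position in the merged segment. A minimal sketch of the pattern the test follows; the SimpleSub class and the identity-plus-offset DocMap are illustrative, not part of the patch:

    package org.apache.lucene.index;  // DocIDMerger and MergeState.DocMap live in this package

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.search.DocIdSetIterator;

    public class DocIDMergerSketch {

      // One sub per incoming segment: emits its local doc IDs in order and carries
      // the DocMap that maps them into the merged doc-ID space.
      static class SimpleSub extends DocIDMerger.Sub {
        private int docID = -1;
        final int maxDoc;

        SimpleSub(MergeState.DocMap docMap, int maxDoc) {
          super(docMap);
          this.maxDoc = maxDoc;
        }

        @Override
        public int nextDoc() {
          docID++;
          return docID == maxDoc ? DocIdSetIterator.NO_MORE_DOCS : docID;
        }
      }

      public static void main(String[] args) throws Exception {
        List<SimpleSub> subs = new ArrayList<>();
        int base = 0;
        for (int maxDoc : new int[] {3, 5}) {            // two pretend segments of 3 and 5 docs
          final int docBase = base;
          subs.add(new SimpleSub(new MergeState.DocMap() {
            @Override
            public int get(int docID) {
              return docBase + docID;                    // no deletions, no sort: shift by the segment base
            }
          }, maxDoc));
          base += maxDoc;
        }

        DocIDMerger<SimpleSub> merger = new DocIDMerger<>(subs, false);  // false: index is not sorted
        SimpleSub sub;
        while ((sub = merger.next()) != null) {
          System.out.println("merged docID = " + sub.mappedDocID);
        }
      }
    }
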
+ */ +package org.apache.lucene.index; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + +public class TestDocIDMerger extends LuceneTestCase { + + private static class TestSubUnsorted extends DocIDMerger.Sub { + private int docID = -1; + final int valueStart; + final int maxDoc; + + public TestSubUnsorted(MergeState.DocMap docMap, int maxDoc, int valueStart) { + super(docMap); + this.maxDoc = maxDoc; + this.valueStart = valueStart; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + + public int getValue() { + return valueStart + docID; + } + } + + public void testNoSort() throws Exception { + + int subCount = TestUtil.nextInt(random(), 1, 20); + List subs = new ArrayList<>(); + int valueStart = 0; + for(int i=0;i merger = new DocIDMerger<>(subs, false); + + int count = 0; + while (true) { + TestSubUnsorted sub = merger.next(); + if (sub == null) { + break; + } + assertEquals(count, sub.mappedDocID); + assertEquals(count, sub.getValue()); + count++; + } + + assertEquals(valueStart, count); + } + + private static class TestSubSorted extends DocIDMerger.Sub { + private int docID = -1; + final int maxDoc; + final int index; + + public TestSubSorted(MergeState.DocMap docMap, int maxDoc, int index) { + super(docMap); + this.maxDoc = maxDoc; + this.index = index; + } + + @Override + public int nextDoc() { + docID++; + if (docID == maxDoc) { + return NO_MORE_DOCS; + } else { + return docID; + } + } + + @Override + public String toString() { + return "TestSubSorted(index=" + index + ", mappedDocID=" + mappedDocID+ ")"; + } + } + + public void testWithSort() throws Exception { + + int subCount = TestUtil.nextInt(random(), 1, 20); + List oldToNew = new ArrayList<>(); + // how many docs we've written to each sub: + List uptos = new ArrayList<>(); + int totDocCount = 0; + for(int i=0;i completedSubs = new ArrayList<>(); + + // randomly distribute target docIDs into the segments: + for(int docID=0;docID subs = new ArrayList<>(); + for(int i=0;i merger = new DocIDMerger<>(subs, true); + + int count = 0; + while (true) { + TestSubSorted sub = merger.next(); + if (sub == null) { + break; + } + if (liveDocs != null) { + count = liveDocs.nextSetBit(count); + } + assertEquals(count, sub.mappedDocID); + count++; + } + + if (liveDocs != null) { + if (count < totDocCount) { + assertEquals(NO_MORE_DOCS, liveDocs.nextSetBit(count)); + } else { + assertEquals(totDocCount, count); + } + } else { + assertEquals(totDocCount, count); + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java new file mode 100644 index 00000000000..4e775f3e5da --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -0,0 +1,1377 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.BinaryPoint; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.PointValues.IntersectVisitor; +import org.apache.lucene.index.PointValues.Relation; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.CollectionStatistics; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.EarlyTerminatingSortingCollector; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestIndexSorting extends LuceneTestCase { + + public void 
testBasicString() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.STRING)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new SortedDocValuesField("foo", new BytesRef("zzz"))); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + doc = new Document(); + doc.add(new SortedDocValuesField("foo", new BytesRef("aaa"))); + w.addDocument(doc); + w.commit(); + + doc = new Document(); + doc.add(new SortedDocValuesField("foo", new BytesRef("mmm"))); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + SortedDocValues values = leaf.getSortedDocValues("foo"); + assertEquals("aaa", values.get(0).utf8ToString()); + assertEquals("mmm", values.get(1).utf8ToString()); + assertEquals("zzz", values.get(2).utf8ToString()); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingStringFirst() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.STRING); + sortField.setMissingValue(SortField.STRING_FIRST); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new SortedDocValuesField("foo", new BytesRef("zzz"))); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new SortedDocValuesField("foo", new BytesRef("mmm"))); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + SortedDocValues values = leaf.getSortedDocValues("foo"); + assertEquals(-1, values.getOrd(0)); + assertEquals("mmm", values.get(1).utf8ToString()); + assertEquals("zzz", values.get(2).utf8ToString()); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingStringLast() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.STRING); + sortField.setMissingValue(SortField.STRING_LAST); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new SortedDocValuesField("foo", new BytesRef("zzz"))); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new SortedDocValuesField("foo", new BytesRef("mmm"))); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + SortedDocValues values = leaf.getSortedDocValues("foo"); + assertEquals("mmm", values.get(0).utf8ToString()); + 
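Together with the EarlyTerminatingSortingCollector change earlier in this patch, these tests show the intended end-to-end use of the new IndexWriterConfig.setIndexSort: write segments in sort order at index time, then let a matching search-time sort stop per-segment collection early. A rough sketch; the field name, analyzer, directory choice and the five-argument TopFieldCollector.create call are illustrative assumptions, not taken from the patch:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.EarlyTerminatingSortingCollector;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.TopFieldCollector;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class IndexSortSketch {
      public static void main(String[] args) throws Exception {
        Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG));

        Directory dir = new RAMDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setIndexSort(indexSort);        // new API introduced by this patch
        IndexWriter w = new IndexWriter(dir, iwc);
        for (long ts = 0; ts < 100; ts++) {
          Document doc = new Document();
          doc.add(new NumericDocValuesField("timestamp", ts));
          w.addDocument(doc);
        }
        w.forceMerge(1);                    // in this patch only merged segments are written sorted
        w.close();

        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        int numHits = 10;
        TopFieldCollector topCollector = TopFieldCollector.create(indexSort, numHits, true, false, false);
        // No merge-policy Sort argument anymore: the wrapper reads each segment's sort via
        // LeafReader.getIndexSort() and stops collecting once numHits docs were seen on a
        // segment whose sort is compatible with the search sort.
        searcher.search(new MatchAllDocsQuery(),
            new EarlyTerminatingSortingCollector(topCollector, indexSort, numHits));
        TopDocs hits = topCollector.topDocs();
        System.out.println("hits: " + hits.totalHits);
      }
    }

A flushed-but-unmerged segment reports getIndexSort() == null, so the collector simply collects it fully; testRandom1 below checks exactly that split between flushed and merged segments.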
assertEquals("zzz", values.get(1).utf8ToString()); + assertEquals(-1, values.getOrd(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testBasicLong() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", 18)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", -1)); + w.addDocument(doc); + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", 7)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + assertEquals(-1, values.get(0)); + assertEquals(7, values.get(1)); + assertEquals(18, values.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingLongFirst() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.LONG); + sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE)); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", 18)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", 7)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = leaf.getDocsWithField("foo"); + assertEquals(0, values.get(0)); + assertFalse(docsWithField.get(0)); + assertEquals(7, values.get(1)); + assertEquals(18, values.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingLongLast() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.LONG); + sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE)); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", 18)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", 7)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = 
leaf.getDocsWithField("foo"); + assertEquals(7, values.get(0)); + assertEquals(18, values.get(1)); + assertEquals(0, values.get(2)); + assertFalse(docsWithField.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testBasicInt() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", 18)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", -1)); + w.addDocument(doc); + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", 7)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + assertEquals(-1, values.get(0)); + assertEquals(7, values.get(1)); + assertEquals(18, values.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingIntFirst() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.INT); + sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE)); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", 18)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", 7)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = leaf.getDocsWithField("foo"); + assertEquals(0, values.get(0)); + assertFalse(docsWithField.get(0)); + assertEquals(7, values.get(1)); + assertEquals(18, values.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingIntLast() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.INT); + sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE)); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", 18)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new NumericDocValuesField("foo", 7)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + 
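The testMissing*First/Last cases in this file pin down where documents that lack the sort field end up in the sorted segment. That placement is configured on the SortField before it is passed to setIndexSort; a small sketch with an illustrative field name (imports as in the previous sketch):

    static IndexWriterConfig sortedByPriceConfig() {
      // Documents without a "price" doc value sort after every real value;
      // Long.MIN_VALUE would put them first. For STRING sorts the sentinels
      // are SortField.STRING_FIRST and SortField.STRING_LAST.
      SortField byPrice = new SortField("price", SortField.Type.LONG);
      byPrice.setMissingValue(Long.MAX_VALUE);
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
      iwc.setIndexSort(new Sort(byPrice));
      return iwc;
    }
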
NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = leaf.getDocsWithField("foo"); + assertEquals(7, values.get(0)); + assertEquals(18, values.get(1)); + assertEquals(0, values.get(2)); + assertFalse(docsWithField.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testBasicDouble() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new DoubleDocValuesField("foo", 18.0)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + doc = new Document(); + doc.add(new DoubleDocValuesField("foo", -1.0)); + w.addDocument(doc); + w.commit(); + + doc = new Document(); + doc.add(new DoubleDocValuesField("foo", 7.0)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0); + assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0); + assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingDoubleFirst() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.DOUBLE); + sortField.setMissingValue(Double.NEGATIVE_INFINITY); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new DoubleDocValuesField("foo", 18.0)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new DoubleDocValuesField("foo", 7.0)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = leaf.getDocsWithField("foo"); + assertEquals(0.0, Double.longBitsToDouble(values.get(0)), 0.0); + assertFalse(docsWithField.get(0)); + assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0); + assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingDoubleLast() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.DOUBLE); + sortField.setMissingValue(Double.POSITIVE_INFINITY); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new DoubleDocValuesField("foo", 18.0)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + 
w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new DoubleDocValuesField("foo", 7.0)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = leaf.getDocsWithField("foo"); + assertEquals(7.0, Double.longBitsToDouble(values.get(0)), 0.0); + assertEquals(18.0, Double.longBitsToDouble(values.get(1)), 0.0); + assertEquals(0.0, Double.longBitsToDouble(values.get(2)), 0.0); + assertFalse(docsWithField.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testBasicFloat() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new FloatDocValuesField("foo", 18.0f)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + doc = new Document(); + doc.add(new FloatDocValuesField("foo", -1.0f)); + w.addDocument(doc); + w.commit(); + + doc = new Document(); + doc.add(new FloatDocValuesField("foo", 7.0f)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f); + assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f); + assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingFloatFirst() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.FLOAT); + sortField.setMissingValue(Float.NEGATIVE_INFINITY); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new FloatDocValuesField("foo", 18.0f)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new FloatDocValuesField("foo", 7.0f)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = leaf.getDocsWithField("foo"); + assertEquals(0.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f); + assertFalse(docsWithField.get(0)); + assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f); + assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f); + r.close(); + w.close(); + dir.close(); + } + + public void testMissingFloatLast() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + SortField sortField = new SortField("foo", SortField.Type.FLOAT); + 
sortField.setMissingValue(Float.POSITIVE_INFINITY); + Sort indexSort = new Sort(sortField); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new FloatDocValuesField("foo", 18.0f)); + w.addDocument(doc); + // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging: + w.commit(); + + // missing + w.addDocument(new Document()); + w.commit(); + + doc = new Document(); + doc.add(new FloatDocValuesField("foo", 7.0f)); + w.addDocument(doc); + w.forceMerge(1); + + DirectoryReader r = DirectoryReader.open(w); + LeafReader leaf = getOnlyLeafReader(r); + assertEquals(3, leaf.maxDoc()); + NumericDocValues values = leaf.getNumericDocValues("foo"); + Bits docsWithField = leaf.getDocsWithField("foo"); + assertEquals(7.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f); + assertEquals(18.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f); + assertEquals(0.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f); + assertFalse(docsWithField.get(2)); + r.close(); + w.close(); + dir.close(); + } + + public void testRandom1() throws IOException { + boolean withDeletes = random().nextBoolean(); + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + final int numDocs = atLeast(1000); + final FixedBitSet deleted = new FixedBitSet(numDocs); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", random().nextInt(20))); + doc.add(new StringField("id", Integer.toString(i), Store.YES)); + doc.add(new NumericDocValuesField("id", i)); + w.addDocument(doc); + if (random().nextInt(5) == 0) { + w.getReader().close(); + } else if (random().nextInt(30) == 0) { + w.forceMerge(2); + } else if (random().nextInt(4) == 0) { + final int id = TestUtil.nextInt(random(), 0, i); + deleted.set(id); + w.deleteDocuments(new Term("id", Integer.toString(id))); + } + } + + // Check that segments are sorted + DirectoryReader reader = w.getReader(); + for (LeafReaderContext ctx : reader.leaves()) { + final SegmentReader leaf = (SegmentReader) ctx.reader(); + SegmentInfo info = leaf.getSegmentInfo().info; + switch (info.getDiagnostics().get(IndexWriter.SOURCE)) { + case IndexWriter.SOURCE_FLUSH: + assertNull(info.getIndexSort()); + break; + case IndexWriter.SOURCE_MERGE: + assertEquals(indexSort, info.getIndexSort()); + final NumericDocValues values = leaf.getNumericDocValues("foo"); + long previous = Long.MIN_VALUE; + for (int i = 0; i < leaf.maxDoc(); ++i) { + final long value = values.get(i); + assertTrue(value >= previous); + previous = value; + } + break; + default: + fail(); + } + } + + // Now check that the index is consistent + IndexSearcher searcher = newSearcher(reader); + for (int i = 0; i < numDocs; ++i) { + TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i))); + final TopDocs topDocs = searcher.search(termQuery, 1); + if (deleted.get(i)) { + assertEquals(0, topDocs.totalHits); + } else { + assertEquals(1, topDocs.totalHits); + assertEquals(i, MultiDocValues.getNumericValues(reader, "id").get(topDocs.scoreDocs[0].doc)); + Document document = reader.document(topDocs.scoreDocs[0].doc); + assertEquals(Integer.toString(i), document.get("id")); + } + } + + reader.close(); + w.close(); + dir.close(); + } + + static class 
UpdateRunnable implements Runnable { + + private final int numDocs; + private final Random random; + private final AtomicInteger updateCount; + private final IndexWriter w; + private final Map values; + private final CountDownLatch latch; + + UpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map values) { + this.numDocs = numDocs; + this.random = random; + this.latch = latch; + this.updateCount = updateCount; + this.w = w; + this.values = values; + } + + @Override + public void run() { + try { + latch.await(); + while (updateCount.decrementAndGet() >= 0) { + final int id = random.nextInt(numDocs); + final long value = random.nextInt(20); + Document doc = new Document(); + doc.add(new StringField("id", Integer.toString(id), Store.NO)); + doc.add(new NumericDocValuesField("foo", value)); + + synchronized (values) { + w.updateDocument(new Term("id", Integer.toString(id)), doc); + values.put(id, value); + } + + switch (random.nextInt(10)) { + case 0: + case 1: + // reopen + DirectoryReader.open(w).close(); + break; + case 2: + w.forceMerge(3); + break; + } + } + } catch (IOException | InterruptedException e) { + throw new RuntimeException(e); + } + } + + } + + // There is tricky logic to resolve deletes that happened while merging + public void testConcurrentUpdates() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Map values = new HashMap<>(); + + final int numDocs = atLeast(100); + Thread[] threads = new Thread[2]; + final AtomicInteger updateCount = new AtomicInteger(atLeast(1000)); + final CountDownLatch latch = new CountDownLatch(1); + for (int i = 0; i < threads.length; ++i) { + Random r = new Random(random().nextLong()); + threads[i] = new Thread(new UpdateRunnable(numDocs, r, latch, updateCount, w, values)); + } + for (Thread thread : threads) { + thread.start(); + } + latch.countDown(); + for (Thread thread : threads) { + thread.join(); + } + w.forceMerge(1); + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = newSearcher(reader); + for (int i = 0; i < numDocs; ++i) { + final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); + if (values.containsKey(i) == false) { + assertEquals(0, topDocs.totalHits); + } else { + assertEquals(1, topDocs.totalHits); + assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc)); + } + } + reader.close(); + w.close(); + dir.close(); + } + + static class DVUpdateRunnable implements Runnable { + + private final int numDocs; + private final Random random; + private final AtomicInteger updateCount; + private final IndexWriter w; + private final Map values; + private final CountDownLatch latch; + + DVUpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map values) { + this.numDocs = numDocs; + this.random = random; + this.latch = latch; + this.updateCount = updateCount; + this.w = w; + this.values = values; + } + + @Override + public void run() { + try { + latch.await(); + while (updateCount.decrementAndGet() >= 0) { + final int id = random.nextInt(numDocs); + final long value = random.nextInt(20); + + synchronized (values) { + w.updateDocValues(new Term("id", Integer.toString(id)), new 
NumericDocValuesField("foo", value)); + values.put(id, value); + } + + switch (random.nextInt(10)) { + case 0: + case 1: + // reopen + DirectoryReader.open(w).close(); + break; + case 2: + w.forceMerge(3); + break; + } + } + } catch (IOException | InterruptedException e) { + throw new RuntimeException(e); + } + } + + } + + // There is tricky logic to resolve dv updates that happened while merging + public void testConcurrentDVUpdates() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Map values = new HashMap<>(); + + final int numDocs = atLeast(100); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + doc.add(new StringField("id", Integer.toString(i), Store.NO)); + doc.add(new NumericDocValuesField("foo", -1)); + w.addDocument(doc); + values.put(i, -1L); + } + Thread[] threads = new Thread[2]; + final AtomicInteger updateCount = new AtomicInteger(atLeast(1000)); + final CountDownLatch latch = new CountDownLatch(1); + for (int i = 0; i < threads.length; ++i) { + Random r = new Random(random().nextLong()); + threads[i] = new Thread(new DVUpdateRunnable(numDocs, r, latch, updateCount, w, values)); + } + for (Thread thread : threads) { + thread.start(); + } + latch.countDown(); + for (Thread thread : threads) { + thread.join(); + } + w.forceMerge(1); + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = newSearcher(reader); + for (int i = 0; i < numDocs; ++i) { + final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); + assertEquals(1, topDocs.totalHits); + assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc)); + } + reader.close(); + w.close(); + dir.close(); + } + + public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception { + Directory dir = newDirectory(); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); + IndexWriterConfig iwc1 = newIndexWriterConfig(); + if (random().nextBoolean()) { + iwc1.setIndexSort(indexSort); + } + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + final int numDocs = atLeast(100); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + doc.add(new StringField("id", Integer.toString(i), Store.NO)); + doc.add(new NumericDocValuesField("foo", random().nextInt(20))); + w.addDocument(doc); + } + if (withDeletes) { + for (int i = random().nextInt(5); i < numDocs; i += TestUtil.nextInt(random(), 1, 5)) { + w.deleteDocuments(new Term("id", Integer.toString(i))); + } + } + if (random().nextBoolean()) { + w.forceMerge(1); + } + final IndexReader reader = w.getReader(); + w.close(); + + Directory dir2 = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + iwc.setIndexSort(indexSort); + IndexWriter w2 = new IndexWriter(dir2, iwc); + + if (useReaders) { + CodecReader[] codecReaders = new CodecReader[reader.leaves().size()]; + for (int i = 0; i < codecReaders.length; ++i) { + codecReaders[i] = (CodecReader) reader.leaves().get(i).reader(); + } + w2.addIndexes(codecReaders); + } else { + w2.addIndexes(dir); + } + final IndexReader reader2 = w2.getReader(); + final IndexSearcher searcher = newSearcher(reader); + final IndexSearcher searcher2 = newSearcher(reader2); + for (int i = 0; i < 
numDocs; ++i) { + Query query = new TermQuery(new Term("id", Integer.toString(i))); + final TopDocs topDocs = searcher.search(query, 1); + final TopDocs topDocs2 = searcher2.search(query, 1); + assertEquals(topDocs.totalHits, topDocs2.totalHits); + if (topDocs.totalHits == 1) { + assertEquals( + MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc), + MultiDocValues.getNumericValues(reader2, "foo").get(topDocs2.scoreDocs[0].doc)); + } + } + + IOUtils.close(reader, reader2, w2, dir, dir2); + } + + public void testAddIndexes() throws Exception { + testAddIndexes(false, true); + } + + public void testAddIndexesWithDeletions() throws Exception { + testAddIndexes(true, true); + } + + public void testAddIndexesWithDirectory() throws Exception { + testAddIndexes(false, false); + } + + public void testAddIndexesWithDeletionsAndDirectory() throws Exception { + testAddIndexes(true, false); + } + + public void testBadSort() throws Exception { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + iwc.setIndexSort(Sort.RELEVANCE); + }); + assertEquals("invalid SortField type: must be one of [STRING, INT, FLOAT, LONG, DOUBLE] but got: ", expected.getMessage()); + } + + // you can't change the index sort on an existing index: + public void testIllegalChangeSort() throws Exception { + final Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG))); + IndexWriter w = new IndexWriter(dir, iwc); + w.addDocument(new Document()); + DirectoryReader.open(w).close(); + w.addDocument(new Document()); + w.forceMerge(1); + w.close(); + + final IndexWriterConfig iwc2 = new IndexWriterConfig(new MockAnalyzer(random())); + iwc2.setIndexSort(new Sort(new SortField("bar", SortField.Type.LONG))); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { + new IndexWriter(dir, iwc2); + }); + String message = e.getMessage(); + assertTrue(message.contains("cannot change previous indexSort=")); + assertTrue(message.contains("to new indexSort=")); + dir.close(); + } + + static final class NormsSimilarity extends Similarity { + + private final Similarity in; + + public NormsSimilarity(Similarity in) { + this.in = in; + } + + @Override + public long computeNorm(FieldInvertState state) { + if (state.getName().equals("norms")) { + return Float.floatToIntBits(state.getBoost()); + } else { + return in.computeNorm(state); + } + } + + @Override + public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... 
termStats) { + return in.computeWeight(collectionStats, termStats); + } + + @Override + public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { + return in.simScorer(weight, context); + } + + } + + static final class PositionsTokenStream extends TokenStream { + + private final CharTermAttribute term; + private final PayloadAttribute payload; + private final OffsetAttribute offset; + + private int pos, off; + + public PositionsTokenStream() { + term = addAttribute(CharTermAttribute.class); + payload = addAttribute(PayloadAttribute.class); + offset = addAttribute(OffsetAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (pos == 0) { + return false; + } + + clearAttributes(); + term.append("#all#"); + payload.setPayload(new BytesRef(Integer.toString(pos))); + offset.setOffset(off, off); + --pos; + ++off; + return true; + } + + void setId(int id) { + pos = id / 10 + 1; + off = 0; + } + } + + public void testRandom2() throws Exception { + int numDocs = atLeast(100); + + FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); + POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + POSITIONS_TYPE.freeze(); + + FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); + TERM_VECTORS_TYPE.setStoreTermVectors(true); + TERM_VECTORS_TYPE.freeze(); + + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(); + return new TokenStreamComponents(tokenizer, tokenizer); + } + }; + + List docs = new ArrayList<>(); + for (int i=0;i docs = new ArrayList<>(); + + Sort sort = randomSort(); + if (VERBOSE) { + System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort); + } + + // no index sorting, all search-time sorting: + Directory dir1 = newFSDirectory(createTempDir()); + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); + IndexWriter w1 = new IndexWriter(dir1, iwc1); + + // use index sorting: + Directory dir2 = newFSDirectory(createTempDir()); + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc2.setIndexSort(sort); + IndexWriter w2 = new IndexWriter(dir2, iwc2); + + Set toDelete = new HashSet<>(); + + double deleteChance = random().nextDouble(); + + for(int id=0;id { + new ParallelLeafReader(getOnlyLeafReader(r1), getOnlyLeafReader(r2)); + }).getMessage(); + assertEquals("cannot combine LeafReaders that have different index sorts: saw both sort= and ", message); + IOUtils.close(r1, dir1, r2, dir2); + } + + // ok to have one leaf w/ index sort and the other with no sort + public void testWithIndexSort2() throws Exception { + Directory dir1 = newDirectory(); + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); + IndexWriter w1 = new IndexWriter(dir1, iwc1); + w1.addDocument(new Document()); + w1.commit(); + w1.addDocument(new Document()); + w1.forceMerge(1); + w1.close(); + IndexReader r1 = DirectoryReader.open(dir1); + + Directory dir2 = newDirectory(); + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); + IndexWriter w2 = new IndexWriter(dir2, iwc2); + w2.addDocument(new Document()); + w2.addDocument(new Document()); + w2.close(); + + IndexReader r2 = DirectoryReader.open(dir2); + new ParallelLeafReader(false, getOnlyLeafReader(r1), getOnlyLeafReader(r2)).close(); + new 
ParallelLeafReader(false, getOnlyLeafReader(r2), getOnlyLeafReader(r1)).close(); + IOUtils.close(r1, dir1, r2, dir2); + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java index 9f55ec3fdcc..9693c5c32b1 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java @@ -394,11 +394,11 @@ public class TestPointValues extends LuceneTestCase { dir.close(); } - // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with SimpleText + // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with SimpleText public void testDifferentCodecs1() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); - iwc.setCodec(Codec.forName("Lucene60")); + iwc.setCodec(Codec.forName("Lucene62")); IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new IntPoint("int", 1)); @@ -417,7 +417,7 @@ public class TestPointValues extends LuceneTestCase { dir.close(); } - // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with Lucene60 + // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with Lucene60 public void testDifferentCodecs2() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); @@ -429,7 +429,7 @@ public class TestPointValues extends LuceneTestCase { w.close(); iwc = new IndexWriterConfig(new MockAnalyzer(random())); - iwc.setCodec(Codec.forName("Lucene60")); + iwc.setCodec(Codec.forName("Lucene62")); w = new IndexWriter(dir, iwc); doc = new Document(); doc.add(new IntPoint("int", 1)); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java index 58ceb445ef6..179d2663a58 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java @@ -51,7 +51,7 @@ public class TestSegmentInfos extends LuceneTestCase { SegmentInfos sis = new SegmentInfos(); SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(), - Collections.emptyMap(), id, Collections.emptyMap()); + Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1); @@ -73,14 +73,14 @@ public class TestSegmentInfos extends LuceneTestCase { SegmentInfos sis = new SegmentInfos(); SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(), - Collections.emptyMap(), id, Collections.emptyMap()); + Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1); sis.add(commitInfo); info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_1", 1, false, Codec.getDefault(), - Collections.emptyMap(), id, Collections.emptyMap()); + Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, 
IOContext.DEFAULT); commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java index e1075abe735..1ef37c0892f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.Version; +import org.apache.lucene.util.packed.PackedLongValues; public class TestSegmentMerger extends LuceneTestCase { //The variables for the new merged segment @@ -83,7 +84,7 @@ public class TestSegmentMerger extends LuceneTestCase { public void testMerge() throws IOException { final Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); SegmentMerger merger = new SegmentMerger(Arrays.asList(reader1, reader2), si, InfoStream.getDefault(), mergedDir, @@ -144,22 +145,9 @@ public class TestSegmentMerger extends LuceneTestCase { mergedReader.close(); } - private static boolean equals(MergeState.DocMap map1, MergeState.DocMap map2) { - if (map1.maxDoc() != map2.maxDoc()) { - return false; - } - for (int i = 0; i < map1.maxDoc(); ++i) { - if (map1.get(i) != map2.get(i)) { - return false; - } - } - return true; - } - public void testBuildDocMap() { final int maxDoc = TestUtil.nextInt(random(), 1, 128); final int numDocs = TestUtil.nextInt(random(), 0, maxDoc); - final int numDeletedDocs = maxDoc - numDocs; final FixedBitSet liveDocs = new FixedBitSet(maxDoc); for (int i = 0; i < numDocs; ++i) { while (true) { @@ -171,15 +159,11 @@ public class TestSegmentMerger extends LuceneTestCase { } } - final MergeState.DocMap docMap = MergeState.DocMap.build(maxDoc, liveDocs); + final PackedLongValues docMap = MergeState.removeDeletes(maxDoc, liveDocs); - assertEquals(maxDoc, docMap.maxDoc()); - assertEquals(numDocs, docMap.numDocs()); - assertEquals(numDeletedDocs, docMap.numDeletedDocs()); // assert the mapping is compact for (int i = 0, del = 0; i < maxDoc; ++i) { - if (!liveDocs.get(i)) { - assertEquals(-1, docMap.get(i)); + if (liveDocs.get(i) == false) { ++del; } else { assertEquals(i - del, docMap.get(i)); diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java similarity index 75% rename from lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java rename to lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java index 14bd43714fb..84d326ff48c 100644 --- a/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java @@ -25,23 +25,23 @@ import java.util.Random; import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import 
org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.ExitableDirectoryReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MockRandomMergePolicy; import org.apache.lucene.index.QueryTimeout; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SerialMergeScheduler; -import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TestSortingMergePolicy; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; @@ -50,8 +50,6 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.store.Directory; -import org.apache.lucene.uninverting.UninvertingReader; -import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -62,18 +60,11 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase { private int numDocs; private List terms; private Directory dir; - private Sort sort; + private final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG)); private RandomIndexWriter iw; private IndexReader reader; - private SortingMergePolicy mergePolicy; private final int forceMergeMaxSegmentCount = 5; - @Override - public void setUp() throws Exception { - super.setUp(); - sort = new Sort(new SortField("ndv1", SortField.Type.LONG)); - } - private Document randomDocument() { final Document doc = new Document(); doc.add(new NumericDocValuesField("ndv1", random().nextInt(10))); @@ -93,9 +84,12 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase { terms = new ArrayList<>(randomTerms); final long seed = random().nextLong(); final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); + if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) { + // MockRandomMP randomly wraps the leaf readers which makes merging angry + iwc.setMergePolicy(newTieredMergePolicy()); + } iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests - mergePolicy = TestSortingMergePolicy.newSortingMergePolicy(sort); - iwc.setMergePolicy(mergePolicy); + iwc.setIndexSort(sort); iw = new RandomIndexWriter(new Random(seed), dir, iwc); iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP for (int i = 0; i < numDocs; ++i) { @@ -151,7 +145,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase { query = new MatchAllDocsQuery(); } searcher.search(query, collector1); - searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy.getSort())); + searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits)); assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); } @@ -190,40 +184,16 @@ public class TestEarlyTerminatingSortingCollector extends 
LuceneTestCase { } public void testEarlyTerminationDifferentSorter() throws IOException { - createRandomIndex(false); - final int iters = atLeast(3); - for (int i = 0; i < iters; ++i) { - final IndexSearcher searcher = newSearcher(reader); - // test that the collector works correctly when the index was sorted by a - // different sorter than the one specified in the ctor. - final int numHits = TestUtil.nextInt(random(), 1, numDocs); - final Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false)); - final boolean fillFields = random().nextBoolean(); - final boolean trackDocScores = random().nextBoolean(); - final boolean trackMaxScore = random().nextBoolean(); - final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); - final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore); - - final Query query; - if (random().nextBoolean()) { - query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms))); - } else { - query = new MatchAllDocsQuery(); - } - searcher.search(query, collector1); - Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG)); + createRandomIndex(true); - searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, different) { - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - final LeafCollector ret = super.getLeafCollector(context); - assertTrue("segment should not be recognized as sorted as different sorter was used", ret.getClass() == in.getLeafCollector(context).getClass()); - return ret; - } - }); - assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); - assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); - } + Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false)); + Collector c = new EarlyTerminatingSortingCollector(TopFieldCollector.create(sort, 10, true, true, true), sort, 10); + IndexSearcher searcher = newSearcher(reader); + Exception e = expectThrows(IllegalStateException.class, + () -> { + searcher.search(new MatchAllDocsQuery(), c); + }); + assertEquals("Cannot early terminate with sort order if segments are sorted with ", e.getMessage()); closeIndex(); } @@ -262,34 +232,19 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase { } } - private IndexSearcher newSearcherForTestTerminatedEarly(IndexReader r) throws IOException { - switch(random().nextInt(2)) { - case 0: - return new IndexSearcher(r); - case 1: - assertTrue(r+" is not a DirectoryReader", (r instanceof DirectoryReader)); - final DirectoryReader directoryReader = ExitableDirectoryReader.wrap( - UninvertingReader.wrap((DirectoryReader) r, new HashMap()), - new TestEarlyTerminatingSortingcollectorQueryTimeout(false)); - return new IndexSearcher(directoryReader); - } - fail("newSearcherForTestTerminatedEarly("+r+") fell through switch"); - return null; - } - public void testTerminatedEarly() throws IOException { final int iters = atLeast(8); for (int i = 0; i < iters; ++i) { createRandomIndex(true); - final IndexSearcher searcher = newSearcherForTestTerminatedEarly(reader); // future TODO: use newSearcher(reader); + final IndexSearcher searcher = new IndexSearcher(reader); // future TODO: use newSearcher(reader); final Query query = new MatchAllDocsQuery(); // search for everything/anything final TestTerminatedEarlySimpleCollector collector1 = new 
TestTerminatedEarlySimpleCollector(); searcher.search(query, collector1); final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector(); - final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1, mergePolicy.getSort()); + final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1); searcher.search(query, etsCollector); assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething()); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index 88d89d29417..078c8da3653 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -1151,14 +1151,14 @@ public class TestPointQueries extends LuceneTestCase { } private static Codec getCodec() { - if (Codec.getDefault().getName().equals("Lucene60")) { + if (Codec.getDefault().getName().equals("Lucene62")) { int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); double maxMBSortInHeap = 5.0 + (3*random().nextDouble()); if (VERBOSE) { System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); } - return new FilterCodec("Lucene60", Codec.getDefault()) { + return new FilterCodec("Lucene62", Codec.getDefault()) { @Override public PointsFormat pointsFormat() { return new PointsFormat() { diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java index 38b3fb5c87c..b1a8f8d3f88 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java @@ -25,6 +25,7 @@ import java.util.BitSet; import java.util.List; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.MergeState; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.store.CorruptingIndexOutput; @@ -554,7 +555,7 @@ public class TestBKD extends LuceneTestCase { } List toMerge = null; - List docIDBases = null; + List docMaps = null; int seg = 0; BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length, false); @@ -601,9 +602,15 @@ public class TestBKD extends LuceneTestCase { if (useMerge && segCount == valuesInThisSeg) { if (toMerge == null) { toMerge = new ArrayList<>(); - docIDBases = new ArrayList<>(); + docMaps = new ArrayList<>(); } - docIDBases.add(lastDocIDBase); + final int curDocIDBase = lastDocIDBase; + docMaps.add(new MergeState.DocMap() { + @Override + public int get(int docID) { + return curDocIDBase + docID; + } + }); toMerge.add(w.finish(out)); valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues/2); segCount = 0; @@ -620,8 +627,14 @@ public class TestBKD extends LuceneTestCase { if (toMerge != null) { if (segCount > 0) { - docIDBases.add(lastDocIDBase); toMerge.add(w.finish(out)); + final int curDocIDBase = lastDocIDBase; + docMaps.add(new MergeState.DocMap() { + @Override + public int get(int docID) { + return curDocIDBase + docID; + } + }); } out.close(); in = dir.openInput("bkd", IOContext.DEFAULT); 
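The MergeState.DocMap instances built in the TestBKD change above (and handed to BKDWriter.merge in the next hunk) each translate a segment-local doc ID into its ID in the merged segment. A minimal sketch of that idea, assuming plain concatenation with no deletions; the class name DocBaseDocMaps and the segmentMaxDocs parameter are illustrative only and not part of this patch:

import org.apache.lucene.index.MergeState;

// Illustrative sketch: one DocMap per incoming segment, shifting local doc IDs
// by the number of documents in all preceding segments (no deletions handled).
class DocBaseDocMaps {
  static MergeState.DocMap[] build(int[] segmentMaxDocs) {
    MergeState.DocMap[] maps = new MergeState.DocMap[segmentMaxDocs.length];
    int docBase = 0;
    for (int i = 0; i < segmentMaxDocs.length; i++) {
      final int base = docBase; // ID of this segment's first doc in the merged segment
      maps[i] = new MergeState.DocMap() {
        @Override
        public int get(int docID) {
          return base + docID; // segment-local ID -> merged-segment ID
        }
      };
      docBase += segmentMaxDocs[i];
    }
    return maps;
  }
}

When deletions are present, a doc map also has to compact away deleted documents, which is what MergeState.removeDeletes(maxDoc, liveDocs) produces in the TestSegmentMerger change earlier in this patch.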
@@ -633,7 +646,7 @@ public class TestBKD extends LuceneTestCase { readers.add(new BKDReader(in)); } out = dir.createOutput("bkd2", IOContext.DEFAULT); - indexFP = w.merge(out, null, readers, docIDBases); + indexFP = w.merge(out, docMaps, readers); out.close(); in.close(); in = dir.openInput("bkd2", IOContext.DEFAULT); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java index 4d76fa9dd39..55f360ad308 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java @@ -21,7 +21,6 @@ import java.util.Collections; import java.util.Iterator; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.PointValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -29,11 +28,13 @@ import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Terms; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; /** @@ -178,4 +179,8 @@ public class TermVectorLeafReader extends LeafReader { public void document(int docID, StoredFieldVisitor visitor) throws IOException { } + @Override + public Sort getIndexSort() { + return null; + } } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 5b133013ed5..e3aa4b14a13 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -40,6 +40,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.Sort; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.*; @@ -1606,6 +1607,10 @@ public class MemoryIndex { return info.getNormDocValues(); } + @Override + public Sort getIndexSort() { + return null; + } } /** diff --git a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java index c672ed00bac..368c2854a22 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java +++ b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java @@ -140,7 +140,7 @@ public class IndexSplitter { SegmentInfo info = infoPerCommit.info; // Same info just changing the dir: SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.maxDoc(), - info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>()); + info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>(), null); destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen(), 
infoPerCommit.getFieldInfosGen(), infoPerCommit.getDocValuesGen())); diff --git a/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java b/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java index de79ab07f76..de711fda460 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java +++ b/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java @@ -24,6 +24,7 @@ import java.util.Map; import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; /** @@ -272,4 +273,9 @@ public final class SlowCompositeReaderWrapper extends LeafReader { ctx.reader().checkIntegrity(); } } + + @Override + public Sort getIndexSort() { + return null; + } } diff --git a/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java deleted file mode 100644 index cd8f84e056e..00000000000 --- a/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.index; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.MergeTrigger; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.index.SegmentCommitInfo; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentInfos; -import org.apache.lucene.index.SegmentReader; -import org.apache.lucene.index.SlowCompositeReaderWrapper; -import org.apache.lucene.search.Sort; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.packed.PackedInts; -import org.apache.lucene.util.packed.PackedLongValues; - -/** A {@link MergePolicy} that reorders documents according to a {@link Sort} - * before merging them. As a consequence, all segments resulting from a merge - * will be sorted while segments resulting from a flush will be in the order - * in which documents have been added. - *

NOTE: Never use this policy if you rely on
- * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments}
- * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
-

        NOTE: This policy should only be used with idempotent {@code Sort}s - * so that the order of segments is predictable. For example, using - * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make - * the order of documents in a segment depend on the number of times the segment - * has been merged. - * @lucene.experimental */ -public final class SortingMergePolicy extends MergePolicyWrapper { - - /** - * Put in the {@link SegmentInfo#getDiagnostics() diagnostics} to denote that - * this segment is sorted. - */ - public static final String SORTER_ID_PROP = "sorter"; - - class SortingOneMerge extends OneMerge { - - List unsortedReaders; - Sorter.DocMap docMap; - LeafReader sortedView; - final InfoStream infoStream; - - SortingOneMerge(List segments, InfoStream infoStream) { - super(segments); - this.infoStream = infoStream; - } - - @Override - public List getMergeReaders() throws IOException { - if (unsortedReaders == null) { - unsortedReaders = super.getMergeReaders(); - if (infoStream.isEnabled("SMP")) { - infoStream.message("SMP", "sorting " + unsortedReaders); - for (LeafReader leaf : unsortedReaders) { - String sortDescription = getSortDescription(leaf); - if (sortDescription == null) { - sortDescription = "not sorted"; - } - infoStream.message("SMP", "seg=" + leaf + " " + sortDescription); - } - } - // wrap readers, to be optimal for merge; - List wrapped = new ArrayList<>(unsortedReaders.size()); - for (LeafReader leaf : unsortedReaders) { - if (leaf instanceof SegmentReader) { - leaf = new MergeReaderWrapper((SegmentReader)leaf); - } - wrapped.add(leaf); - } - final LeafReader atomicView; - if (wrapped.size() == 1) { - atomicView = wrapped.get(0); - } else { - final CompositeReader multiReader = new MultiReader(wrapped.toArray(new LeafReader[wrapped.size()])); - atomicView = new SlowCompositeReaderWrapper(multiReader, true); - } - docMap = sorter.sort(atomicView); - sortedView = SortingLeafReader.wrap(atomicView, docMap); - } - // a null doc map means that the readers are already sorted - if (docMap == null) { - if (infoStream.isEnabled("SMP")) { - infoStream.message("SMP", "readers already sorted, omitting sort"); - } - return unsortedReaders; - } else { - if (infoStream.isEnabled("SMP")) { - infoStream.message("SMP", "sorting readers by " + sort); - } - return Collections.singletonList(SlowCodecReaderWrapper.wrap(sortedView)); - } - } - - @Override - public void setMergeInfo(SegmentCommitInfo info) { - Map diagnostics = info.info.getDiagnostics(); - diagnostics.put(SORTER_ID_PROP, sorter.getID()); - super.setMergeInfo(info); - } - - private PackedLongValues getDeletes(List readers) { - PackedLongValues.Builder deletes = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); - int deleteCount = 0; - for (LeafReader reader : readers) { - final int maxDoc = reader.maxDoc(); - final Bits liveDocs = reader.getLiveDocs(); - for (int i = 0; i < maxDoc; ++i) { - if (liveDocs != null && !liveDocs.get(i)) { - ++deleteCount; - } else { - deletes.add(deleteCount); - } - } - } - return deletes.build(); - } - - @Override - public MergePolicy.DocMap getDocMap(final MergeState mergeState) { - if (unsortedReaders == null) { - throw new IllegalStateException(); - } - if (docMap == null) { - return super.getDocMap(mergeState); - } - assert mergeState.docMaps.length == 1; // we returned a singleton reader - final PackedLongValues deletes = getDeletes(unsortedReaders); - return new MergePolicy.DocMap() { - @Override - public int map(int old) { - final int oldWithDeletes = old + 
(int) deletes.get(old); - final int newWithDeletes = docMap.oldToNew(oldWithDeletes); - return mergeState.docMaps[0].get(newWithDeletes); - } - }; - } - - @Override - public String toString() { - return "SortingMergePolicy.SortingOneMerge(segments=" + segString() + " sort=" + sort + ")"; - } - } - - class SortingMergeSpecification extends MergeSpecification { - final InfoStream infoStream; - - SortingMergeSpecification(InfoStream infoStream) { - this.infoStream = infoStream; - } - - @Override - public void add(OneMerge merge) { - super.add(new SortingOneMerge(merge.segments, infoStream)); - } - - @Override - public String segString(Directory dir) { - return "SortingMergeSpec(" + super.segString(dir) + ", sorter=" + sorter + ")"; - } - - } - - /** Returns {@code true} if the given {@code reader} is sorted by the - * {@code sort} given. Typically the given {@code sort} would be the - * {@link SortingMergePolicy#getSort()} order of a {@link SortingMergePolicy}. */ - public static boolean isSorted(LeafReader reader, Sort sort) { - String description = getSortDescription(reader); - if (description != null && description.equals(sort.toString())) { - return true; - } - return false; - } - - private static String getSortDescription(LeafReader reader) { - if (reader instanceof SegmentReader) { - final SegmentReader segReader = (SegmentReader) reader; - final Map diagnostics = segReader.getSegmentInfo().info.getDiagnostics(); - if (diagnostics != null) { - return diagnostics.get(SORTER_ID_PROP); - } - } else if (reader instanceof FilterLeafReader) { - return getSortDescription(FilterLeafReader.unwrap(reader)); - } - return null; - } - - private MergeSpecification sortedMergeSpecification(MergeSpecification specification, InfoStream infoStream) { - if (specification == null) { - return null; - } - MergeSpecification sortingSpec = new SortingMergeSpecification(infoStream); - for (OneMerge merge : specification.merges) { - sortingSpec.add(merge); - } - return sortingSpec; - } - - final Sorter sorter; - final Sort sort; - - /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */ - public SortingMergePolicy(MergePolicy in, Sort sort) { - super(in); - this.sorter = new Sorter(sort); - this.sort = sort; - } - - /** Return the {@link Sort} order that is used to sort segments when merging. 
*/ - public Sort getSort() { - return sort; - } - - @Override - public MergeSpecification findMerges(MergeTrigger mergeTrigger, - SegmentInfos segmentInfos, IndexWriter writer) throws IOException { - return sortedMergeSpecification(in.findMerges(mergeTrigger, segmentInfos, writer), writer.infoStream); - } - - @Override - public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, - int maxSegmentCount, Map segmentsToMerge, IndexWriter writer) - throws IOException { - return sortedMergeSpecification(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer), writer.infoStream); - } - - @Override - public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) - throws IOException { - return sortedMergeSpecification(in.findForcedDeletesMerges(segmentInfos, writer), writer.infoStream); - } - - @Override - public String toString() { - return "SortingMergePolicy(" + in + ", sorter=" + sorter + ")"; - } -} diff --git a/lucene/misc/src/java/org/apache/lucene/index/package.html b/lucene/misc/src/java/org/apache/lucene/index/package.html index dc9cbb7b676..33ce964eaf2 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/package.html +++ b/lucene/misc/src/java/org/apache/lucene/index/package.html @@ -18,23 +18,5 @@ Misc index tools and index support. - -SortingMergePolicy: -

Provides index sorting capablities. The application can use any
-Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
-reverse the order of the documents (by using SortField.Type.DOC in reverse).
-Multi-level sorts can be specified the same way you would when searching, by
-building Sort from multiple SortFields.
-
{@link org.apache.lucene.index.SortingMergePolicy} can be used to
-make Lucene sort segments before merging them. This will ensure that every
-segment resulting from a merge will be sorted according to the provided
-{@link org.apache.lucene.search.Sort}. This however makes merging and
-thus indexing slower.
-
Sorted segments allow for early query termination when the sort order
-matches index order. This makes query execution faster since not all documents
-need to be visited. Please note that this is an expert feature and should not
-be used without a deep understanding of Lucene merging and document collection.
diff --git a/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java
deleted file mode 100644
index 03a2cb8ee78..00000000000
--- a/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.SortingMergePolicy;
-import org.apache.lucene.util.BitSet;
-
-/**
- * Helper class to sort readers that contain blocks of documents.
- *

        - * Note that this class is intended to used with {@link SortingMergePolicy}, - * and for other purposes has some limitations: - *

          - *
        • Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter} - *
        • Filling sort field values is not yet supported. - *
        - * @lucene.experimental - */ -// TODO: can/should we clean this thing up (e.g. return a proper sort value) -// and move to the join/ module? -public class BlockJoinComparatorSource extends FieldComparatorSource { - final Query parentsFilter; - final Sort parentSort; - final Sort childSort; - - /** - * Create a new BlockJoinComparatorSource, sorting only blocks of documents - * with {@code parentSort} and not reordering children with a block. - * - * @param parentsFilter Filter identifying parent documents - * @param parentSort Sort for parent documents - */ - public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort) { - this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC)); - } - - /** - * Create a new BlockJoinComparatorSource, specifying the sort order for both - * blocks of documents and children within a block. - * - * @param parentsFilter Filter identifying parent documents - * @param parentSort Sort for parent documents - * @param childSort Sort for child documents in the same block - */ - public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort, Sort childSort) { - this.parentsFilter = parentsFilter; - this.parentSort = parentSort; - this.childSort = childSort; - } - - @Override - @SuppressWarnings({"unchecked", "rawtypes"}) - public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { - // we keep parallel slots: the parent ids and the child ids - final int parentSlots[] = new int[numHits]; - final int childSlots[] = new int[numHits]; - - SortField parentFields[] = parentSort.getSort(); - final int parentReverseMul[] = new int[parentFields.length]; - final FieldComparator parentComparators[] = new FieldComparator[parentFields.length]; - for (int i = 0; i < parentFields.length; i++) { - parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1; - parentComparators[i] = parentFields[i].getComparator(1, i); - } - - SortField childFields[] = childSort.getSort(); - final int childReverseMul[] = new int[childFields.length]; - final FieldComparator childComparators[] = new FieldComparator[childFields.length]; - for (int i = 0; i < childFields.length; i++) { - childReverseMul[i] = childFields[i].getReverse() ? -1 : 1; - childComparators[i] = childFields[i].getComparator(1, i); - } - - // NOTE: we could return parent ID as value but really our sort "value" is more complex... - // So we throw UOE for now. At the moment you really should only use this at indexing time. 
- return new FieldComparator() { - int bottomParent; - int bottomChild; - BitSet parentBits; - LeafFieldComparator[] parentLeafComparators; - LeafFieldComparator[] childLeafComparators; - - @Override - public int compare(int slot1, int slot2) { - try { - return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public void setTopValue(Integer value) { - // we dont have enough information (the docid is needed) - throw new UnsupportedOperationException("this comparator cannot be used with deep paging"); - } - - @Override - public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { - if (parentBits != null) { - throw new IllegalStateException("This comparator can only be used on a single segment"); - } - IndexSearcher searcher = new IndexSearcher(ReaderUtil.getTopLevelContext(context)); - searcher.setQueryCache(null); - final Weight weight = searcher.createNormalizedWeight(parentsFilter, false); - final Scorer parents = weight.scorer(context); - if (parents == null) { - throw new IllegalStateException("LeafReader " + context.reader() + " contains no parents!"); - } - parentBits = BitSet.of(parents.iterator(), context.reader().maxDoc()); - parentLeafComparators = new LeafFieldComparator[parentComparators.length]; - for (int i = 0; i < parentComparators.length; i++) { - parentLeafComparators[i] = parentComparators[i].getLeafComparator(context); - } - childLeafComparators = new LeafFieldComparator[childComparators.length]; - for (int i = 0; i < childComparators.length; i++) { - childLeafComparators[i] = childComparators[i].getLeafComparator(context); - } - - return new LeafFieldComparator() { - - @Override - public int compareBottom(int doc) throws IOException { - return compare(bottomChild, bottomParent, doc, parent(doc)); - } - - @Override - public int compareTop(int doc) throws IOException { - // we dont have enough information (the docid is needed) - throw new UnsupportedOperationException("this comparator cannot be used with deep paging"); - } - - @Override - public void copy(int slot, int doc) throws IOException { - childSlots[slot] = doc; - parentSlots[slot] = parent(doc); - } - - @Override - public void setBottom(int slot) { - bottomParent = parentSlots[slot]; - bottomChild = childSlots[slot]; - } - - @Override - public void setScorer(Scorer scorer) { - for (LeafFieldComparator comp : parentLeafComparators) { - comp.setScorer(scorer); - } - for (LeafFieldComparator comp : childLeafComparators) { - comp.setScorer(scorer); - } - } - - }; - } - - @Override - public Integer value(int slot) { - // really our sort "value" is more complex... 
- throw new UnsupportedOperationException("filling sort field values is not yet supported"); - } - - int parent(int doc) { - return parentBits.nextSetBit(doc); - } - - int compare(int docID1, int parent1, int docID2, int parent2) throws IOException { - if (parent1 == parent2) { // both are in the same block - if (docID1 == parent1 || docID2 == parent2) { - // keep parents at the end of blocks - return docID1 - docID2; - } else { - return compare(docID1, docID2, childLeafComparators, childReverseMul); - } - } else { - int cmp = compare(parent1, parent2, parentLeafComparators, parentReverseMul); - if (cmp == 0) { - return parent1 - parent2; - } else { - return cmp; - } - } - } - - int compare(int docID1, int docID2, LeafFieldComparator comparators[], int reverseMul[]) throws IOException { - for (int i = 0; i < comparators.length; i++) { - // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co, - // the segments are always the same here... - comparators[i].copy(0, docID1); - comparators[i].setBottom(0); - int comp = reverseMul[i] * comparators[i].compareBottom(docID2); - if (comp != 0) { - return comp; - } - } - return 0; // no need to docid tiebreak - } - }; - } - - @Override - public String toString() { - return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")"; - } -} diff --git a/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java b/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java deleted file mode 100644 index 8b384f41db1..00000000000 --- a/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.index; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.SlowCompositeReaderWrapper; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.TestUtil; -import org.junit.BeforeClass; - -public class IndexSortingTest extends SorterTestBase { - - private static final Sort[] SORT = new Sort[] { - new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)), - new Sort(new SortField(null, SortField.Type.DOC, true)) - }; - - @BeforeClass - public static void beforeClassSorterUtilTest() throws Exception { - // NOTE: index was created by by super's @BeforeClass - - // only read the values of the undeleted documents, since after addIndexes, - // the deleted ones will be dropped from the index. 
- Bits liveDocs = unsortedReader.getLiveDocs(); - List values = new ArrayList<>(); - for (int i = 0; i < unsortedReader.maxDoc(); i++) { - if (liveDocs == null || liveDocs.get(i)) { - values.add(Integer.valueOf(unsortedReader.document(i).get(ID_FIELD))); - } - } - int idx = random().nextInt(SORT.length); - Sort sorter = SORT[idx]; - if (idx == 1) { // reverse doc sort - Collections.reverse(values); - } else { - Collections.sort(values); - if (random().nextBoolean()) { - sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending - Collections.reverse(values); - } - } - sortedValues = values.toArray(new Integer[values.size()]); - if (VERBOSE) { - System.out.println("sortedValues: " + sortedValues); - System.out.println("Sorter: " + sorter); - } - - Directory target = newDirectory(); - IndexWriter writer = new IndexWriter(target, newIndexWriterConfig(null)); - LeafReader reader = SortingLeafReader.wrap(unsortedReader, sorter); - writer.addIndexes(SlowCodecReaderWrapper.wrap(reader)); - writer.close(); - // NOTE: also closes unsortedReader - reader.close(); - dir.close(); - - // CheckIndex the target directory - dir = target; - TestUtil.checkIndex(dir); - - // set reader for tests - sortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); - assertFalse("index should not have deletions", sortedReader.hasDeletions()); - } - -} diff --git a/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java b/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java deleted file mode 100644 index df1c80f881a..00000000000 --- a/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.index; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Random; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.document.BinaryDocValuesField; -import org.apache.lucene.document.BinaryPoint; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedDocValuesField; -import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.PointValues.IntersectVisitor; -import org.apache.lucene.index.PointValues.Relation; -import org.apache.lucene.index.SortingLeafReader.SortingDocsEnum; -import org.apache.lucene.index.TermsEnum.SeekStatus; -import org.apache.lucene.search.CollectionStatistics; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.TermStatistics; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.util.TestUtil; -import org.junit.AfterClass; -import org.junit.BeforeClass; - -public abstract class SorterTestBase extends LuceneTestCase { - - static final class NormsSimilarity extends Similarity { - - private final Similarity in; - - public NormsSimilarity(Similarity in) { - this.in = in; - } - - @Override - public long computeNorm(FieldInvertState state) { - if (state.getName().equals(NORMS_FIELD)) { - return Float.floatToIntBits(state.getBoost()); - } else { - return in.computeNorm(state); - } - } - - @Override - public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... 
termStats) { - return in.computeWeight(collectionStats, termStats); - } - - @Override - public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException { - return in.simScorer(weight, context); - } - - } - - static final class PositionsTokenStream extends TokenStream { - - private final CharTermAttribute term; - private final PayloadAttribute payload; - private final OffsetAttribute offset; - - private int pos, off; - - public PositionsTokenStream() { - term = addAttribute(CharTermAttribute.class); - payload = addAttribute(PayloadAttribute.class); - offset = addAttribute(OffsetAttribute.class); - } - - @Override - public boolean incrementToken() throws IOException { - if (pos == 0) { - return false; - } - - clearAttributes(); - term.append(DOC_POSITIONS_TERM); - payload.setPayload(new BytesRef(Integer.toString(pos))); - offset.setOffset(off, off); - --pos; - ++off; - return true; - } - - void setId(int id) { - pos = id / 10 + 1; - off = 0; - } - } - - protected static final String ID_FIELD = "id"; - protected static final String DOCS_ENUM_FIELD = "docs"; - protected static final String DOCS_ENUM_TERM = "$all$"; - protected static final String DOC_POSITIONS_FIELD = "positions"; - protected static final String DOC_POSITIONS_TERM = "$all$"; - protected static final String NUMERIC_DV_FIELD = "numeric"; - protected static final String SORTED_NUMERIC_DV_FIELD = "sorted_numeric"; - protected static final String NORMS_FIELD = "norm"; - protected static final String BINARY_DV_FIELD = "binary"; - protected static final String SORTED_DV_FIELD = "sorted"; - protected static final String SORTED_SET_DV_FIELD = "sorted_set"; - protected static final String TERM_VECTORS_FIELD = "term_vectors"; - protected static final String DIMENSIONAL_FIELD = "numeric1d"; - - private static final FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); - static { - TERM_VECTORS_TYPE.setStoreTermVectors(true); - TERM_VECTORS_TYPE.freeze(); - } - - private static final FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED); - static { - POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); - POSITIONS_TYPE.freeze(); - } - - protected static Directory dir; - protected static LeafReader unsortedReader; - protected static LeafReader sortedReader; - protected static Integer[] sortedValues; - - private static Document doc(final int id, PositionsTokenStream positions) { - final Document doc = new Document(); - doc.add(new StringField(ID_FIELD, Integer.toString(id), Store.YES)); - doc.add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Store.NO)); - positions.setId(id); - doc.add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE)); - doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id)); - TextField norms = new TextField(NORMS_FIELD, Integer.toString(id), Store.NO); - norms.setBoost(Float.intBitsToFloat(id)); - doc.add(norms); - doc.add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(Integer.toString(id)))); - doc.add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(Integer.toString(id)))); - doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id)))); - doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1)))); - doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id)); - doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1)); - doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE)); - 
byte[] bytes = new byte[4]; - NumericUtils.intToSortableBytes(id, bytes, 0); - // TODO: index time sorting doesn't yet support points - //doc.add(new BinaryPoint(DIMENSIONAL_FIELD, bytes)); - return doc; - } - - /** Creates an unsorted index; subclasses then sort this index and open sortedReader. */ - private static void createIndex(Directory dir, int numDocs, Random random) throws IOException { - List ids = new ArrayList<>(); - for (int i = 0; i < numDocs; i++) { - ids.add(Integer.valueOf(i * 10)); - } - // shuffle them for indexing - Collections.shuffle(ids, random); - if (VERBOSE) { - System.out.println("Shuffled IDs for indexing: " + Arrays.toString(ids.toArray())); - } - - PositionsTokenStream positions = new PositionsTokenStream(); - IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random)); - conf.setMaxBufferedDocs(4); // create some segments - conf.setSimilarity(new NormsSimilarity(conf.getSimilarity())); // for testing norms field - RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf); - writer.setDoRandomForceMerge(false); - for (int id : ids) { - writer.addDocument(doc(id, positions)); - } - // delete some documents - writer.commit(); - for (Integer id : ids) { - if (random.nextDouble() < 0.2) { - if (VERBOSE) { - System.out.println("delete doc_id " + id); - } - writer.deleteDocuments(new Term(ID_FIELD, id.toString())); - } - } - writer.close(); - } - - @BeforeClass - public static void beforeClassSorterTestBase() throws Exception { - dir = newDirectory(); - int numDocs = atLeast(20); - createIndex(dir, numDocs, random()); - - unsortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); - } - - @AfterClass - public static void afterClassSorterTestBase() throws Exception { - unsortedReader.close(); - sortedReader.close(); - dir.close(); - unsortedReader = sortedReader = null; - dir = null; - } - - public void testBinaryDocValuesField() throws Exception { - BinaryDocValues dv = sortedReader.getBinaryDocValues(BINARY_DV_FIELD); - for (int i = 0; i < sortedReader.maxDoc(); i++) { - final BytesRef bytes = dv.get(i); - assertEquals("incorrect binary DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString()); - } - } - - public void testDocsAndPositionsEnum() throws Exception { - TermsEnum termsEnum = sortedReader.terms(DOC_POSITIONS_FIELD).iterator(); - assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM))); - PostingsEnum sortedPositions = termsEnum.postings(null, PostingsEnum.ALL); - int doc; - - // test nextDoc() - while ((doc = sortedPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - int freq = sortedPositions.freq(); - assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq); - for (int i = 0; i < freq; i++) { - assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition()); - assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); - assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); - assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString())); - } - } - - // test advance() - final PostingsEnum reuse = sortedPositions; - sortedPositions = termsEnum.postings(reuse, PostingsEnum.ALL); - if (sortedPositions instanceof SortingDocsEnum) { - assertTrue(((SortingDocsEnum) sortedPositions).reused(reuse)); // make sure reuse worked - } - doc = 0; - while ((doc = sortedPositions.advance(doc + 
TestUtil.nextInt(random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS) { - int freq = sortedPositions.freq(); - assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq); - for (int i = 0; i < freq; i++) { - assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition()); - assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); - assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); - assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString())); - } - } - } - - Bits randomLiveDocs(int maxDoc) { - if (rarely()) { - if (random().nextBoolean()) { - return null; - } else { - return new Bits.MatchNoBits(maxDoc); - } - } - final FixedBitSet bits = new FixedBitSet(maxDoc); - final int bitsSet = TestUtil.nextInt(random(), 1, maxDoc - 1); - for (int i = 0; i < bitsSet; ++i) { - while (true) { - final int index = random().nextInt(maxDoc); - if (!bits.get(index)) { - bits.set(index); - break; - } - } - } - return bits; - } - - public void testDocsEnum() throws Exception { - TermsEnum termsEnum = sortedReader.terms(DOCS_ENUM_FIELD).iterator(); - assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOCS_ENUM_TERM))); - PostingsEnum docs = termsEnum.postings(null); - - int doc; - while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD))); - } - - PostingsEnum reuse = docs; - docs = termsEnum.postings(reuse); - if (docs instanceof SortingDocsEnum) { - assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked - } - doc = -1; - while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) { - assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD))); - } - } - - public void testNormValues() throws Exception { - NumericDocValues dv = sortedReader.getNormValues(NORMS_FIELD); - int maxDoc = sortedReader.maxDoc(); - for (int i = 0; i < maxDoc; i++) { - assertEquals("incorrect norm value for doc " + i, sortedValues[i].intValue(), dv.get(i)); - } - } - - public void testNumericDocValuesField() throws Exception { - NumericDocValues dv = sortedReader.getNumericDocValues(NUMERIC_DV_FIELD); - int maxDoc = sortedReader.maxDoc(); - for (int i = 0; i < maxDoc; i++) { - assertEquals("incorrect numeric DocValues for doc " + i, sortedValues[i].intValue(), dv.get(i)); - } - } - - public void testSortedDocValuesField() throws Exception { - SortedDocValues dv = sortedReader.getSortedDocValues(SORTED_DV_FIELD); - int maxDoc = sortedReader.maxDoc(); - for (int i = 0; i < maxDoc; i++) { - final BytesRef bytes = dv.get(i); - assertEquals("incorrect sorted DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString()); - } - } - - public void testSortedSetDocValuesField() throws Exception { - SortedSetDocValues dv = sortedReader.getSortedSetDocValues(SORTED_SET_DV_FIELD); - int maxDoc = sortedReader.maxDoc(); - for (int i = 0; i < maxDoc; i++) { - dv.setDocument(i); - BytesRef bytes = dv.lookupOrd(dv.nextOrd()); - int value = sortedValues[i].intValue(); - assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value).toString(), bytes.utf8ToString()); - bytes = dv.lookupOrd(dv.nextOrd()); - assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value + 
1).toString(), bytes.utf8ToString()); - assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd()); - } - } - - public void testSortedNumericDocValuesField() throws Exception { - SortedNumericDocValues dv = sortedReader.getSortedNumericDocValues(SORTED_NUMERIC_DV_FIELD); - int maxDoc = sortedReader.maxDoc(); - for (int i = 0; i < maxDoc; i++) { - dv.setDocument(i); - assertEquals(2, dv.count()); - int value = sortedValues[i].intValue(); - assertEquals("incorrect sorted-numeric DocValues for doc " + i, value, dv.valueAt(0)); - assertEquals("incorrect sorted-numeric DocValues for doc " + i, value + 1, dv.valueAt(1)); - } - } - - public void testTermVectors() throws Exception { - int maxDoc = sortedReader.maxDoc(); - for (int i = 0; i < maxDoc; i++) { - Terms terms = sortedReader.getTermVector(i, TERM_VECTORS_FIELD); - assertNotNull("term vectors not found for doc " + i + " field [" + TERM_VECTORS_FIELD + "]", terms); - assertEquals("incorrect term vector for doc " + i, sortedValues[i].toString(), terms.iterator().next().utf8ToString()); - } - } - - // TODO: index sorting doesn't yet support points - /* - public void testPoints() throws Exception { - PointValues values = sortedReader.getPointValues(); - values.intersect(DIMENSIONAL_FIELD, - new IntersectVisitor() { - @Override - public void visit(int docID) { - throw new IllegalStateException(); - } - - @Override - public void visit(int docID, byte[] packedValues) { - assertEquals(sortedValues[docID].intValue(), NumericUtils.bytesToInt(packedValues, 0)); - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return Relation.CELL_CROSSES_QUERY; - } - }); - } - */ -} diff --git a/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java b/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java deleted file mode 100644 index 3e8cb99ae07..00000000000 --- a/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
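The tests being removed above lean heavily on the postings API: nextDoc()/advance() to step through documents, freq() and nextPosition() to walk positions, and startOffset()/endOffset()/getPayload() for per-position data. For orientation, iterating one term's positions and payloads looks roughly like the following sketch (the field and term names are illustrative, and the reader is assumed to have indexed the field with positions and payloads):

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

final class PostingsWalker {
  /** Prints every (doc, position, payload) for one term; the field must be indexed
   *  with positions (and, for non-null payloads, with payloads). */
  static void walk(LeafReader reader, String field, String term) throws IOException {
    Terms terms = reader.terms(field);
    if (terms == null) {
      return; // field not indexed in this reader
    }
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seekCeil(new BytesRef(term)) != TermsEnum.SeekStatus.FOUND) {
      return; // term not present
    }
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL); // positions, offsets, payloads
    int doc;
    while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      int freq = postings.freq();
      for (int i = 0; i < freq; i++) {
        int position = postings.nextPosition();
        BytesRef payload = postings.getPayload(); // may be null at this position
        System.out.println("doc=" + doc + " pos=" + position + " payload=" + payload);
      }
    }
  }
}

PostingsEnum.ALL asks the codec for every per-position attribute; cheaper flags such as PostingsEnum.FREQS can be used when offsets and payloads are not needed.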
- */ -package org.apache.lucene.index; - -import java.util.Arrays; - -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.TestUtil; -import org.junit.BeforeClass; - -public class SortingLeafReaderTest extends SorterTestBase { - - @BeforeClass - public static void beforeClassSortingLeafReaderTest() throws Exception { - // NOTE: index was created by by super's @BeforeClass - - // sort the index by id (as integer, in NUMERIC_DV_FIELD) - Sort sort = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT)); - final Sorter.DocMap docMap = new Sorter(sort).sort(unsortedReader); - - // Sorter.compute also sorts the values - NumericDocValues dv = unsortedReader.getNumericDocValues(NUMERIC_DV_FIELD); - sortedValues = new Integer[unsortedReader.maxDoc()]; - for (int i = 0; i < unsortedReader.maxDoc(); ++i) { - sortedValues[docMap.oldToNew(i)] = (int)dv.get(i); - } - if (VERBOSE) { - System.out.println("docMap: " + docMap); - System.out.println("sortedValues: " + Arrays.toString(sortedValues)); - } - - // sort the index by id (as integer, in NUMERIC_DV_FIELD) - sortedReader = SortingLeafReader.wrap(unsortedReader, sort); - - if (VERBOSE) { - System.out.print("mapped-deleted-docs: "); - Bits mappedLiveDocs = sortedReader.getLiveDocs(); - for (int i = 0; i < mappedLiveDocs.length(); i++) { - if (!mappedLiveDocs.get(i)) { - System.out.print(i + " "); - } - } - System.out.println(); - } - - TestUtil.checkReader(sortedReader); - } - - public void testBadSort() throws Exception { - IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { - SortingLeafReader.wrap(sortedReader, Sort.RELEVANCE); - }); - assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage()); - } - -} diff --git a/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java b/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java deleted file mode 100644 index 4a0d2b5a594..00000000000 --- a/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
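SortingLeafReaderTest, removed above, was the main exercise of the misc-module sorting view: Sorter computes an old-to-new document permutation for a given Sort, and SortingLeafReader.wrap exposes the whole reader in that order (rejecting relevance sorts). A condensed sketch of that now-removed usage, placed in the org.apache.lucene.index package because (as the deleted test's location suggests) the Sorter helper is only visible there, with the "ndv" field name purely illustrative:

package org.apache.lucene.index; // same package as the deleted test, so Sorter is reachable

import java.io.IOException;

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

final class SortingViewSketch {
  /** Returns a view of {@code unsorted} whose documents appear in ascending "ndv" order. */
  static LeafReader sortByLongDocValues(LeafReader unsorted) throws IOException {
    Sort sort = new Sort(new SortField("ndv", SortField.Type.LONG));

    // Optionally compute the permutation explicitly, e.g. to remap per-document data;
    // it may be null when the reader already happens to be in the requested order.
    Sorter.DocMap docMap = new Sorter(sort).sort(unsorted);
    if (docMap != null) {
      System.out.println("doc 0 sorts to position " + docMap.oldToNew(0));
    }

    // The wrapped reader exposes postings, doc values, norms, term vectors and live docs
    // in sorted order; Sort.RELEVANCE is rejected with an IllegalArgumentException.
    return SortingLeafReader.wrap(unsorted, sort);
  }
}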
- */ -package org.apache.lucene.index; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.search.BlockJoinComparatorSource; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BitSet; -import org.apache.lucene.util.LuceneTestCase; - -public class TestBlockJoinSorter extends LuceneTestCase { - - public void test() throws IOException { - final int numParents = atLeast(200); - IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); - cfg.setMergePolicy(newLogMergePolicy()); - final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), cfg); - final Document parentDoc = new Document(); - final NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L); - parentDoc.add(parentVal); - final StringField parent = new StringField("parent", "true", Store.YES); - parentDoc.add(parent); - for (int i = 0; i < numParents; ++i) { - List documents = new ArrayList<>(); - final int numChildren = random().nextInt(10); - for (int j = 0; j < numChildren; ++j) { - final Document childDoc = new Document(); - childDoc.add(new NumericDocValuesField("child_val", random().nextInt(5))); - documents.add(childDoc); - } - parentVal.setLongValue(random().nextInt(50)); - documents.add(parentDoc); - writer.addDocuments(documents); - } - writer.forceMerge(1); - IndexReader indexReader = writer.getReader(); - writer.close(); - - IndexSearcher searcher = newSearcher(indexReader); - indexReader = searcher.getIndexReader(); // newSearcher may have wrapped it - assertEquals(1, indexReader.leaves().size()); - final LeafReader reader = indexReader.leaves().get(0).reader(); - final Query parentsFilter = new TermQuery(new Term("parent", "true")); - - final Weight weight = searcher.createNormalizedWeight(parentsFilter, false); - final Scorer parents = weight.scorer(indexReader.leaves().get(0)); - final BitSet parentBits = BitSet.of(parents.iterator(), reader.maxDoc()); - final NumericDocValues parentValues = reader.getNumericDocValues("parent_val"); - final NumericDocValues childValues = reader.getNumericDocValues("child_val"); - - final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG)); - final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG)); - - final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort))); - final Sorter sorter = new Sorter(sort); - final Sorter.DocMap docMap = sorter.sort(reader); - assertEquals(reader.maxDoc(), docMap.size()); - - int[] children = new int[1]; - int numChildren = 0; - int previousParent = -1; - for (int i = 0; i < docMap.size(); ++i) { - final int oldID = docMap.newToOld(i); - if (parentBits.get(oldID)) { - // check that we have the right children - for (int j = 0; j < numChildren; ++j) { - assertEquals(oldID, parentBits.nextSetBit(children[j])); - } - // check that children are sorted - for (int j = 1; j < 
numChildren; ++j) { - final int doc1 = children[j-1]; - final int doc2 = children[j]; - if (childValues.get(doc1) == childValues.get(doc2)) { - assertTrue(doc1 < doc2); // sort is stable - } else { - assertTrue(childValues.get(doc1) < childValues.get(doc2)); - } - } - // check that parents are sorted - if (previousParent != -1) { - if (parentValues.get(previousParent) == parentValues.get(oldID)) { - assertTrue(previousParent < oldID); - } else { - assertTrue(parentValues.get(previousParent) < parentValues.get(oldID)); - } - } - // reset - previousParent = oldID; - numChildren = 0; - } else { - children = ArrayUtil.grow(children, numChildren+1); - children[numChildren++] = oldID; - } - } - indexReader.close(); - writer.w.getDirectory().close(); - } - -} diff --git a/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java b/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java deleted file mode 100644 index a5486f4ce7e..00000000000 --- a/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
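TestBlockJoinSorter, removed above, verified that a block-join aware Sort keeps every child block adjacent to (and before) its parent while ordering parents and children by their own criteria. The interesting part is how the Sort is assembled from BlockJoinComparatorSource; a sketch using the same field names as the deleted test:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BlockJoinComparatorSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;

final class BlockJoinSortSketch {
  static Sort newBlockJoinSort() {
    // Parents are marked with parent=true; children are the documents added before
    // their parent inside the same block.
    Query parentsFilter = new TermQuery(new Term("parent", "true"));
    Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
    Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
    // Orders whole blocks by parentSort and documents inside a block by childSort,
    // while guaranteeing children stay grouped with (and before) their parent.
    return new Sort(new SortField("custom",
        new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
  }
}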
- */ -package org.apache.lucene.index; - -import java.io.IOException; -import java.lang.reflect.Method; -import java.lang.reflect.Modifier; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Random; -import java.util.Set; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LogMergePolicy; -import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SlowCompositeReaderWrapper; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TieredMergePolicy; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.TestUtil; - -import com.carrotsearch.randomizedtesting.generators.RandomPicks; - -public class TestSortingMergePolicy extends BaseMergePolicyTestCase { - - private List terms; - private Directory dir1, dir2; - private Sort sort; - private boolean reversedSort; - private IndexReader reader; - private IndexReader sortedReader; - - @Override - public void setUp() throws Exception { - super.setUp(); - final Boolean reverse = (random().nextBoolean() ? null : new Boolean(random().nextBoolean())); - final SortField sort_field = (reverse == null - ? 
new SortField("ndv", SortField.Type.LONG) - : new SortField("ndv", SortField.Type.LONG, reverse.booleanValue())); - sort = new Sort(sort_field); - reversedSort = (null != reverse && reverse.booleanValue()); - createRandomIndexes(); - } - - private Document randomDocument() { - final Document doc = new Document(); - doc.add(new NumericDocValuesField("ndv", random().nextLong())); - doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES)); - return doc; - } - - public MergePolicy mergePolicy() { - return newSortingMergePolicy(sort); - } - - public static SortingMergePolicy newSortingMergePolicy(Sort sort) { - // usually create a MP with a low merge factor so that many merges happen - MergePolicy mp; - int thingToDo = random().nextInt(3); - if (thingToDo == 0) { - TieredMergePolicy tmp = newTieredMergePolicy(random()); - final int numSegs = TestUtil.nextInt(random(), 3, 5); - tmp.setSegmentsPerTier(numSegs); - tmp.setMaxMergeAtOnce(TestUtil.nextInt(random(), 2, numSegs)); - mp = tmp; - } else if (thingToDo == 1) { - LogMergePolicy lmp = newLogMergePolicy(random()); - lmp.setMergeFactor(TestUtil.nextInt(random(), 3, 5)); - mp = lmp; - } else { - // just a regular random one from LTC (could be alcoholic etc) - mp = newMergePolicy(); - } - // wrap it with a sorting mp - if (VERBOSE) { - System.out.println("TEST: return SortingMergePolicy(mp=" + mp + " sort=" + sort + ")"); - } - return new SortingMergePolicy(mp, sort); - } - - private void createRandomIndexes() throws IOException { - dir1 = newDirectory(); - dir2 = newDirectory(); - final int numDocs = atLeast(150); - final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5); - Set randomTerms = new HashSet<>(); - while (randomTerms.size() < numTerms) { - randomTerms.add(TestUtil.randomSimpleString(random())); - } - terms = new ArrayList<>(randomTerms); - final long seed = random().nextLong(); - final IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); - final IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); - iwc2.setMergePolicy(mergePolicy()); - final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1); - final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2); - for (int i = 0; i < numDocs; ++i) { - if (random().nextInt(5) == 0 && i != numDocs - 1) { - final String term = RandomPicks.randomFrom(random(), terms); - iw1.deleteDocuments(new Term("s", term)); - iw2.deleteDocuments(new Term("s", term)); - } - final Document doc = randomDocument(); - iw1.addDocument(doc); - iw2.addDocument(doc); - if (random().nextInt(8) == 0) { - iw1.commit(); - iw2.commit(); - } - } - // Make sure we have something to merge - iw1.commit(); - iw2.commit(); - final Document doc = randomDocument(); - // NOTE: don't use RIW.addDocument directly, since it sometimes commits - // which may trigger a merge, at which case forceMerge may not do anything. - // With field updates this is a problem, since the updates can go into the - // single segment in the index, and threefore the index won't be sorted. - // This hurts the assumption of the test later on, that the index is sorted - // by SortingMP. 
- iw1.w.addDocument(doc); - iw2.w.addDocument(doc); - - // update NDV of docs belonging to one term (covers many documents) - final long value = random().nextLong(); - final String term = RandomPicks.randomFrom(random(), terms); - iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value); - iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value); - - iw1.forceMerge(1); - iw2.forceMerge(1); - iw1.close(); - iw2.close(); - reader = DirectoryReader.open(dir1); - sortedReader = DirectoryReader.open(dir2); - } - - @Override - public void tearDown() throws Exception { - reader.close(); - sortedReader.close(); - dir1.close(); - dir2.close(); - super.tearDown(); - } - - private static void assertSorted(LeafReader reader, boolean reverse) throws IOException { - final NumericDocValues ndv = reader.getNumericDocValues("ndv"); - for (int i = 1; i < reader.maxDoc(); ++i) { - final int lhs = (!reverse ? i-1 : i); - final int rhs = (!reverse ? i : i-1); - assertTrue("ndv(" + (i-1) + ")=" + ndv.get(i-1) + ",ndv(" + i + ")=" + ndv.get(i)+",reverse="+reverse, ndv.get(lhs) <= ndv.get(rhs)); - } - } - - public void testSortingMP() throws IOException { - final LeafReader sortedReader1 = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort); - final LeafReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader); - - assertSorted(sortedReader1, reversedSort); - assertSorted(sortedReader2, reversedSort); - - assertReaderEquals("", sortedReader1, sortedReader2); - } - - public void testBadSort() throws Exception { - IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { - new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE); - }); - assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage()); - } - -} diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java index 3fbe14a4217..54ad7445c79 100644 --- a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java +++ b/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java @@ -32,9 +32,9 @@ import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; @@ -367,8 +367,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase { reader = writer.getReader(); writer.close(); searcher = newSearcher(reader); - LeafReader ar = SlowCompositeReaderWrapper.wrap(reader); - artistDocValues = ar.getSortedDocValues("artist"); + artistDocValues = MultiDocValues.getSortedValues(reader, "artist"); // All searches sort by song popularity final Similarity base = searcher.getSimilarity(true); diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java index 66630df2bca..0b19254d985 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java 
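The TestDiversifiedTopDocsCollector hunk above swaps SlowCompositeReaderWrapper for MultiDocValues, which builds a merged doc-values view across all leaves without pretending the index is a single segment. A minimal sketch of the replacement pattern (the "artist" field name comes from that test):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

final class MultiDocValuesSketch {
  static void dumpArtists(Directory dir) throws IOException {
    try (IndexReader reader = DirectoryReader.open(dir)) {
      // Merged view over all segments; no need to wrap the reader into a single leaf.
      SortedDocValues artist = MultiDocValues.getSortedValues(reader, "artist");
      if (artist == null) {
        return; // no document has the field
      }
      for (int docID = 0; docID < reader.maxDoc(); docID++) {
        BytesRef value = artist.get(docID);
        System.out.println(docID + " -> " + value.utf8ToString());
      }
    }
  }
}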
@@ -247,7 +247,7 @@ public class TestNearest extends LuceneTestCase { private IndexWriterConfig getIndexWriterConfig() { IndexWriterConfig iwc = newIndexWriterConfig(); - iwc.setCodec(Codec.forName("Lucene60")); + iwc.setCodec(Codec.forName("Lucene62")); return iwc; } } diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java index afdde71c0a7..4d3ef3b3c1d 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java @@ -17,7 +17,7 @@ package org.apache.lucene.spatial.prefix.tree; import java.text.ParseException; -import java.text.SimpleDateFormat; +import java.time.ZonedDateTime; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; @@ -58,60 +58,97 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { */ private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); - private static Calendar CAL_TMP;//template + + /** + * The Java platform default {@link Calendar} with UTC & ROOT Locale. Generally a {@link GregorianCalendar}. + * Do not modify this! + */ + public static final Calendar DEFAULT_CAL;//template static { - CAL_TMP = Calendar.getInstance(UTC, Locale.ROOT); - CAL_TMP.clear(); + DEFAULT_CAL = Calendar.getInstance(UTC, Locale.ROOT); + DEFAULT_CAL.clear(); } - private static final Calendar MINCAL = (Calendar) CAL_TMP.clone(); - private static final Calendar MAXCAL = (Calendar) CAL_TMP.clone(); + /** + * A Calendar instance compatible with {@link java.time.ZonedDateTime} as seen from + * {@link GregorianCalendar#from(ZonedDateTime)}. + * Do not modify this! + */ + public static final Calendar JAVA_UTIL_TIME_COMPAT_CAL; static { - MINCAL.setTimeInMillis(Long.MIN_VALUE); - MAXCAL.setTimeInMillis(Long.MAX_VALUE); - } - //BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1. - private static final int BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR); - private static final int BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR);//1 - private static final int BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1; - private static final int AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR);//1 - private static final int AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR); - private static final int AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000; - static { assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; } - - //how many million years are there? - private static final int NUM_MYEARS = (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000; - - private static int calFieldLen(int field) { - return CAL_TMP.getMaximum(field) - CAL_TMP.getMinimum(field) + 1; + // see source of GregorianCalendar.from(ZonedDateTime) + GregorianCalendar cal = new GregorianCalendar(UTC, Locale.ROOT); + cal.setGregorianChange(new Date(Long.MIN_VALUE)); + cal.setFirstDayOfWeek(Calendar.MONDAY);// might not matter? 
+ cal.setMinimalDaysInFirstWeek(4);// might not matter + cal.clear(); + JAVA_UTIL_TIME_COMPAT_CAL = cal; } private static final int[] FIELD_BY_LEVEL = { -1/*unused*/, -1, -1, Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH, Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND}; - private static final int yearLevel = 3; - public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree(); + private static final int YEAR_LEVEL = 3; + + //how many million years are there? + private static final int NUM_MYEARS = 585;// we assert how this was computed in the constructor + + /** An instanced based on {@link Calendar#getInstance(TimeZone, Locale)} with UTC and Locale.Root. This + * will (always?) be a {@link GregorianCalendar} with a so-called "Gregorian Change Date" of 1582. + */ + @Deprecated + public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree(DEFAULT_CAL); + + // Instance fields: (all are final) + + private final Calendar CAL_TMP;//template + + private final Calendar MINCAL; + private final Calendar MAXCAL; + + private final int BC_FIRSTYEAR; + private final int BC_LASTYEAR; + private final int BC_YEARS; + private final int AD_FIRSTYEAR; + private final int AD_LASTYEAR; + private final int AD_YEAR_BASE; private final UnitNRShape minLV, maxLV; private final UnitNRShape gregorianChangeDateLV; - protected DateRangePrefixTree() { + /** Constructs with the specified calendar used as a template to be cloned whenever a new + * Calendar needs to be created. See {@link #DEFAULT_CAL} and {@link #JAVA_UTIL_TIME_COMPAT_CAL}. */ + public DateRangePrefixTree(Calendar templateCal) { super(new int[]{//sublevels by level NUM_MYEARS, 1000,//1 thousand thousand-years in a million years 1000,//1 thousand years in a thousand-year - calFieldLen(Calendar.MONTH), - calFieldLen(Calendar.DAY_OF_MONTH), - calFieldLen(Calendar.HOUR_OF_DAY), - calFieldLen(Calendar.MINUTE), - calFieldLen(Calendar.SECOND), - calFieldLen(Calendar.MILLISECOND), + calFieldLen(templateCal, Calendar.MONTH), + calFieldLen(templateCal, Calendar.DAY_OF_MONTH), + calFieldLen(templateCal, Calendar.HOUR_OF_DAY), + calFieldLen(templateCal, Calendar.MINUTE), + calFieldLen(templateCal, Calendar.SECOND), + calFieldLen(templateCal, Calendar.MILLISECOND), }); + CAL_TMP = (Calendar) templateCal.clone();// defensive copy + MINCAL = (Calendar) CAL_TMP.clone(); + MINCAL.setTimeInMillis(Long.MIN_VALUE); + MAXCAL = (Calendar) CAL_TMP.clone(); + MAXCAL.setTimeInMillis(Long.MAX_VALUE); + //BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1. + BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR); + BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR); // 1 + BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1; + AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR); // 1 + AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR); + AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000; + assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; + assert NUM_MYEARS == (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000; + maxLV = toShape((Calendar)MAXCAL.clone()); minLV = toShape((Calendar)MINCAL.clone()); if (MAXCAL instanceof GregorianCalendar) { - //TODO this should be a configurable param by passing a Calendar serving as a template. 
GregorianCalendar gCal = (GregorianCalendar)MAXCAL; gregorianChangeDateLV = toUnitShape(gCal.getGregorianChange()); } else { @@ -119,6 +156,10 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { } } + private static int calFieldLen(Calendar cal, int field) { + return cal.getMaximum(field) - cal.getMinimum(field) + 1; + } + @Override public int getNumSubCells(UnitNRShape lv) { int cmp = comparePrefix(lv, maxLV); @@ -140,7 +181,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { } private int fastSubCells(UnitNRShape lv) { - if (lv.getLevel() == yearLevel+1) {//month + if (lv.getLevel() == YEAR_LEVEL + 1) {//month switch (lv.getValAtLevel(lv.getLevel())) { case Calendar.SEPTEMBER: case Calendar.APRIL: @@ -175,7 +216,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { } /** Calendar utility method: - * Returns a new {@link Calendar} in UTC TimeZone, ROOT Locale, with all fields cleared. */ + * Returns a clone of the {@link Calendar} passed to the constructor with all fields cleared. */ public Calendar newCal() { return (Calendar) CAL_TMP.clone(); } @@ -185,7 +226,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { * {@link java.util.Calendar#YEAR}. If there's no match, the next greatest level is returned as a negative value. */ public int getTreeLevelForCalendarField(int calField) { - for (int i = yearLevel; i < FIELD_BY_LEVEL.length; i++) { + for (int i = YEAR_LEVEL; i < FIELD_BY_LEVEL.length; i++) { if (FIELD_BY_LEVEL[i] == calField) { return i; } else if (FIELD_BY_LEVEL[i] > calField) { @@ -200,7 +241,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { * examines fields relevant to the prefix tree. If no fields are set, it returns -1. */ public int getCalPrecisionField(Calendar cal) { int lastField = -1; - for (int level = yearLevel; level < FIELD_BY_LEVEL.length; level++) { + for (int level = YEAR_LEVEL; level < FIELD_BY_LEVEL.length; level++) { int field = FIELD_BY_LEVEL[level]; if (!cal.isSet(field)) break; @@ -212,20 +253,18 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { /** Calendar utility method: * Calls {@link Calendar#clear(int)} for every field after {@code field}. Beware of Calendar underflow. */ public void clearFieldsAfter(Calendar cal, int field) { - if (field == -1) { - cal.clear(); - return; - } int assertEra = -1; assert (assertEra = (((Calendar)cal.clone()).get(Calendar.ERA))) >= 0;//a trick to only get this if assert enabled - for (int f = field+1; f < Calendar.FIELD_COUNT; f++) { + //note: Calendar.ERA == 0; + for (int f = field + 1; f <= Calendar.MILLISECOND; f++) { cal.clear(f); } - assert ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow"; + assert field + 1 == Calendar.ERA || ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow"; } /** Converts {@code value} from a {@link Calendar} or {@link Date} to a {@link Shape}. Other arguments * result in a {@link java.lang.IllegalArgumentException}. + * If a Calendar is passed in, there might be problems if it is not created via {@link #newCal()}. */ @Override public UnitNRShape toUnitShape(Object value) { @@ -240,7 +279,9 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { } /** Converts the Calendar into a Shape. - * The isSet() state of the Calendar is re-instated when done. */ + * The isSet() state of the Calendar is re-instated when done. + * If a Calendar is passed in, there might be problems if it is not created via {@link #newCal()}. 
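With the new constructor above, the calendar system used by DateRangePrefixTree becomes pluggable: the supplied Calendar is cloned as the template for every calendar the tree hands out, and the two public constants cover the common cases. A short usage sketch (the field values chosen are illustrative):

import java.util.Calendar;

import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;

final class DateRangeTreeSketch {
  static void demo() {
    // Platform-default Gregorian rules (Gregorian change date in 1582), as before.
    DateRangePrefixTree legacyTree = new DateRangePrefixTree(DateRangePrefixTree.DEFAULT_CAL);

    // Rules compatible with java.time conversions (proleptic Gregorian change date).
    DateRangePrefixTree tree =
        new DateRangePrefixTree(DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL);

    Calendar cal = tree.newCal();           // clone of the template, all fields cleared
    cal.set(Calendar.YEAR, 2016);
    cal.set(Calendar.MONTH, Calendar.MAY);  // only YEAR and MONTH set => month precision
    System.out.println(tree.toString(cal)); // expected to print "2016-05"
  }
}

The deprecated INSTANCE keeps the old behavior (DEFAULT_CAL), while JAVA_UTIL_TIME_COMPAT_CAL matches what GregorianCalendar.from(ZonedDateTime) would produce.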
+ */ public UnitNRShape toShape(Calendar cal) { // Convert a Calendar into a stack of cell numbers final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields @@ -256,7 +297,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { valStack[len++] = yearAdj / 1000; yearAdj -= valStack[len-1] * 1000; valStack[len++] = yearAdj; - for (int level = yearLevel+1; level < FIELD_BY_LEVEL.length; level++) { + for (int level = YEAR_LEVEL +1; level < FIELD_BY_LEVEL.length; level++) { int field = FIELD_BY_LEVEL[level]; if (field > calPrecField) break; @@ -301,7 +342,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { cal.set(Calendar.ERA, 0);//we assert this "sticks" at the end cal.set(Calendar.YEAR, (AD_YEAR_BASE - yearAdj) + 1); } - for (int level = yearLevel+1; level <= lv.getLevel(); level++) { + for (int level = YEAR_LEVEL + 1; level <= lv.getLevel(); level++) { int field = FIELD_BY_LEVEL[level]; cal.set(field, lv.getValAtLevel(level) + cal.getActualMinimum(field)); } @@ -314,59 +355,77 @@ public class DateRangePrefixTree extends NumberRangePrefixTree { return toString(toCalendar(lv)); } - /** Calendar utility method: - * Formats the calendar to ISO-8601 format, to include proper BC handling (1BC is "0000", 2BC is "-0001", etc.); - * and WITHOUT a trailing 'Z'. + /** Calendar utility method consistent with {@link java.time.format.DateTimeFormatter#ISO_INSTANT} except + * has no trailing 'Z', and will be truncated to the units given according to + * {@link Calendar#isSet(int)}. * A fully cleared calendar will yield the string "*". * The isSet() state of the Calendar is re-instated when done. */ - @SuppressWarnings("fallthrough") public String toString(Calendar cal) { final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields if (calPrecField == -1) return "*"; try { - //TODO not fully optimized; but it's at least not used in 'search'. - //TODO maybe borrow code from Solr DateUtil (put in Lucene util somewhere), and have it reference this back? - String pattern = "yyyy-MM-dd'T'HH:mm:ss.SSS"; - int ptnLen = 0; - switch (calPrecField) {//switch fall-through is deliberate - case Calendar.MILLISECOND: ptnLen += 4; - case Calendar.SECOND: ptnLen += 3; - case Calendar.MINUTE: ptnLen += 3; - case Calendar.HOUR_OF_DAY: ptnLen += 5; - case Calendar.DAY_OF_MONTH: ptnLen += 3; - case Calendar.MONTH: ptnLen += 3; - case Calendar.YEAR: ptnLen += 4; - break; - default: throw new IllegalStateException(""+calPrecField); - } - pattern = pattern.substring(0, ptnLen); - SimpleDateFormat format = new SimpleDateFormat(pattern, Locale.ROOT); - format.setTimeZone(cal.getTimeZone()); - if (cal.get(Calendar.ERA) == 0) {//BC - //SDF doesn't do this properly according to ISO-8601 - // Example: 1BC == "0000" (actually 0 AD), 2BC == "-0001", 3BC == "-0002", ... - final int yearOrig = cal.get(Calendar.YEAR); - cal.set(Calendar.YEAR, yearOrig-1); - String str; - try { - str = format.format(cal.getTime()); - } finally { - //reset to what it was - cal.set(Calendar.ERA, 0);//necessary! - cal.set(Calendar.YEAR, yearOrig); + StringBuilder builder = new StringBuilder("yyyy-MM-dd'T'HH:mm:ss.SSS".length());//typical + int year = cal.get(Calendar.YEAR); // within the era (thus always positve). >= 1. 
+ if (cal.get(Calendar.ERA) == 0) { // BC + year -= 1; // 1BC should be "0000", so shift by one + if (year > 0) { + builder.append('-'); } - if (yearOrig > 1) - return "-" + str; - else - return "0000" + str.substring(4); + } else if (year > 9999) { + builder.append('+'); } - return format.format(cal.getTime()); + appendPadded(builder, year, (short) 4); + if (calPrecField >= Calendar.MONTH) { + builder.append('-'); + appendPadded(builder, cal.get(Calendar.MONTH) + 1, (short) 2); // +1 since first is 0 + } + if (calPrecField >= Calendar.DAY_OF_MONTH) { + builder.append('-'); + appendPadded(builder, cal.get(Calendar.DAY_OF_MONTH), (short) 2); + } + if (calPrecField >= Calendar.HOUR_OF_DAY) { + builder.append('T'); + appendPadded(builder, cal.get(Calendar.HOUR_OF_DAY), (short) 2); + } + if (calPrecField >= Calendar.MINUTE) { + builder.append(':'); + appendPadded(builder, cal.get(Calendar.MINUTE), (short) 2); + } + if (calPrecField >= Calendar.SECOND) { + builder.append(':'); + appendPadded(builder, cal.get(Calendar.SECOND), (short) 2); + } + if (calPrecField >= Calendar.MILLISECOND && cal.get(Calendar.MILLISECOND) > 0) { // only if non-zero + builder.append('.'); + appendPadded(builder, cal.get(Calendar.MILLISECOND), (short) 3); + } + + return builder.toString(); } finally { clearFieldsAfter(cal, calPrecField);//restore precision state modified by get() } } + private void appendPadded(StringBuilder builder, int integer, short positions) { + assert integer >= 0 && positions >= 1 && positions <= 4; + int preBuilderLen = builder.length(); + int intStrLen; + if (integer > 999) { + intStrLen = 4; + } else if (integer > 99) { + intStrLen = 3; + } else if (integer > 9) { + intStrLen = 2; + } else { + intStrLen = 1; + } + for (int i = 0; i < positions - intStrLen; i++) { + builder.append('0'); + } + builder.append(integer); + } + @Override protected UnitNRShape parseUnitShape(String str) throws ParseException { return toShape(parseCalendar(str)); diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java index 33c8a330af9..9b93aac04e0 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java @@ -20,12 +20,12 @@ import java.io.IOException; import java.util.Calendar; import com.carrotsearch.randomizedtesting.annotations.Repeat; -import org.locationtech.spatial4j.shape.Shape; import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree; import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape; import org.apache.lucene.spatial.query.SpatialOperation; import org.junit.Before; import org.junit.Test; +import org.locationtech.spatial4j.shape.Shape; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; @@ -54,7 +54,7 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase { }; } Calendar tmpCal = tree.newCal(); - int randomCalWindowField = randomIntBetween(1, Calendar.ZONE_OFFSET - 1);//we're not allowed to add zone offset + int randomCalWindowField = randomIntBetween(Calendar.YEAR, Calendar.MILLISECOND); tmpCal.add(randomCalWindowField, 2_000); randomCalWindowMs = Math.max(2000L, tmpCal.getTimeInMillis()); } diff --git 
a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java index 12e9744064b..e8c63518ca3 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java @@ -17,19 +17,32 @@ package org.apache.lucene.spatial.prefix.tree; import java.text.ParseException; +import java.time.Instant; import java.util.Arrays; import java.util.Calendar; import java.util.GregorianCalendar; -import org.locationtech.spatial4j.shape.Shape; -import org.locationtech.spatial4j.shape.SpatialRelation; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.locationtech.spatial4j.shape.Shape; +import org.locationtech.spatial4j.shape.SpatialRelation; public class DateRangePrefixTreeTest extends LuceneTestCase { - private DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE; + @ParametersFactory + public static Iterable parameters() { + return Arrays.asList(new Object[][]{ + {DateRangePrefixTree.DEFAULT_CAL}, {DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL} + }); + } + + private final DateRangePrefixTree tree; + + public DateRangePrefixTreeTest(Calendar templateCal) { + tree = new DateRangePrefixTree(templateCal); + } public void testRoundTrip() throws Exception { Calendar cal = tree.newCal(); @@ -77,6 +90,10 @@ public class DateRangePrefixTreeTest extends LuceneTestCase { //test random cal.setTimeInMillis(random().nextLong()); roundTrip(cal); + //assert same toString as java.time, provided it's after the GCD + if (cal.getTimeInMillis() > ((GregorianCalendar)tree.newCal()).getGregorianChange().getTime()) { + assertEquals(Instant.ofEpochMilli(cal.getTimeInMillis()).toString(), tree.toString(cal) + 'Z'); + } } //copies from DateRangePrefixTree @@ -88,8 +105,14 @@ public class DateRangePrefixTreeTest extends LuceneTestCase { Calendar cal = (Calendar) calOrig.clone(); String lastString = null; while (true) { - String calString = tree.toString(cal); - assert lastString == null || calString.length() < lastString.length(); + String calString; + { + Calendar preToStringCalClone = (Calendar) cal.clone(); + calString = tree.toString(cal); + assert lastString == null || calString.length() < lastString.length(); + assertEquals(preToStringCalClone, cal);//ensure toString doesn't modify cal state + } + //test parseCalendar assertEquals(cal, tree.parseCalendar(calString)); diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java index 0f8f2026fe4..c2cb93b13f0 100644 --- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java +++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java @@ -85,14 +85,14 @@ import com.carrotsearch.randomizedtesting.generators.RandomInts; public class TestGeo3DPoint extends LuceneTestCase { private static Codec getCodec() { - if (Codec.getDefault().getName().equals("Lucene60")) { + if (Codec.getDefault().getName().equals("Lucene62")) { int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); double maxMBSortInHeap = 3.0 + (3*random().nextDouble()); if (VERBOSE) { 
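The rewritten toString(Calendar) builds the ISO-8601 string by hand instead of going through SimpleDateFormat, and the parameterized test above asserts that, for instants after the calendar's Gregorian change date, it agrees with java.time's Instant rendering apart from the trailing 'Z'. A small standalone check along the same lines (a sketch, assuming the java.time-compatible template calendar):

import java.time.Instant;
import java.util.Calendar;

import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;

final class Iso8601ConsistencySketch {
  static void check(long epochMillis) {
    DateRangePrefixTree tree =
        new DateRangePrefixTree(DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL);
    Calendar cal = tree.newCal();
    cal.setTimeInMillis(epochMillis);
    String fromTree = tree.toString(cal) + 'Z';               // the tree omits the trailing 'Z'
    String fromJavaTime = Instant.ofEpochMilli(epochMillis).toString();
    // Expected to match for instants after the calendar's Gregorian change date; both
    // renderings drop the ".SSS" part when the millisecond component is zero.
    System.out.println(fromTree + " vs " + fromJavaTime + " equal=" + fromTree.equals(fromJavaTime));
  }
}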
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); } - return new FilterCodec("Lucene60", Codec.getDefault()) { + return new FilterCodec("Lucene62", Codec.getDefault()) { @Override public PointsFormat pointsFormat() { return new PointsFormat() { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 2b14d6e8016..16e9406310f 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -56,7 +56,6 @@ import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause; @@ -232,7 +231,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { // This way all merged segments will be sorted at // merge time, allow for per-segment early termination // when those segments are searched: - iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT)); + iwc.setIndexSort(SORT); return iwc; } @@ -586,10 +585,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { // We sorted postings by weight during indexing, so we // only retrieve the first num hits now: - final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) writer.getConfig().getMergePolicy(); - Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, sortingMergePolicy.getSort()); - IndexSearcher searcher = searcherMgr.acquire(); + Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num); List results = null; + IndexSearcher searcher = searcherMgr.acquire(); try { //System.out.println("got searcher=" + searcher); searcher.search(finalQuery, c2); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java index 97e0ef1c917..63454635a2a 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java @@ -66,7 +66,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { assertEquals("a penny saved is a penny earned", results.get(0).key); assertEquals("a penny saved is a penny earned", results.get(0).highlightKey); assertEquals(10, results.get(0).value); - assertEquals(new BytesRef("foobaz"), results.get(0).payload); + assertEquals("foobaz", results.get(0).payload.utf8ToString()); assertEquals("lend me your ear", results.get(1).key); assertEquals("lend me your ear", results.get(1).highlightKey); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java index 62ed08b8d0e..6b1c2d1b21d 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java +++ 
b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -646,7 +646,7 @@ public class TestSuggestField extends LuceneTestCase { static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set suggestFields) { IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer); iwc.setMergePolicy(newLogMergePolicy()); - Codec filterCodec = new Lucene60Codec() { + Codec filterCodec = new Lucene62Codec() { PostingsFormat postingsFormat = new Completion50PostingsFormat(); @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java index bda4cdebaad..275c1864857 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java @@ -1242,7 +1242,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase { // Else seeds may not reproduce: iwc.setMergeScheduler(new SerialMergeScheduler()); int pointsInLeaf = 2 + random().nextInt(4); - iwc.setCodec(new FilterCodec("Lucene60", TestUtil.getDefaultCodec()) { + iwc.setCodec(new FilterCodec("Lucene62", TestUtil.getDefaultCodec()) { @Override public PointsFormat pointsFormat() { return new PointsFormat() { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java index f09be9d05a6..7c19596aa81 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java @@ -627,7 +627,7 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest /** Returns a new fake segment */ protected static SegmentInfo newSegmentInfo(Directory dir, String name) { - return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); } /** Creates a file of the specified size with random data. 
*/ diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java index d8e2296e336..528e92afc17 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java @@ -347,7 +347,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes /** Returns a new fake segment */ protected static SegmentInfo newSegmentInfo(Directory dir, String name) { - return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java index 2c6f379f4b6..d7dc44bbeed 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java @@ -303,7 +303,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase { Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors")); Codec codec = getCodec(); - SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field"); FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(), proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(), diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java index 1136afa64fb..49d19ae4322 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java @@ -26,7 +26,8 @@ import java.util.Set; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.TextField; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.MockDirectoryWrapper; @@ -52,7 +53,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, 
IOContext.DEFAULT); @@ -66,7 +67,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); Set originalFiles = Collections.singleton("_123.a"); info.setFiles(originalFiles); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); @@ -95,7 +96,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT diagnostics.put("key1", "value1"); diagnostics.put("key2", "value2"); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - diagnostics, id, new HashMap<>()); + diagnostics, id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); @@ -118,7 +119,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT attributes.put("key1", "value1"); attributes.put("key2", "value2"); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, attributes); + Collections.emptyMap(), id, attributes, null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); @@ -138,7 +139,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Directory dir = newDirectory(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); @@ -153,7 +154,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Directory dir = newDirectory(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, v, "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); @@ -161,7 +162,57 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT dir.close(); } } - + + protected boolean supportsIndexSort() { + return true; + } + + /** Test sort */ + public void testSort() throws IOException { + assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort()); + + final int iters = atLeast(5); + for (int i = 0; i < iters; ++i) { + Sort sort; + if (i == 0) { + sort = null; + } else { + final int numSortFields = TestUtil.nextInt(random(), 1, 3); + SortField[] sortFields = new SortField[numSortFields]; + for (int j = 0; j < numSortFields; ++j) { + sortFields[j] = new SortField( + TestUtil.randomSimpleString(random()), + random().nextBoolean() ? 
SortField.Type.LONG : SortField.Type.STRING, + random().nextBoolean()); + if (random().nextBoolean()) { + switch (sortFields[j].getType()) { + case LONG: + sortFields[j].setMissingValue(random().nextLong()); + break; + case STRING: + sortFields[j].setMissingValue(random().nextBoolean() ? SortField.STRING_FIRST : SortField.STRING_LAST); + break; + default: + fail(); + } + } + } + sort = new Sort(sortFields); + } + + Directory dir = newDirectory(); + Codec codec = getCodec(); + byte id[] = StringHelper.randomId(); + SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, + Collections.emptyMap(), id, new HashMap<>(), sort); + info.setFiles(Collections.emptySet()); + codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); + SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); + assertEquals(sort, info2.getIndexSort()); + dir.close(); + } + } + /** * Test segment infos write that hits exception immediately on open. * make sure we get our exception back, no file handle leaks, etc. @@ -183,7 +234,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); fail.setDoFail(); @@ -216,7 +267,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); fail.setDoFail(); @@ -249,7 +300,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); @@ -283,7 +334,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec, - Collections.emptyMap(), id, new HashMap<>()); + Collections.emptyMap(), id, new HashMap<>(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); @@ -332,7 +383,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT TestUtil.randomUnicodeString(random())); } - SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes); + SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes, null); info.setFiles(files); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, name, id, IOContext.DEFAULT); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java b/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java index b40ac2685d5..f32e4d3c118 100644 
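// Hedged sketch (not part of the patch): illustrates the new trailing indexSort argument on
// SegmentInfo and how a codec's SegmentInfoFormat is expected to round-trip it, mirroring the
// testSort case above. Names such as dir, codec and getVersions() are assumed to come from the
// surrounding test fixture.
Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
byte[] segId = StringHelper.randomId();
SegmentInfo sorted = new SegmentInfo(dir, getVersions()[0], "_42", 1, false, codec,
    Collections.emptyMap(), segId, new HashMap<>(), indexSort);   // index sort is now part of SegmentInfo
sorted.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, sorted, IOContext.DEFAULT);
SegmentInfo roundTripped = codec.segmentInfoFormat().read(dir, "_42", segId, IOContext.DEFAULT);
assertEquals(indexSort, roundTripped.getIndexSort());              // the sort survives the round trip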
--- a/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java @@ -138,7 +138,6 @@ public class MockRandomMergePolicy extends MergePolicy { static class MockRandomOneMerge extends OneMerge { final Random r; - ArrayList readers; MockRandomOneMerge(List segments, long seed) { super(segments); @@ -146,34 +145,31 @@ public class MockRandomMergePolicy extends MergePolicy { } @Override - public List getMergeReaders() throws IOException { - if (readers == null) { - readers = new ArrayList(super.getMergeReaders()); - for (int i = 0; i < readers.size(); i++) { - // wrap it (e.g. prevent bulk merge etc) - // TODO: cut this over to FilterCodecReader api, we can explicitly - // enable/disable bulk merge for portions of the index we want. - int thingToDo = r.nextInt(7); - if (thingToDo == 0) { - // simple no-op FilterReader - if (LuceneTestCase.VERBOSE) { - System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + readers.get(i)); - } - readers.set(i, SlowCodecReaderWrapper.wrap(new FilterLeafReader(readers.get(i)) {})); - } else if (thingToDo == 1) { - // renumber fields - // NOTE: currently this only "blocks" bulk merges just by - // being a FilterReader. But it might find bugs elsewhere, - // and maybe the situation can be improved in the future. - if (LuceneTestCase.VERBOSE) { - System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + readers.get(i)); - } - readers.set(i, SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(readers.get(i), r))); - } - // otherwise, reader is unchanged + public CodecReader wrapForMerge(CodecReader reader) throws IOException { + + // wrap it (e.g. prevent bulk merge etc) + // TODO: cut this over to FilterCodecReader api, we can explicitly + // enable/disable bulk merge for portions of the index we want. + int thingToDo = r.nextInt(7); + if (thingToDo == 0) { + // simple no-op FilterReader + if (LuceneTestCase.VERBOSE) { + System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + reader); } + return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {}); + } else if (thingToDo == 1) { + // renumber fields + // NOTE: currently this only "blocks" bulk merges just by + // being a FilterReader. But it might find bugs elsewhere, + // and maybe the situation can be improved in the future. 
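// Hedged illustration (not part of the patch): MockRandomMergePolicy now relies on the new
// per-reader hook OneMerge.wrapForMerge(CodecReader) instead of overriding getMergeReaders().
// A minimal custom OneMerge using the same hook might look like this sketch (assumes the usual
// org.apache.lucene.index imports for MergePolicy, CodecReader and SegmentCommitInfo):
static class PassThroughOneMerge extends MergePolicy.OneMerge {
  PassThroughOneMerge(List<SegmentCommitInfo> segments) {
    super(segments);
  }

  @Override
  public CodecReader wrapForMerge(CodecReader reader) throws IOException {
    // wrapping happens once per incoming reader; returning it unchanged
    // leaves bulk-merge optimizations intact.
    return reader;
  }
}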
+ if (LuceneTestCase.VERBOSE) { + System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + reader); + } + return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(reader, r)); + } else { + // otherwise, reader is unchanged + return reader; } - return readers; } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java index 90064c4d7ff..d4159279311 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java @@ -611,7 +611,7 @@ public class RandomPostingsTester { // maxAllowed = the "highest" we can index, but we will still // randomly index at lower IndexOption public FieldsProducer buildIndex(Codec codec, Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException { - SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>()); + SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed); if (LuceneTestCase.VERBOSE) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index b517af00cf1..74a46d4f5e7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -283,6 +283,11 @@ public class QueryUtils { @Override protected void doClose() throws IOException {} + + @Override + public Sort getIndexSort() { + return null; + } }; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 52aca7e8f52..98cd2a790ea 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -2008,9 +2008,9 @@ public abstract class LuceneTestCase extends Assert { return; } assertTermsStatisticsEquals(info, leftTerms, rightTerms); - assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets()); - assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions()); - assertEquals(leftTerms.hasPayloads(), rightTerms.hasPayloads()); + assertEquals("hasOffsets", leftTerms.hasOffsets(), rightTerms.hasOffsets()); + assertEquals("hasPositions", leftTerms.hasPositions(), rightTerms.hasPositions()); + assertEquals("hasPayloads", leftTerms.hasPayloads(), rightTerms.hasPayloads()); TermsEnum leftTermsEnum = leftTerms.iterator(); TermsEnum rightTermsEnum = rightTerms.iterator(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java index 7ac40375f15..5c88dc7ec92 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java @@ -32,7 +32,7 @@ import org.apache.lucene.codecs.asserting.AssertingPostingsFormat; import 
org.apache.lucene.codecs.cheapbastard.CheapBastardCodec; import org.apache.lucene.codecs.compressing.CompressingCodec; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.index.RandomCodec; @@ -181,8 +181,8 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { codec = new AssertingCodec(); } else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) { codec = CompressingCodec.randomInstance(random); - } else if ("Lucene60".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene60"))) { - codec = new Lucene60Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values())); + } else if ("Lucene62".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene62"))) { + codec = new Lucene62Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values())); } else if (!"random".equals(TEST_CODEC)) { codec = Codec.forName(TEST_CODEC); } else if ("random".equals(TEST_POSTINGSFORMAT)) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java index d772ae321d3..b63216085b3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java @@ -54,7 +54,7 @@ import org.apache.lucene.codecs.blockterms.LuceneFixedGap; import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat; import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.apache.lucene.document.BinaryDocValuesField; @@ -911,7 +911,7 @@ public final class TestUtil { * This may be different than {@link Codec#getDefault()} because that is randomized. */ public static Codec getDefaultCodec() { - return new Lucene60Codec(); + return new Lucene62Codec(); } /** diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5f52cf89b29..22a8211fe4e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -47,6 +47,11 @@ Optimizations ================== 6.1.0 ================== +Upgrading from Solr any prior release +---------------------- + +* If you use historical dates, specifically on or before the year 1582, you should re-index. + Detailed Change List ---------------------- @@ -135,6 +140,8 @@ New Features * SOLR-8208: [subquery] document transformer executes separate requests per result document. (Cao Manh Dat via Mikhail Khludnev) +* SOLR-8323: All CollectionStateWatcher API (Alan Woodward, Scott Blum) + Bug Fixes ---------------------- @@ -206,6 +213,10 @@ Bug Fixes * SOLR-8970: Change SSLTestConfig to use a keystore file that is included as a resource in the test-framework jar so users subclassing SolrTestCaseJ4 don't need to preserve magic paths (hossman) +* SOLR-9080, SOLR-9085: (6.0 bug) For years <= 1582, date math (round,add,sub) introduced error. 
Range faceting + on such dates was also affected. With this fixed, this is the first release range faceting works on BC years. + (David Smiley) + Optimizations ---------------------- * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation. @@ -228,6 +239,8 @@ Optimizations * SOLR-9014: Deprecate and reduce usage of ClusterState methods which may make calls to ZK via the lazy collection reference. (Scott Blum, shalin) +* SOLR-9106: Cluster properties are now cached on ZkStateReader. (Alan Woodward) + Other Changes ---------------------- * SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy. @@ -286,6 +299,11 @@ Other Changes * SOLR-9105: Fix a bunch of typos across 103 files (Bartosz Krasiński via janhoy) +* SOLR-9072: Migrate morphline-core tests to SolrCloudTestCase. (Alan Woodward) + +================== 6.0.1 ================== +(No Changes) + ================== 6.0.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java index 7892c057292..e69b3fd9063 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java @@ -54,7 +54,7 @@ public class DataImportHandlerException extends RuntimeException { return errCode; } - public static void wrapAndThrow(int err, Exception e) { + public static DataImportHandlerException wrapAndThrow(int err, Exception e) { if (e instanceof DataImportHandlerException) { throw (DataImportHandlerException) e; } else { @@ -62,7 +62,7 @@ public class DataImportHandlerException extends RuntimeException { } } - public static void wrapAndThrow(int err, Exception e, String msg) { + public static DataImportHandlerException wrapAndThrow(int err, Exception e, String msg) { if (e instanceof DataImportHandlerException) { throw (DataImportHandlerException) e; } else { diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java index 30e16cef323..f4df82080aa 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java @@ -16,9 +16,6 @@ */ package org.apache.solr.handler.dataimport; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; -import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; - import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; @@ -35,6 +32,9 @@ import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.handler.dataimport.config.EntityField; import org.apache.solr.util.DateMathParser; +import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; +import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; + /** *

 * Formats values using a given date format.
 *
        Pass three parameters: @@ -99,7 +99,7 @@ public class DateFormatEvaluator extends Evaluator { throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex); } } - TimeZone tz = TimeZone.getDefault(); + TimeZone tz = TimeZone.getDefault(); // DWS TODO: is this the right default for us? Deserves explanation if so. if(l.size()==4) { Object tzObj = l.get(3); String tzStr = null; @@ -153,24 +153,19 @@ public class DateFormatEvaluator extends Evaluator { * @return the result of evaluating a string */ protected Date evaluateString(String datemathfmt, Locale locale, TimeZone tz) { - Date date = null; - datemathfmt = datemathfmt.replaceAll("NOW", ""); - try { - DateMathParser parser = getDateMathParser(locale, tz); - date = parseMathString(parser,datemathfmt); - } catch (ParseException e) { - wrapAndThrow(SEVERE, e, "Invalid expression for date"); + // note: DMP does not use the locale but perhaps a subclass might use it, for e.g. parsing a date in a custom + // string that doesn't necessarily have date math? + //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic. + if (datemathfmt.startsWith("NOW")) { + datemathfmt = datemathfmt.substring("NOW".length()); + } + try { + DateMathParser parser = new DateMathParser(tz); + parser.setNow(new Date());// thus do *not* use SolrRequestInfo + return parser.parseMath(datemathfmt); + } catch (ParseException e) { + throw wrapAndThrow(SEVERE, e, "Invalid expression for date"); } - return date; - } - - /** - * NOTE: declared as a method to allow for extensibility - * @lucene.experimental - * @return the result of resolving the variable wrapper - */ - protected Date parseMathString(DateMathParser parser, String datemathfmt) throws ParseException { - return parser.parseMath(datemathfmt); } /** @@ -182,16 +177,4 @@ public class DateFormatEvaluator extends Evaluator { return variableWrapper.resolve(); } - /** - * @lucene.experimental - * @return a DateMathParser - */ - protected DateMathParser getDateMathParser(Locale l, TimeZone tz) { - return new DateMathParser(tz, l) { - @Override - public Date getNow() { - return new Date(); - } - }; - } } diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java index e86d09df5e1..a03354f2d2f 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java @@ -20,7 +20,13 @@ import java.io.File; import java.io.FilenameFilter; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.TimeZone; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -153,10 +159,14 @@ public class FileListEntityProcessor extends EntityProcessorBase { } m = Evaluator.IN_SINGLE_QUOTES.matcher(dateStr); if (m.find()) { - String expr = null; - expr = m.group(1).replaceAll("NOW", ""); + String expr = m.group(1); + //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic. 
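// Hedged sketch (illustration only, not part of the patch): both date-math hunks converge on the
// same pattern; strip the leading "NOW" and hand the remaining expression to the simplified
// DateMathParser(TimeZone) constructor, pinning "now" explicitly instead of relying on request state.
String expr = "NOW-3DAYS/DAY";                       // hypothetical input value
if (expr.startsWith("NOW")) {
  expr = expr.substring("NOW".length());             // parseMath expects only the math suffix
}
try {
  DateMathParser parser = new DateMathParser(TimeZone.getTimeZone("UTC"));
  parser.setNow(new Date());                         // thus do *not* use SolrRequestInfo
  Date resolved = parser.parseMath(expr);            // three days ago, rounded down to the day
} catch (ParseException e) {
  throw wrapAndThrow(SEVERE, e, "Invalid expression for date");
}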
+ if (expr.startsWith("NOW")) { + expr = expr.substring("NOW".length()); + } try { - return new DateMathParser(TimeZone.getDefault(), Locale.ROOT).parseMath(expr); + // DWS TODO: is this TimeZone the right default for us? Deserves explanation if so. + return new DateMathParser(TimeZone.getDefault()).parseMath(expr); } catch (ParseException exp) { throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid expression for date", exp); diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java index e7ff2e698c0..00285649fe6 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java @@ -16,12 +16,19 @@ */ package org.apache.solr.handler.dataimport; -import org.junit.Ignore; -import org.junit.Test; -import org.apache.solr.util.DateMathParser; - import java.text.SimpleDateFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; +import java.util.TimeZone; + +import org.apache.solr.util.DateMathParser; +import org.junit.Test; /** *

        @@ -103,7 +110,7 @@ public class TestVariableResolver extends AbstractDataImportHandlerTestCase { .> emptyList())); SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(TimeZone.getDefault(), Locale.ROOT); + DateMathParser dmp = new DateMathParser(TimeZone.getDefault()); String s = vri .replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}"); @@ -144,7 +151,7 @@ public class TestVariableResolver extends AbstractDataImportHandlerTestCase { SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(TimeZone.getDefault(), Locale.ROOT); + DateMathParser dmp = new DateMathParser(TimeZone.getDefault()); String s = resolver .replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}"); diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java index 4d95a4f9198..535fe9db3af 100644 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java @@ -24,14 +24,13 @@ import java.util.Locale; import com.codahale.metrics.MetricRegistry; import com.google.common.collect.ListMultimap; import com.typesafe.config.Config; -import org.apache.commons.io.FileUtils; import org.apache.lucene.util.Constants; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.cloud.AbstractFullDistribZkTestBase; -import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.cloud.AbstractDistribZkTestBase; +import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.cloud.SolrZkClient; -import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.kitesdk.morphline.api.Collector; import org.kitesdk.morphline.api.Command; @@ -42,72 +41,58 @@ import org.kitesdk.morphline.base.FaultTolerance; import org.kitesdk.morphline.base.Notifications; import org.kitesdk.morphline.stdlib.PipeBuilder; -public abstract class AbstractSolrMorphlineZkTestBase extends AbstractFullDistribZkTestBase { - private static File solrHomeDirectory; - - protected static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent(); - private static final File SOLR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr"); - private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1"); +public abstract class AbstractSolrMorphlineZkTestBase extends SolrCloudTestCase { + + protected static final String COLLECTION = "collection1"; + + protected static final int TIMEOUT = 30; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(2) + .addConfig("conf", SOLR_CONF_DIR.toPath()) + .configure(); + + CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1) + .processAndWait(cluster.getSolrClient(), TIMEOUT); + AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(), + false, true, TIMEOUT); + } + + protected static 
final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent(); + private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1/conf"); protected Collector collector; protected Command morphline; - - @Override - public String getSolrHome() { - return solrHomeDirectory.getPath(); - } - - public AbstractSolrMorphlineZkTestBase() { - sliceCount = 3; - fixShardCount(3); - } @BeforeClass public static void setupClass() throws Exception { - assumeFalse("This test fails on Java 9 (https://issues.apache.org/jira/browse/SOLR-8876)", Constants.JRE_IS_MINIMUM_JAVA9); + + assumeFalse("This test fails on Java 9 (https://issues.apache.org/jira/browse/SOLR-8876)", + Constants.JRE_IS_MINIMUM_JAVA9); assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)", new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage())); - solrHomeDirectory = createTempDir().toFile(); - AbstractZkTestCase.SOLRHOME = solrHomeDirectory; - FileUtils.copyDirectory(SOLR_INSTANCE_DIR, solrHomeDirectory); + } - - @AfterClass - public static void tearDownClass() throws Exception { - solrHomeDirectory = null; - } - - @Override - public void distribSetUp() throws Exception { - super.distribSetUp(); - System.setProperty("host", "127.0.0.1"); - System.setProperty("numShards", Integer.toString(sliceCount)); - uploadConfFiles(); + + @Before + public void setup() throws Exception { collector = new Collector(); } - - @Override - public void distribTearDown() throws Exception { - super.distribTearDown(); - System.clearProperty("host"); - System.clearProperty("numShards"); - } - - @Override + protected void commit() throws Exception { - Notifications.notifyCommitTransaction(morphline); - super.commit(); + Notifications.notifyCommitTransaction(morphline); } protected Command parse(String file) throws IOException { - return parse(file, "collection1"); + return parse(file, COLLECTION); } protected Command parse(String file, String collection) throws IOException { SolrLocator locator = new SolrLocator(createMorphlineContext()); locator.setCollectionName(collection); - locator.setZkHost(zkServer.getZkAddress()); + locator.setZkHost(cluster.getZkServer().getZkAddress()); //locator.setServerUrl(cloudJettys.get(0).url); // TODO: download IndexSchema from solrUrl not yet implemented //locator.setSolrHomeDir(SOLR_HOME_DIR.getPath()); Config config = new Compiler().parse(new File(RESOURCES_DIR + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR")); @@ -145,33 +130,4 @@ public abstract class AbstractSolrMorphlineZkTestBase extends AbstractFullDistri return record; } - private void putConfig(SolrZkClient zkClient, String name) throws Exception { - File file = new File(new File(SOLR_CONF_DIR, "conf"), name); - String destPath = "/configs/conf1/" + name; - System.out.println("put " + file.getAbsolutePath() + " to " + destPath); - zkClient.makePath(destPath, file, false, true); - } - - private void uploadConfFiles(SolrZkClient zkClient, File dir, String prefix) throws Exception { - boolean found = false; - for (File f : dir.listFiles()) { - String name = f.getName(); - if (name.startsWith(".")) continue; - if (f.isFile()) { - putConfig(zkClient, prefix + name); - found = true; - } else if (f.isDirectory()) { - uploadConfFiles(zkClient, new File(dir, name), prefix + name + "/"); - } - } - assertTrue("Config folder '" + dir + "' with files to upload to zookeeper was empty.", found); - } - - private void uploadConfFiles() throws Exception { - // upload 
our own config files - SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), 10000); - uploadConfFiles(zkClient, new File(SOLR_CONF_DIR, "conf"), ""); - zkClient.close(); - } - } diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java index a654e94de27..ddaf2f69e6e 100644 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java @@ -17,39 +17,32 @@ package org.apache.solr.morphlines.solr; import java.io.File; -import java.io.IOException; import java.util.Iterator; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.params.CollectionParams.CollectionAction; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.util.NamedList; import org.apache.solr.util.BadHdfsThreadsFilter; import org.junit.Test; import org.kitesdk.morphline.api.Record; import org.kitesdk.morphline.base.Fields; import org.kitesdk.morphline.base.Notifications; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; - @ThreadLeakFilters(defaultFilters = true, filters = { BadHdfsThreadsFilter.class // hdfs currently leaks thread(s) }) @Slow public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase { - @Test public void test() throws Exception { - - waitForRecoveriesToFinish(false); - - createAlias("aliascollection", "collection1"); + + CollectionAdminRequest.createAlias("aliascollection", "collection1") + .process(cluster.getSolrClient()); morphline = parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection"); Record record = new Record(); @@ -84,9 +77,11 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase { assertFalse(citer.hasNext()); - commit(); + Notifications.notifyCommitTransaction(morphline); + new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION); - QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); + QueryResponse rsp = cluster.getSolrClient() + .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); //System.out.println(rsp); Iterator iter = rsp.getResults().iterator(); assertEquals(expected.getFields(), next(iter)); @@ -95,26 +90,14 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase { Notifications.notifyRollbackTransaction(morphline); Notifications.notifyShutdown(morphline); - - - createAlias("aliascollection", "collection1,collection2"); - - try { + + CollectionAdminRequest.createAlias("aliascollection", "collection1,collection2") + .processAndWait(cluster.getSolrClient(), TIMEOUT); + + expectThrows(IllegalArgumentException.class, () -> { parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection"); - fail("Expected 
IAE because update alias maps to multiple collections"); - } catch (IllegalArgumentException e) { - - } - } - - private NamedList createAlias(String alias, String collections) throws SolrServerException, IOException { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("collections", collections); - params.set("name", alias); - params.set("action", CollectionAction.CREATEALIAS.toString()); - QueryRequest request = new QueryRequest(params); - request.setPath("/admin/collections"); - return cloudClient.request(request); + }); + } } diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java index 1c30a84e107..4f3b27f0063 100644 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java @@ -17,19 +17,23 @@ package org.apache.solr.morphlines.solr; import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.Iterator; import java.util.List; -import org.apache.avro.Schema.Field; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.google.common.base.Preconditions; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.FileReader; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.util.BadHdfsThreadsFilter; @@ -38,16 +42,6 @@ import org.kitesdk.morphline.api.Record; import org.kitesdk.morphline.base.Fields; import org.kitesdk.morphline.base.Notifications; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.io.Files; - @ThreadLeakFilters(defaultFilters = true, filters = { BadHdfsThreadsFilter.class // hdfs currently leaks thread(s) }) @@ -57,33 +51,33 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase { @Test public void test() throws Exception { - Joiner joiner = Joiner.on(File.separator); - File file = new File(joiner.join(RESOURCES_DIR, "test-documents", "sample-statuses-20120906-141433-medium.avro")); - - waitForRecoveriesToFinish(false); - + + Path avro = Paths.get(RESOURCES_DIR).resolve("test-documents").resolve("sample-statuses-20120906-141433-medium.avro"); + // load avro records via morphline and zk into solr morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer"); Record record = new Record(); - byte[] body = Files.toByteArray(file); + byte[] body = Files.readAllBytes(avro); 
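// Hedged sketch (not part of the patch): after the migration to SolrCloudTestCase these tests no
// longer go through the old commit()/cloudClient helpers; the equivalent flow is an explicit
// commit through the cluster's CloudSolrClient followed by a collection-scoped query, e.g.:
Notifications.notifyCommitTransaction(morphline);
new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION);
QueryResponse response = cluster.getSolrClient()
    .query(COLLECTION, new SolrQuery("*:*").setRows(100).addSort(Fields.ID, SolrQuery.ORDER.asc));
assertEquals(collector.getRecords().size(), response.getResults().size());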
record.put(Fields.ATTACHMENT_BODY, body); startSession(); Notifications.notifyBeginTransaction(morphline); assertTrue(morphline.process(record)); assertEquals(1, collector.getNumStartEvents()); - commit(); + Notifications.notifyCommitTransaction(morphline); + new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION); // fetch sorted result set from solr - QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc)); + QueryResponse rsp = cluster.getSolrClient() + .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc)); assertEquals(2104, collector.getRecords().size()); assertEquals(collector.getRecords().size(), rsp.getResults().size()); Collections.sort(collector.getRecords(), (r1, r2) -> r1.get("id").toString().compareTo(r2.get("id").toString())); // fetch test input data and sort like solr result set - List records = new ArrayList(); - FileReader reader = new DataFileReader(file, new GenericDatumReader()); + List records = new ArrayList<>(); + FileReader reader = new DataFileReader(avro.toFile(), new GenericDatumReader()); while (reader.hasNext()) { GenericData.Record expected = reader.next(); records.add(expected); @@ -121,7 +115,7 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase { Notifications.notifyRollbackTransaction(morphline); Notifications.notifyShutdown(morphline); - cloudClient.close(); + } private void assertTweetEquals(GenericData.Record expected, Record actual, int i) { @@ -144,12 +138,4 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase { } } - private String toString(GenericData.Record avroRecord) { - Record record = new Record(); - for (Field field : avroRecord.getSchema().getFields()) { - record.put(field.name(), avroRecord.get(field.pos())); - } - return record.toString(); // prints sorted by key for human readability - } - } diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java index 97c6dfb77d4..24d8682003b 100644 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java +++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java @@ -19,8 +19,10 @@ package org.apache.solr.morphlines.solr; import java.io.File; import java.util.Iterator; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.util.BadHdfsThreadsFilter; @@ -29,21 +31,16 @@ import org.kitesdk.morphline.api.Record; import org.kitesdk.morphline.base.Fields; import org.kitesdk.morphline.base.Notifications; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; - @ThreadLeakFilters(defaultFilters = true, filters = { BadHdfsThreadsFilter.class // hdfs currently leaks thread(s) }) @Slow public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase { - @Test public void test() throws Exception { - waitForRecoveriesToFinish(false); - - morphline = parse("test-morphlines" + File.separator + "loadSolrBasic"); + morphline = parse("test-morphlines" + File.separator + "loadSolrBasic"); Record record = new 
Record(); record.put(Fields.ID, "id0-innsbruck"); record.put("text", "mytext"); @@ -76,9 +73,11 @@ public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase { assertFalse(citer.hasNext()); - commit(); + Notifications.notifyCommitTransaction(morphline); + new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION); - QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); + QueryResponse rsp = cluster.getSolrClient() + .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); //System.out.println(rsp); Iterator iter = rsp.getResults().iterator(); assertEquals(expected.getFields(), next(iter)); @@ -87,7 +86,7 @@ public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase { Notifications.notifyRollbackTransaction(morphline); Notifications.notifyShutdown(morphline); - cloudClient.close(); + } } diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index c5e51f918da..cf73b62cb08 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -1062,8 +1062,9 @@ public class Overseer implements Closeable { throw new RuntimeException(e); } } - public static boolean isLegacy(Map clusterProps) { - return !"false".equals(clusterProps.get(ZkStateReader.LEGACY_CLOUD)); + public static boolean isLegacy(ZkStateReader stateReader) { + String legacyProperty = stateReader.getClusterProperty(ZkStateReader.LEGACY_CLOUD, "true"); + return !"false".equals(legacyProperty); } public ZkStateReader getZkStateReader() { diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java index c3571e366ed..93f88cdddb0 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; @@ -148,7 +147,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { // TODO: extract to configurable strategy class ?? 
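// Hedged sketch (illustration only): cluster properties are now read through typed accessors that
// take an explicit default, and written through the new ClusterProperties helper rather than
// ZkStateReader. The zkStateReader/zkClient references are assumed to come from the surrounding
// component, and the property value below is a hypothetical example.
String autoAddReplicas = zkStateReader.getClusterProperty(ZkStateReader.AUTO_ADD_REPLICAS, (String) null);
Integer maxCoresPerNode = zkStateReader.getClusterProperty(ZkStateReader.MAX_CORES_PER_NODE, (Integer) null);
boolean legacyCloud = Overseer.isLegacy(zkStateReader);        // LEGACY_CLOUD defaults to "true" when unset

ClusterProperties props = new ClusterProperties(zkClient);
props.setClusterProperty("location", "hdfs://backups");        // setClusterProperty throws IOException on failure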
ClusterState clusterState = zkStateReader.getClusterState(); //check if we have disabled autoAddReplicas cluster wide - String autoAddReplicas = (String) zkStateReader.getClusterProps().get(ZkStateReader.AUTO_ADD_REPLICAS); + String autoAddReplicas = zkStateReader.getClusterProperty(ZkStateReader.AUTO_ADD_REPLICAS, (String) null); if (autoAddReplicas != null && autoAddReplicas.equals("false")) { return; } @@ -229,7 +228,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { private boolean addReplica(final String collection, DownReplica badReplica) { // first find best home - first strategy, sort by number of cores // hosted where maxCoresPerNode is not violated - final Integer maxCoreCount = (Integer) zkStateReader.getClusterProps().get(ZkStateReader.MAX_CORES_PER_NODE); + final Integer maxCoreCount = zkStateReader.getClusterProperty(ZkStateReader.MAX_CORES_PER_NODE, (Integer) null); final String createUrl = getBestCreateUrl(zkStateReader, badReplica, maxCoreCount); if (createUrl == null) { log.warn("Could not find a node to create new replica on."); diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java index b4dc93511e7..ed23e7754e1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java @@ -1894,7 +1894,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler positionVsNodes = identifyNodes(clusterState, nodeList, message, shardNames, repFactor); } - boolean isLegacyCloud = Overseer.isLegacy(zkStateReader.getClusterProps()); + boolean isLegacyCloud = Overseer.isLegacy(zkStateReader); createConfNode(configName, collectionName, isLegacyCloud); @@ -2126,7 +2126,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler } ModifiableSolrParams params = new ModifiableSolrParams(); - if (!Overseer.isLegacy(zkStateReader.getClusterProps())) { + if (!Overseer.isLegacy(zkStateReader)) { if (!skipCreateReplicaInClusterState) { ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(), ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.SHARD_ID_PROP, shard, ZkStateReader.CORE_NAME_PROP, coreName, diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java index 14f6c7185ad..0e2f1892191 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java @@ -16,7 +16,16 @@ */ package org.apache.solr.cloud; -import static org.apache.solr.common.params.CommonParams.*; +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.util.List; +import java.util.concurrent.TimeoutException; +import java.util.regex.Pattern; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -28,26 +37,16 @@ import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.ClusterProperties; import org.apache.solr.common.cloud.SolrZkClient; import 
org.apache.solr.common.cloud.ZkConfigManager; -import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.core.CoreContainer; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.xml.sax.SAXException; -import javax.xml.parsers.ParserConfigurationException; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Paths; -import java.util.List; -import java.util.concurrent.TimeoutException; -import java.util.regex.Pattern; +import static org.apache.solr.common.params.CommonParams.NAME; +import static org.apache.solr.common.params.CommonParams.VALUE_LONG; public class ZkCLI { @@ -324,28 +323,12 @@ public class ZkCLI { //If -val option is missing, we will use the null value. This is required to maintain //compatibility with Collections API. String propertyValue = line.getOptionValue(VALUE_LONG); - ZkStateReader reader = new ZkStateReader(zkClient); + ClusterProperties props = new ClusterProperties(zkClient); try { - reader.setClusterProperty(propertyName, propertyValue); - } catch (SolrException ex) { - //This can happen if two concurrent invocations of this command collide - //with each other. Here we are just adding a defensive check to see if - //the value is already set to expected value. If yes, then we don't - //fail the command. - Throwable cause = ex.getCause(); - if(cause instanceof KeeperException.NodeExistsException - || cause instanceof KeeperException.BadVersionException) { - String currentValue = (String)reader.getClusterProps().get(propertyName); - if((currentValue == propertyValue) || (currentValue != null && currentValue.equals(propertyValue))) { - return; - } - } - System.out.println("Unable to set the cluster property due to following error : " + - ex.getLocalizedMessage() + - ((cause instanceof KeeperException.BadVersionException)?". 
Try again":"")); + props.setClusterProperty(propertyName, propertyValue); + } catch (IOException ex) { + System.out.println("Unable to set the cluster property due to following error : " + ex.getLocalizedMessage()); System.exit(1); - } finally { - reader.close(); } } else { // If not cmd matches diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index ae73633cf33..444887b08a9 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -151,7 +151,7 @@ public final class ZkController { private final int localHostPort; // example: 54065 private final String hostName; // example: 127.0.0.1 private final String nodeName; // example: 127.0.0.1:54065_solr - private final String baseURL; // example: http://127.0.0.1:54065/solr + private String baseURL; // example: http://127.0.0.1:54065/solr private final CloudConfig cloudConfig; @@ -386,8 +386,6 @@ public final class ZkController { if (cc != null) cc.securityNodeChanged(); }); - this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName); - init(registerOnReconnect); } @@ -642,6 +640,7 @@ public final class ZkController { try { createClusterZkNodes(zkClient); zkStateReader.createClusterStateWatchersAndUpdate(); + this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName); // start the overseer first as following code may need it's processing if (!zkRunOnly) { @@ -1215,23 +1214,10 @@ public final class ZkController { if (context != null) { context.cancelElection(); } - - final Collection cores = cc.getCores(); - - // if there is no SolrCore which is a member of this collection, remove the watch + CloudDescriptor cloudDescriptor = cd.getCloudDescriptor(); - boolean removeWatch = true; - for (SolrCore solrCore : cores) { - final CloudDescriptor cloudDesc = solrCore.getCoreDescriptor().getCloudDescriptor(); - if (cloudDesc != null && cloudDescriptor.getCollectionName().equals(cloudDesc.getCollectionName())) { - removeWatch = false; - break; - } - } - - if (removeWatch) { - zkStateReader.removeZKWatch(collection); - } + zkStateReader.unregisterCore(cloudDescriptor.getCollectionName()); + ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(), ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.NODE_NAME_PROP, getNodeName(), @@ -1481,7 +1467,7 @@ public final class ZkController { "Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" : "Registering watch for collection {}", collectionName); - zkStateReader.addCollectionWatch(collectionName); + zkStateReader.registerCore(collectionName); } catch (KeeperException e) { log.error("", e); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); @@ -1501,7 +1487,7 @@ public final class ZkController { } private void checkStateInZk(CoreDescriptor cd) throws InterruptedException { - if (!Overseer.isLegacy(zkStateReader.getClusterProps())) { + if (!Overseer.isLegacy(zkStateReader)) { CloudDescriptor cloudDesc = cd.getCloudDescriptor(); String coreNodeName = cloudDesc.getCoreNodeName(); assert coreNodeName != null : "SolrCore: " + cd.getName() + " has no coreNodeName"; diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java index 5147f43797b..495d1d3b932 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java +++ 
b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java @@ -197,7 +197,7 @@ public class ReplicaMutator { } public ZkWriteCommand setState(ClusterState clusterState, ZkNodeProps message) { - if (Overseer.isLegacy(zkStateReader.getClusterProps())) { + if (Overseer.isLegacy(zkStateReader)) { return updateState(clusterState, message); } else { return updateStateNew(clusterState, message); diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index f933faba822..db9887cf1f1 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -761,7 +761,7 @@ public class CoreContainer { boolean preExisitingZkEntry = false; try { if (getZkController() != null) { - if (!Overseer.isLegacy(getZkController().getZkStateReader().getClusterProps())) { + if (!Overseer.isLegacy(getZkController().getZkStateReader())) { if (cd.getCloudDescriptor().getCoreNodeName() == null) { throw new SolrException(ErrorCode.SERVER_ERROR, "non legacy mode coreNodeName missing " + parameters.toString()); diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java index dc423d99212..c575ecbca10 100644 --- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java +++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java @@ -24,9 +24,9 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene60.Lucene60Codec; -import org.apache.solr.common.SolrException; +import org.apache.lucene.codecs.lucene62.Lucene62Codec; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.SchemaField; import org.apache.solr.util.plugin.SolrCoreAware; @@ -91,7 +91,7 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware { compressionMode = SOLR_DEFAULT_COMPRESSION_MODE; log.info("Using default compressionMode: " + compressionMode); } - codec = new Lucene60Codec(compressionMode) { + codec = new Lucene62Codec(compressionMode) { @Override public PostingsFormat getPostingsFormatForField(String field) { final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field); diff --git a/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java b/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java index c7a668908ca..f9da1cfa7c1 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java +++ b/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java @@ -20,7 +20,6 @@ import java.io.File; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.List; -import java.util.Locale; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexDeletionPolicy; @@ -174,7 +173,7 @@ public class SolrDeletionPolicy extends IndexDeletionPolicy implements NamedList try { if (maxCommitAge != null) { if (maxCommitAgeTimeStamp==-1) { - DateMathParser dmp = new DateMathParser(DateMathParser.UTC, Locale.ROOT); + DateMathParser dmp = new DateMathParser(DateMathParser.UTC); maxCommitAgeTimeStamp = dmp.parseMath(maxCommitAge).getTime(); } if 
(IndexDeletionPolicyWrapper.getCommitTimestamp(commit) < maxCommitAgeTimeStamp) { diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java b/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java index 48302cd818d..dbe2e790f9d 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java @@ -152,7 +152,7 @@ public class ClusterStatus { clusterStatus.add("collections", collectionProps); // read cluster properties - Map clusterProps = zkStateReader.getClusterProps(); + Map clusterProps = zkStateReader.getClusterProperties(); if (clusterProps != null && !clusterProps.isEmpty()) { clusterStatus.add("properties", clusterProps); } diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 4deddedbf5d..6d501a1cb5a 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -51,17 +51,8 @@ import org.apache.solr.cloud.rule.ReplicaAssigner; import org.apache.solr.cloud.rule.Rule; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.cloud.ClusterState; -import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.ImplicitDocRouter; -import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.*; import org.apache.solr.common.cloud.Replica.State; -import org.apache.solr.common.cloud.Slice; -import org.apache.solr.common.cloud.SolrZkClient; -import org.apache.solr.common.cloud.ZkCmdExecutor; -import org.apache.solr.common.cloud.ZkCoreNodeProps; -import org.apache.solr.common.cloud.ZkNodeProps; -import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CollectionAdminParams; import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.params.CollectionParams.CollectionAction; @@ -570,7 +561,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission Map call(SolrQueryRequest req, SolrQueryResponse rsp, CollectionsHandler h) throws Exception { String name = req.getParams().required().get(NAME); String val = req.getParams().get(VALUE_LONG); - h.coreContainer.getZkController().getZkStateReader().setClusterProperty(name, val); + ClusterProperties cp = new ClusterProperties(h.coreContainer.getZkController().getZkClient()); + cp.setClusterProperty(name, val); return null; } }, @@ -808,7 +800,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission String location = req.getParams().get("location"); if (location == null) { - location = (String) h.coreContainer.getZkController().getZkStateReader().getClusterProps().get("location"); + location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null); } if (location == null) { throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property"); @@ -832,7 +824,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission String location = req.getParams().get("location"); if (location == null) { - location = (String) h.coreContainer.getZkController().getZkStateReader().getClusterProps().get("location"); + location = 
h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null); } if (location == null) { throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property"); diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java new file mode 100644 index 00000000000..b58d0a4ba16 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergePolicyWrapper; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.MergeTrigger; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.search.Sort; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedLongValues; + +// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction? + +public final class SortingMergePolicy extends MergePolicyWrapper { + + private final Sort sort; + + /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */ + public SortingMergePolicy(MergePolicy in, Sort sort) { + super(in); + this.sort = sort; + } + + /** Return the {@link Sort} order that is used to sort segments when merging. 
*/ + public Sort getSort() { + return sort; + } + + @Override + public String toString() { + return "SortingMergePolicy(" + in + ", sort=" + sort + ")"; + } +} diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java index 53190b5f4f9..b22df3b3f97 100644 --- a/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java +++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java @@ -17,7 +17,6 @@ package org.apache.solr.index; import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.search.Sort; import org.apache.solr.core.SolrResourceLoader; diff --git a/solr/core/src/java/org/apache/solr/schema/DateRangeField.java b/solr/core/src/java/org/apache/solr/schema/DateRangeField.java index f4070d38d30..d51c1f17022 100644 --- a/solr/core/src/java/org/apache/solr/schema/DateRangeField.java +++ b/solr/core/src/java/org/apache/solr/schema/DateRangeField.java @@ -39,16 +39,18 @@ import org.apache.solr.util.DateMathParser; import org.locationtech.spatial4j.shape.Shape; /** - * A field for indexed dates and date ranges. It's mostly compatible with TrieDateField. + * A field for indexed dates and date ranges. It's mostly compatible with TrieDateField. It has the potential to allow + * efficient faceting, similar to facet.enum. * * @see NumberRangePrefixTreeStrategy * @see DateRangePrefixTree */ -public class DateRangeField extends AbstractSpatialPrefixTreeFieldType { +public class DateRangeField extends AbstractSpatialPrefixTreeFieldType + implements DateValueFieldType { // used by ParseDateFieldUpdateProcessorFactory private static final String OP_PARAM = "op";//local-param to resolve SpatialOperation - private static final DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE; + private static final DateRangePrefixTree tree = new DateRangePrefixTree(DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL); @Override protected void init(IndexSchema schema, Map args) { @@ -69,17 +71,24 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldType= 0) { - //use Solr standard date format parsing rules. - //TODO parse a Calendar instead of a Date, rounded according to DateMath syntax. + if (str.startsWith("NOW") || str.lastIndexOf('Z') >= 0) { // ? but not if Z is last char ? Ehh, whatever. + //use Solr standard date format parsing rules: + //TODO add DMP utility to return ZonedDateTime alternative, then set cal fields manually, which is faster? Date date = DateMathParser.parseMath(null, str); Calendar cal = tree.newCal(); cal.setTime(date); @@ -119,19 +128,6 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldType * + *

        + * Historical dates: The calendar computation is completely done with the + * Gregorian system/algorithm. It does not switch to Julian or + * anything else, unlike the default {@link java.util.GregorianCalendar}. + *

        * @see SolrRequestInfo#getClientTimeZone * @see SolrRequestInfo#getNOW */ @@ -103,9 +114,6 @@ public class DateMathParser { /** Default TimeZone for DateMath rounding (UTC) */ public static final TimeZone DEFAULT_MATH_TZ = UTC; - /** Default Locale for DateMath rounding (Locale.ROOT) */ - public static final Locale DEFAULT_MATH_LOCALE = Locale.ROOT; - /** * Differs by {@link DateTimeFormatter#ISO_INSTANT} in that it's lenient. * @see #parseNoMath(String) @@ -115,22 +123,22 @@ public class DateMathParser { /** * A mapping from (uppercased) String labels identifying time units, - * to the corresponding Calendar constant used to set/add/roll that unit - * of measurement. + * to the corresponding {@link ChronoUnit} enum (e.g. "YEARS") used to + * set/add/roll that unit of measurement. * *

        * A single logical unit of time might be represented by multiple labels - * for convenience (ie: DATE==DAY, - * MILLI==MILLISECOND) + * for convenience (ie: DATE==DAYS, + * MILLI==MILLIS) *

        * * @see Calendar */ - public static final Map CALENDAR_UNITS = makeUnitsMap(); + public static final Map CALENDAR_UNITS = makeUnitsMap(); /** @see #CALENDAR_UNITS */ - private static Map makeUnitsMap() { + private static Map makeUnitsMap() { // NOTE: consciously choosing not to support WEEK at this time, // because of complexity in rounding down to the nearest week @@ -141,90 +149,69 @@ public class DateMathParser { // we probably need to change "Locale loc" to default to something // from a param via SolrRequestInfo as well. - Map units = new HashMap<>(13); - units.put("YEAR", Calendar.YEAR); - units.put("YEARS", Calendar.YEAR); - units.put("MONTH", Calendar.MONTH); - units.put("MONTHS", Calendar.MONTH); - units.put("DAY", Calendar.DATE); - units.put("DAYS", Calendar.DATE); - units.put("DATE", Calendar.DATE); - units.put("HOUR", Calendar.HOUR_OF_DAY); - units.put("HOURS", Calendar.HOUR_OF_DAY); - units.put("MINUTE", Calendar.MINUTE); - units.put("MINUTES", Calendar.MINUTE); - units.put("SECOND", Calendar.SECOND); - units.put("SECONDS", Calendar.SECOND); - units.put("MILLI", Calendar.MILLISECOND); - units.put("MILLIS", Calendar.MILLISECOND); - units.put("MILLISECOND", Calendar.MILLISECOND); - units.put("MILLISECONDS",Calendar.MILLISECOND); + Map units = new HashMap<>(13); + units.put("YEAR", ChronoUnit.YEARS); + units.put("YEARS", ChronoUnit.YEARS); + units.put("MONTH", ChronoUnit.MONTHS); + units.put("MONTHS", ChronoUnit.MONTHS); + units.put("DAY", ChronoUnit.DAYS); + units.put("DAYS", ChronoUnit.DAYS); + units.put("DATE", ChronoUnit.DAYS); + units.put("HOUR", ChronoUnit.HOURS); + units.put("HOURS", ChronoUnit.HOURS); + units.put("MINUTE", ChronoUnit.MINUTES); + units.put("MINUTES", ChronoUnit.MINUTES); + units.put("SECOND", ChronoUnit.SECONDS); + units.put("SECONDS", ChronoUnit.SECONDS); + units.put("MILLI", ChronoUnit.MILLIS); + units.put("MILLIS", ChronoUnit.MILLIS); + units.put("MILLISECOND", ChronoUnit.MILLIS); + units.put("MILLISECONDS",ChronoUnit.MILLIS); + + // NOTE: Maybe eventually support NANOS return units; } /** - * Modifies the specified Calendar by "adding" the specified value of units + * Returns a modified time by "adding" the specified value of units * * @exception IllegalArgumentException if unit isn't recognized. * @see #CALENDAR_UNITS */ - public static void add(Calendar c, int val, String unit) { - Integer uu = CALENDAR_UNITS.get(unit); + private static LocalDateTime add(LocalDateTime t, int val, String unit) { + ChronoUnit uu = CALENDAR_UNITS.get(unit); if (null == uu) { throw new IllegalArgumentException("Adding Unit not recognized: " + unit); } - c.add(uu.intValue(), val); + return t.plus(val, uu); } /** - * Modifies the specified Calendar by "rounding" down to the specified unit + * Returns a modified time by "rounding" down to the specified unit * * @exception IllegalArgumentException if unit isn't recognized. 
* @see #CALENDAR_UNITS */ - public static void round(Calendar c, String unit) { - Integer uu = CALENDAR_UNITS.get(unit); + private static LocalDateTime round(LocalDateTime t, String unit) { + ChronoUnit uu = CALENDAR_UNITS.get(unit); if (null == uu) { throw new IllegalArgumentException("Rounding Unit not recognized: " + unit); } - int u = uu.intValue(); - - switch (u) { - - case Calendar.YEAR: - c.clear(Calendar.MONTH); - /* fall through */ - case Calendar.MONTH: - c.clear(Calendar.DAY_OF_MONTH); - c.clear(Calendar.DAY_OF_WEEK); - c.clear(Calendar.DAY_OF_WEEK_IN_MONTH); - c.clear(Calendar.DAY_OF_YEAR); - c.clear(Calendar.WEEK_OF_MONTH); - c.clear(Calendar.WEEK_OF_YEAR); - /* fall through */ - case Calendar.DATE: - c.clear(Calendar.HOUR_OF_DAY); - c.clear(Calendar.HOUR); - c.clear(Calendar.AM_PM); - /* fall through */ - case Calendar.HOUR_OF_DAY: - c.clear(Calendar.MINUTE); - /* fall through */ - case Calendar.MINUTE: - c.clear(Calendar.SECOND); - /* fall through */ - case Calendar.SECOND: - c.clear(Calendar.MILLISECOND); - break; - default: - throw new IllegalStateException( - "No logic for rounding value ("+u+") " + unit - ); + // note: OffsetDateTime.truncatedTo does not support >= DAYS units so we handle those + switch (uu) { + case YEARS: + return LocalDateTime.of(LocalDate.of(t.getYear(), 1, 1), LocalTime.MIDNIGHT); // midnight is 00:00:00 + case MONTHS: + return LocalDateTime.of(LocalDate.of(t.getYear(), t.getMonth(), 1), LocalTime.MIDNIGHT); + case DAYS: + return LocalDateTime.of(t.toLocalDate(), LocalTime.MIDNIGHT); + default: + assert !uu.isDateBased();// >= DAY + return t.truncatedTo(uu); } - } /** @@ -290,23 +277,19 @@ public class DateMathParser { * otherwise specified in the SolrRequestInfo * * @see SolrRequestInfo#getClientTimeZone - * @see #DEFAULT_MATH_LOCALE */ public DateMathParser() { - this(null, DEFAULT_MATH_LOCALE); + this(null); } /** - * @param tz The TimeZone used for rounding (to determine when hours/days begin). If null, then this method defaults to the value dicated by the SolrRequestInfo if it - * exists -- otherwise it uses UTC. - * @param l The Locale used for rounding (to determine when weeks begin). If null, then this method defaults to en_US. + * @param tz The TimeZone used for rounding (to determine when hours/days begin). If null, then this method defaults + * to the value dictated by the SolrRequestInfo if it exists -- otherwise it uses UTC. * @see #DEFAULT_MATH_TZ - * @see #DEFAULT_MATH_LOCALE * @see Calendar#getInstance(TimeZone,Locale) * @see SolrRequestInfo#getClientTimeZone */ - public DateMathParser(TimeZone tz, Locale l) { - loc = (null != l) ? l : DEFAULT_MATH_LOCALE; + public DateMathParser(TimeZone tz) { if (null == tz) { SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo(); tz = (null != reqInfo) ? reqInfo.getClientTimeZone() : DEFAULT_MATH_TZ; @@ -321,13 +304,6 @@ public class DateMathParser { return this.zone; } - /** - * @return the locale - */ - public Locale getLocale() { - return this.loc; - } - /** * Defines this instance's concept of "now". * @see #getNow @@ -337,7 +313,7 @@ public class DateMathParser { } /** - * Returns a cloned of this instance's concept of "now". + * Returns a clone of this instance's concept of "now" (never null). 
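A minimal standalone sketch of the java.time rounding scheme the DateMathParser hunks above switch to; the class name, the sample instant and the "+1MONTH/DAY" combination are made up for illustration, while the LocalDate/LocalTime.MIDNIGHT handling of date-based units, truncatedTo() for time-based units, and the ZonedDateTime-to-Date conversion mirror the patch:

import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.temporal.ChronoUnit;
import java.util.Date;

public class DateMathSketch {
  // Round down to a unit. truncatedTo() rejects date-based units (MONTHS, YEARS),
  // so those are rebuilt explicitly from LocalDate + midnight, as in the patch.
  static LocalDateTime round(LocalDateTime t, ChronoUnit unit) {
    switch (unit) {
      case YEARS:  return LocalDateTime.of(LocalDate.of(t.getYear(), 1, 1), LocalTime.MIDNIGHT);
      case MONTHS: return LocalDateTime.of(LocalDate.of(t.getYear(), t.getMonth(), 1), LocalTime.MIDNIGHT);
      case DAYS:   return LocalDateTime.of(t.toLocalDate(), LocalTime.MIDNIGHT);
      default:     return t.truncatedTo(unit); // HOURS, MINUTES, SECONDS, MILLIS
    }
  }

  public static void main(String[] args) {
    LocalDateTime now = LocalDateTime.of(1234, 7, 4, 12, 8, 56, 235_000_000); // pre-Gregorian-change date
    LocalDateTime shifted = round(now.plus(1, ChronoUnit.MONTHS), ChronoUnit.DAYS); // equivalent of "+1MONTH/DAY"
    Date result = Date.from(ZonedDateTime.of(shifted, ZoneOffset.UTC).toInstant()); // back to java.util.Date
    System.out.println(result.toInstant()); // 1234-08-04T00:00:00Z
  }
}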
* * If setNow was never called (or if null was specified) then this method * first defines 'now' as the value dictated by the SolrRequestInfo if it @@ -353,7 +329,7 @@ public class DateMathParser { // fall back to current time if no request info set now = new Date(); } else { - now = reqInfo.getNOW(); + now = reqInfo.getNOW(); // never null } } return (Date) now.clone(); @@ -365,15 +341,15 @@ public class DateMathParser { * @exception ParseException positions in ParseExceptions are token positions, not character positions. */ public Date parseMath(String math) throws ParseException { - - Calendar cal = Calendar.getInstance(zone, loc); - cal.setTime(getNow()); - /* check for No-Op */ if (0==math.length()) { - return cal.getTime(); + return getNow(); } - + + ZoneId zoneId = zone.toZoneId(); + // localDateTime is a date and time local to the timezone specified + LocalDateTime localDateTime = ZonedDateTime.ofInstant(getNow().toInstant(), zoneId).toLocalDateTime(); + String[] ops = splitter.split(math); int pos = 0; while ( pos < ops.length ) { @@ -391,7 +367,7 @@ public class DateMathParser { ("Need a unit after command: \"" + command + "\"", pos); } try { - round(cal, ops[pos++]); + localDateTime = round(localDateTime, ops[pos++]); } catch (IllegalArgumentException e) { throw new ParseException ("Unit not recognized: \"" + ops[pos-1] + "\"", pos-1); @@ -415,7 +391,7 @@ public class DateMathParser { } try { String unit = ops[pos++]; - add(cal, val, unit); + localDateTime = add(localDateTime, val, unit); } catch (IllegalArgumentException e) { throw new ParseException ("Unit not recognized: \"" + ops[pos-1] + "\"", pos-1); @@ -427,7 +403,7 @@ public class DateMathParser { } } - return cal.getTime(); + return Date.from(ZonedDateTime.of(localDateTime, zoneId).toInstant()); } private static Pattern splitter = Pattern.compile("\\b|(?<=\\d)(?=\\D)"); diff --git a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java index 9d45a0dabc9..579ccf082f4 100644 --- a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java +++ b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java @@ -867,7 +867,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { assertQ("check counts using fixed NOW and TZ rounding", req("q", "bday:[NOW/DAY TO NOW/DAY+1DAY]", - "TZ", "GMT-23", + "TZ", "GMT+01", "NOW", "205369736000" // 1976-07-04T23:08:56.235Z ), "*[count(//doc)=0]"); diff --git a/solr/core/src/test/org/apache/solr/TestTrie.java b/solr/core/src/test/org/apache/solr/TestTrie.java index 07935b209bf..47c50b2e099 100644 --- a/solr/core/src/test/org/apache/solr/TestTrie.java +++ b/solr/core/src/test/org/apache/solr/TestTrie.java @@ -172,7 +172,7 @@ public class TestTrie extends SolrTestCaseJ4 { format.setTimeZone(TimeZone.getTimeZone("UTC")); assertU(delQ("*:*")); - DateMathParser dmp = new DateMathParser(DateMathParser.UTC, Locale.ROOT); + DateMathParser dmp = new DateMathParser(DateMathParser.UTC); String largestDate = ""; for (int i = 0; i < 10; i++) { // index 10 days starting with today @@ -221,7 +221,7 @@ public class TestTrie extends SolrTestCaseJ4 { // For tdate tests SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(DateMathParser.UTC, Locale.ROOT); + DateMathParser dmp = new DateMathParser(DateMathParser.UTC); for (int i = 0; i < 10; i++) { long l = Integer.MAX_VALUE + i*1L; diff --git 
a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java index 6ada6bb5d98..f77829d135f 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java @@ -1322,7 +1322,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa boolean changed = false; while(! timeout.hasTimedOut()){ Thread.sleep(10); - changed = Objects.equals(val,client.getZkStateReader().getClusterProps().get(name)); + changed = Objects.equals(val,client.getZkStateReader().getClusterProperty(name, (String) null)); if(changed) break; } return changed; diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java index 0975b9aae2e..b04bfbc3ffa 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java @@ -45,7 +45,6 @@ import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.util.TimeOut; import org.apache.zookeeper.KeeperException; -import org.junit.Ignore; import org.junit.Test; import static org.apache.solr.cloud.ReplicaPropertiesBase.verifyUniqueAcrossCollection; @@ -336,7 +335,7 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase { while(! timeout.hasTimedOut()){ Thread.sleep(10); changed = Objects.equals("false", - cloudClient.getZkStateReader().getClusterProps().get(ZkStateReader.LEGACY_CLOUD)); + cloudClient.getZkStateReader().getClusterProperty(ZkStateReader.LEGACY_CLOUD, "none")); if(changed) break; } assertTrue("The Cluster property wasn't set", changed); @@ -351,7 +350,7 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase { changed = false; while(! 
timeout.hasTimedOut()) { Thread.sleep(10); - changed = (cloudClient.getZkStateReader().getClusterProps().get(ZkStateReader.LEGACY_CLOUD) == null); + changed = (cloudClient.getZkStateReader().getClusterProperty(ZkStateReader.LEGACY_CLOUD, (String) null) == null); if(changed) break; } diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java index f5a09b0da13..8efb60525a0 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java @@ -16,11 +16,17 @@ */ package org.apache.solr.cloud; +import java.lang.invoke.MethodHandles; +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.TimeUnit; + import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent; import org.apache.solr.cloud.Overseer.LeaderStatus; +import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkNodeProps; @@ -48,32 +54,7 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Queue; -import java.util.Set; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.TimeUnit; - -import static org.easymock.EasyMock.anyBoolean; -import static org.easymock.EasyMock.anyObject; -import static org.easymock.EasyMock.capture; -import static org.easymock.EasyMock.createMock; -import static org.easymock.EasyMock.expect; -import static org.easymock.EasyMock.expectLastCall; -import static org.easymock.EasyMock.getCurrentArguments; -import static org.easymock.EasyMock.replay; -import static org.easymock.EasyMock.reset; -import static org.easymock.EasyMock.verify; +import static org.easymock.EasyMock.*; public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { @@ -284,11 +265,12 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { }).anyTimes(); } - zkStateReaderMock.getClusterProps(); - expectLastCall().andAnswer(new IAnswer() { + + zkStateReaderMock.getClusterProperty("legacyCloud", "true"); + expectLastCall().andAnswer(new IAnswer() { @Override - public Map answer() throws Throwable { - return new HashMap(); + public String answer() throws Throwable { + return "true"; } }); diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java index 1109f9ed0a8..d1521a0d399 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java @@ -16,6 +16,15 @@ */ package org.apache.solr.cloud; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.lang.invoke.MethodHandles; +import java.nio.charset.StandardCharsets; 
+import java.util.Collection; +import java.util.List; + import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.io.filefilter.RegexFileFilter; @@ -23,6 +32,7 @@ import org.apache.commons.io.filefilter.TrueFileFilter; import org.apache.solr.SolrJettyTestBase; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.ClusterProperties; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.VMParamsAllAndReadonlyDigestZkACLProvider; import org.apache.solr.common.cloud.ZkConfigManager; @@ -37,15 +47,6 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.InputStream; -import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; -import java.util.Collection; -import java.util.List; - // TODO: This test would be a lot faster if it used a solrhome with fewer config // files - there are a lot of them to upload public class ZkCLITest extends SolrTestCaseJ4 { @@ -321,22 +322,19 @@ public class ZkCLITest extends SolrTestCaseJ4 { @Test public void testSetClusterProperty() throws Exception { - ZkStateReader reader = new ZkStateReader(zkClient); - try { - // add property urlScheme=http - String[] args = new String[] {"-zkhost", zkServer.getZkAddress(), - "-cmd", "CLUSTERPROP", "-name", "urlScheme", "-val", "http"}; - ZkCLI.main(args); - assertEquals("http", reader.getClusterProps().get("urlScheme")); - - // remove it again - args = new String[] {"-zkhost", zkServer.getZkAddress(), - "-cmd", "CLUSTERPROP", "-name", "urlScheme"}; - ZkCLI.main(args); - assertNull(reader.getClusterProps().get("urlScheme")); - } finally { - reader.close(); - } + ClusterProperties properties = new ClusterProperties(zkClient); + // add property urlScheme=http + String[] args = new String[] {"-zkhost", zkServer.getZkAddress(), + "-cmd", "CLUSTERPROP", "-name", "urlScheme", "-val", "http"}; + ZkCLI.main(args); + assertEquals("http", properties.getClusterProperty("urlScheme", "none")); + + // remove it again + args = new String[] {"-zkhost", zkServer.getZkAddress(), + "-cmd", "CLUSTERPROP", "-name", "urlScheme"}; + ZkCLI.main(args); + assertNull(properties.getClusterProperty("urlScheme", (String) null)); + } @Test diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java index 912369787a0..b8654391977 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java @@ -16,17 +16,15 @@ */ package org.apache.solr.cloud; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.cloud.ClusterState; -import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.DocRouter; -import org.apache.solr.common.cloud.Replica; -import org.apache.solr.common.cloud.Slice; -import org.apache.solr.common.cloud.SolrZkClient; -import org.apache.solr.common.cloud.ZkConfigManager; -import org.apache.solr.common.cloud.ZkNodeProps; -import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.cloud.*; import org.apache.solr.common.util.Utils; 
import org.apache.solr.core.CloudConfig; import org.apache.solr.core.CoreContainer; @@ -40,12 +38,6 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; - @Slow @SolrTestCaseJ4.SuppressSSL public class ZkControllerTest extends SolrTestCaseJ4 { @@ -98,59 +90,71 @@ public class ZkControllerTest extends SolrTestCaseJ4 { AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost()); AbstractZkTestCase.makeSolrZkNode(server.getZkHost()); - ZkStateReader zkStateReader = new ZkStateReader(server.getZkAddress(), TIMEOUT, TIMEOUT); - try { - // getBaseUrlForNodeName - assertEquals("http://zzz.xxx:1234/solr", - zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr")); - assertEquals("http://xxx:99", - zkStateReader.getBaseUrlForNodeName("xxx:99_")); - assertEquals("http://foo-bar.baz.org:9999/some_dir", - zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_some_dir")); - assertEquals("http://foo-bar.baz.org:9999/solr/sub_dir", - zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_solr%2Fsub_dir")); - - // generateNodeName + getBaseUrlForNodeName - assertEquals("http://foo:9876/solr", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo","9876","solr"))); - assertEquals("http://foo:9876/solr", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo","9876","/solr"))); - assertEquals("http://foo:9876/solr", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo","9876","/solr/"))); - assertEquals("http://foo.bar.com:9876/solr/sub_dir", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo.bar.com","9876","solr/sub_dir"))); - assertEquals("http://foo.bar.com:9876/solr/sub_dir", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo.bar.com","9876","/solr/sub_dir/"))); - assertEquals("http://foo-bar:9876", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo-bar","9876",""))); - assertEquals("http://foo-bar:9876", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo-bar","9876","/"))); - assertEquals("http://foo-bar.com:80/some_dir", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo-bar.com","80","some_dir"))); - assertEquals("http://foo-bar.com:80/some_dir", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo-bar.com","80","/some_dir"))); + try (SolrZkClient client = new SolrZkClient(server.getZkAddress(), TIMEOUT)) { + + ZkController.createClusterZkNodes(client); + + try (ZkStateReader zkStateReader = new ZkStateReader(client)) { + zkStateReader.createClusterStateWatchersAndUpdate(); + + // getBaseUrlForNodeName + assertEquals("http://zzz.xxx:1234/solr", + zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr")); + assertEquals("http://xxx:99", + zkStateReader.getBaseUrlForNodeName("xxx:99_")); + assertEquals("http://foo-bar.baz.org:9999/some_dir", + zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_some_dir")); + assertEquals("http://foo-bar.baz.org:9999/solr/sub_dir", + zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_solr%2Fsub_dir")); + + // generateNodeName + getBaseUrlForNodeName + assertEquals("http://foo:9876/solr", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo", "9876", "solr"))); + assertEquals("http://foo:9876/solr", + zkStateReader.getBaseUrlForNodeName + 
(ZkController.generateNodeName("foo", "9876", "/solr"))); + assertEquals("http://foo:9876/solr", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo", "9876", "/solr/"))); + assertEquals("http://foo.bar.com:9876/solr/sub_dir", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo.bar.com", "9876", "solr/sub_dir"))); + assertEquals("http://foo.bar.com:9876/solr/sub_dir", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo.bar.com", "9876", "/solr/sub_dir/"))); + assertEquals("http://foo-bar:9876", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo-bar", "9876", ""))); + assertEquals("http://foo-bar:9876", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo-bar", "9876", "/"))); + assertEquals("http://foo-bar.com:80/some_dir", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo-bar.com", "80", "some_dir"))); + assertEquals("http://foo-bar.com:80/some_dir", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo-bar.com", "80", "/some_dir"))); + + } + + ClusterProperties cp = new ClusterProperties(client); + cp.setClusterProperty("urlScheme", "https"); //Verify the URL Scheme is taken into account - zkStateReader.getZkClient().create(ZkStateReader.CLUSTER_PROPS, - Utils.toJSON(Collections.singletonMap("urlScheme", "https")), CreateMode.PERSISTENT, true); - - assertEquals("https://zzz.xxx:1234/solr", - zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr")); - - assertEquals("https://foo-bar.com:80/some_dir", - zkStateReader.getBaseUrlForNodeName - (ZkController.generateNodeName("foo-bar.com","80","/some_dir"))); - } finally { - zkStateReader.close(); + + try (ZkStateReader zkStateReader = new ZkStateReader(client)) { + + zkStateReader.createClusterStateWatchersAndUpdate(); + + assertEquals("https://zzz.xxx:1234/solr", + zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr")); + + assertEquals("https://foo-bar.com:80/some_dir", + zkStateReader.getBaseUrlForNodeName + (ZkController.generateNodeName("foo-bar.com", "80", "/some_dir"))); + + } } } finally { server.shutdown(); diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index fd8d4939176..ad51614b19c 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -62,6 +62,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { ZkTestServer server = new ZkTestServer(zkDir); SolrZkClient zkClient = null; + ZkStateReader reader = null; try { server.run(); @@ -71,10 +72,10 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); ZkController.createClusterZkNodes(zkClient); - ZkStateReader reader = new ZkStateReader(zkClient); + reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); if (isInteresting) { - reader.addCollectionWatch("c1"); + reader.registerCore("c1"); } ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats()); @@ -136,7 +137,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { assertEquals(2, collection.getStateFormat()); } } finally { - IOUtils.close(zkClient); + IOUtils.close(reader, zkClient); server.shutdown(); } @@ -146,6 +147,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { String zkDir = 
createTempDir("testExternalCollectionWatchedNotWatched").toFile().getAbsolutePath(); ZkTestServer server = new ZkTestServer(zkDir); SolrZkClient zkClient = null; + ZkStateReader reader = null; try { server.run(); @@ -155,7 +157,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); ZkController.createClusterZkNodes(zkClient); - ZkStateReader reader = new ZkStateReader(zkClient); + reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats()); @@ -170,13 +172,13 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { reader.forceUpdateCollection("c1"); assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - reader.addCollectionWatch("c1"); + reader.registerCore("c1"); assertFalse(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - reader.removeZKWatch("c1"); + reader.unregisterCore("c1"); assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); } finally { - IOUtils.close(zkClient); + IOUtils.close(reader, zkClient); server.shutdown(); } } @@ -187,6 +189,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { ZkTestServer server = new ZkTestServer(zkDir); SolrZkClient zkClient = null; + ZkStateReader reader = null; try { server.run(); @@ -196,9 +199,9 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); ZkController.createClusterZkNodes(zkClient); - ZkStateReader reader = new ZkStateReader(zkClient); + reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); - reader.addCollectionWatch("c1"); + reader.registerCore("c1"); // Initially there should be no c1 collection. 
assertNull(reader.getClusterState().getCollectionRef("c1")); @@ -234,7 +237,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 { assertFalse(ref.isLazilyLoaded()); assertEquals(2, ref.get().getStateFormat()); } finally { - IOUtils.close(zkClient); + IOUtils.close(reader, zkClient); server.shutdown(); } diff --git a/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java index 7e8ab470350..70744e24af0 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java @@ -21,21 +21,16 @@ import java.util.ArrayList; import java.util.List; import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.QueryRequest; -import org.apache.solr.client.solrj.request.SolrPing; import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.response.SolrQueryResponse; import org.junit.BeforeClass; import org.junit.Test; - public class SearchHandlerTest extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java index 1bb860e20a0..05149185668 100644 --- a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java +++ b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java @@ -939,7 +939,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"*[count("+pre+"/int)=2]" ,pre+"/int[@name='1976-07-05T00:00:00Z'][.='2' ]" ,pre+"/int[@name='1976-07-06T00:00:00Z'][.='0']" - + ,meta+"/int[@name='before' ][.='5']" ); assertQ("check after is not inclusive of lower bound by default (for dates)", @@ -955,10 +955,10 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"*[count("+pre+"/int)=2]" ,pre+"/int[@name='1976-07-03T00:00:00Z'][.='2' ]" ,pre+"/int[@name='1976-07-04T00:00:00Z']" + jul4 - + ,meta+"/int[@name='after' ][.='9']" ); - + assertQ("check hardend=false", req( "q", "*:*" @@ -975,7 +975,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,pre+"/int[@name='1976-07-01T00:00:00Z'][.='5' ]" ,pre+"/int[@name='1976-07-06T00:00:00Z'][.='0' ]" ,pre+"/int[@name='1976-07-11T00:00:00Z'][.='4' ]" - + ,meta+"/int[@name='before' ][.='2']" ,meta+"/int[@name='after' ][.='3']" ,meta+"/int[@name='between'][.='9']" @@ -996,12 +996,33 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,pre+"/int[@name='1976-07-01T00:00:00Z'][.='5' ]" ,pre+"/int[@name='1976-07-06T00:00:00Z'][.='0' ]" ,pre+"/int[@name='1976-07-11T00:00:00Z'][.='1' ]" - + ,meta+"/int[@name='before' ][.='2']" ,meta+"/int[@name='after' ][.='6']" ,meta+"/int[@name='between'][.='6']" ); - + + //Fixed by SOLR-9080 related to the Gregorian Change Date + assertQ("check BC era", + req( "q", "*:*" + ,"rows", "0" + ,"facet", "true" + ,p, f + ,p+".start", "-0200-01-01T00:00:00Z" // BC + ,p+".end", "+0200-01-01T00:00:00Z" // AD + ,p+".gap", "+100YEARS" + ,p+".other", "all" + ) + ,pre+"/int[@name='-0200-01-01T00:00:00Z'][.='0']" + ,pre+"/int[@name='-0100-01-01T00:00:00Z'][.='0']" + 
,pre+"/int[@name='0000-01-01T00:00:00Z'][.='0']" + ,pre+"/int[@name='0100-01-01T00:00:00Z'][.='0']" + ,meta+"/int[@name='before' ][.='0']" + ,meta+"/int[@name='after' ][.='14']" + ,meta+"/int[@name='between'][.='0']" + + ); + } @Test diff --git a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java index c0882d3499a..87cfeb36b3b 100644 --- a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java +++ b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java @@ -54,14 +54,12 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBas int peopleMultiplier = atLeast(1); int deptMultiplier = atLeast(1); - String people = "people"; - int numPeopleShards; - createCollection(people, atLeast(1), numPeopleShards = atLeast(2), numPeopleShards); + final String people = "people"; + createCollection(people, 2, 1, 10); - String depts = "departments"; - int numDeptsShards; - createCollection(depts, atLeast(1), numDeptsShards = atLeast(2), numDeptsShards); + final String depts = "departments"; + createCollection(depts, 2, 1, 10); createIndex(people, peopleMultiplier, depts, deptMultiplier); diff --git a/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java index 372c9e97eec..e76f8217cfd 100644 --- a/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java @@ -55,6 +55,35 @@ public class DateRangeFieldTest extends SolrTestCaseJ4 { assertQ(req("q", "dateRange:[1999 TO 2001]"), xpathMatches(0, 2)); } + public void testBeforeGregorianChangeDate() { // GCD is the year 1582 + assertU(delQ("*:*")); + assertU(adoc("id", "0", "dateRange", "1500-01-01T00:00:00Z")); + assertU(adoc("id", "1", "dateRange", "-1500-01-01T00:00:00Z")); // BC + assertU(adoc("id", "2", "dateRange", "1400-01-01T00:00:00Z/YEAR")); // date math of month or year can cause issues + assertU(adoc("id", "3", "dateRange", "1300")); // the whole year of 1300 + assertU(commit()); + + //ensure round-trip toString + assertQ(req("q", "id:0", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='1500-01-01T00:00:00Z']"); + assertQ(req("q", "id:1", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='-1500-01-01T00:00:00Z']"); + // note: fixed by SOLR-9080, would instead find "1399-01-09T00:00:00Z" + assertQ(req("q", "id:2", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='1400-01-01T00:00:00Z']"); + assertQ(req("q", "id:3", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='1300']"); + + //ensure range syntax works + assertQ(req("q", "dateRange:[1450-01-01T00:00:00Z TO 1499-12-31T23:59:59Z]"), xpathMatches());// before + assertQ(req("q", "dateRange:[1500-01-01T00:00:00Z TO 1500-01-01T00:00:00Z]"), xpathMatches(0));// spot on + assertQ(req("q", "dateRange:[1500-01-01T00:00:01Z TO 1550-01-01T00:00:00Z]"), xpathMatches());// after + + assertQ(req("q", "dateRange:[-1500-01-01T00:00:00Z TO -1500-01-01T00:00:00Z]"), xpathMatches(1)); + + // do range queries in the vicinity of docId=3 val:"1300" + assertQ(req("q", "dateRange:[1299 TO 1299-12-31T23:59:59Z]"), xpathMatches());//adjacent + assertQ(req("q", "dateRange:[1299 TO 1300-01-01T00:00:00Z]"), xpathMatches(3));// expand + 1 sec + assertQ(req("q", "dateRange:1301"), xpathMatches()); // 
adjacent + assertQ(req("q", "dateRange:[1300-12-31T23:59:59Z TO 1301]"), xpathMatches(3)); // expand + 1 sec + } + @Test public void testMultiValuedDateRanges() { assertU(delQ("*:*")); diff --git a/solr/core/src/test/org/apache/solr/search/TestDocSet.java b/solr/core/src/test/org/apache/solr/search/TestDocSet.java index 9c46d5baa57..cdddd86e7ec 100644 --- a/solr/core/src/test/org/apache/solr/search/TestDocSet.java +++ b/solr/core/src/test/org/apache/solr/search/TestDocSet.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.Random; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.PointValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; @@ -32,12 +31,14 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; @@ -467,6 +468,11 @@ public class TestDocSet extends LuceneTestCase { @Override public void checkIntegrity() throws IOException { } + + @Override + public Sort getIndexSort() { + return null; + } }; } diff --git a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java index ffb495e9f4b..08a9037608a 100644 --- a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java +++ b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java @@ -23,7 +23,6 @@ import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SimpleMergedSegmentWarmer; -import org.apache.lucene.index.SortingMergePolicy; import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -31,6 +30,7 @@ import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.SolrConfig; import org.apache.solr.core.TestMergePolicyConfig; +import org.apache.solr.index.SortingMergePolicy; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchemaFactory; import org.junit.BeforeClass; diff --git a/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java b/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java index 8840d34b354..8cc417b33f6 100644 --- a/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java +++ b/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java @@ -16,11 +16,10 @@ */ package org.apache.solr.util; -import java.text.DateFormat; import java.text.ParseException; -import java.text.SimpleDateFormat; import java.time.Instant; -import java.util.Calendar; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; import java.util.Date; import java.util.HashMap; import java.util.Locale; @@ -40,44 +39,37 @@ public class DateMathParserTest extends LuceneTestCase { * A formatter 
for specifying every last nuance of a Date for easy * reference in assertion statements */ - private DateFormat fmt; + private DateTimeFormatter fmt; + /** * A parser for reading in explicit dates that are convenient to type * in a test */ - private DateFormat parser; + private DateTimeFormatter parser; public DateMathParserTest() { - super(); - fmt = new SimpleDateFormat - ("G yyyyy MM ww WW DD dd F E aa HH hh mm ss SSS z Z",Locale.ROOT); - fmt.setTimeZone(UTC); + fmt = DateTimeFormatter.ofPattern("G yyyyy MM ww W D dd F E a HH hh mm ss SSS z Z", Locale.ROOT) + .withZone(ZoneOffset.UTC); - parser = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS",Locale.ROOT); - parser.setTimeZone(UTC); + parser = DateTimeFormatter.ISO_LOCAL_DATE_TIME.withZone(ZoneOffset.UTC); // basically without the 'Z' } /** MACRO: Round: parses s, rounds with u, fmts */ protected String r(String s, String u) throws Exception { - Date d = parser.parse(s); - Calendar c = Calendar.getInstance(UTC, Locale.ROOT); - c.setTime(d); - DateMathParser.round(c, u); - return fmt.format(c.getTime()); + Date dt = DateMathParser.parseMath(null, s + "Z/" + u); + return fmt.format(dt.toInstant()); } /** MACRO: Add: parses s, adds v u, fmts */ protected String a(String s, int v, String u) throws Exception { - Date d = parser.parse(s); - Calendar c = Calendar.getInstance(UTC, Locale.ROOT); - c.setTime(d); - DateMathParser.add(c, v, u); - return fmt.format(c.getTime()); + char sign = v >= 0 ? '+' : '-'; + Date dt = DateMathParser.parseMath(null, s + 'Z' + sign + Math.abs(v) + u); + return fmt.format(dt.toInstant()); } /** MACRO: Expected: parses s, fmts */ protected String e(String s) throws Exception { - return fmt.format(parser.parse(s)); + return fmt.format(parser.parse(s, Instant::from)); } protected void assertRound(String e, String i, String u) throws Exception { @@ -85,6 +77,7 @@ public class DateMathParserTest extends LuceneTestCase { String rr = r(i,u); assertEquals(ee + " != " + rr + " round:" + i + ":" + u, ee, rr); } + protected void assertAdd(String e, String i, int v, String u) throws Exception { @@ -97,13 +90,17 @@ public class DateMathParserTest extends LuceneTestCase { throws Exception { String ee = e(e); - String aa = fmt.format(p.parseMath(i)); + String aa = fmt.format(p.parseMath(i).toInstant()); assertEquals(ee + " != " + aa + " math:" + - parser.format(p.getNow()) + ":" + i, ee, aa); + parser.format(p.getNow().toInstant()) + ":" + i, ee, aa); + } + + private void setNow(DateMathParser p, String text) { + p.setNow(Date.from(parser.parse(text, Instant::from))); } public void testCalendarUnitsConsistency() throws Exception { - String input = "2001-07-04T12:08:56.235"; + String input = "1234-07-04T12:08:56.235"; for (String u : DateMathParser.CALENDAR_UNITS.keySet()) { try { r(input, u); @@ -120,20 +117,20 @@ public class DateMathParserTest extends LuceneTestCase { public void testRound() throws Exception { - String input = "2001-07-04T12:08:56.235"; + String input = "1234-07-04T12:08:56.235"; - assertRound("2001-07-04T12:08:56.000", input, "SECOND"); - assertRound("2001-07-04T12:08:00.000", input, "MINUTE"); - assertRound("2001-07-04T12:00:00.000", input, "HOUR"); - assertRound("2001-07-04T00:00:00.000", input, "DAY"); - assertRound("2001-07-01T00:00:00.000", input, "MONTH"); - assertRound("2001-01-01T00:00:00.000", input, "YEAR"); + assertRound("1234-07-04T12:08:56.000", input, "SECOND"); + assertRound("1234-07-04T12:08:00.000", input, "MINUTE"); + assertRound("1234-07-04T12:00:00.000", input, "HOUR"); + 
assertRound("1234-07-04T00:00:00.000", input, "DAY"); + assertRound("1234-07-01T00:00:00.000", input, "MONTH"); + assertRound("1234-01-01T00:00:00.000", input, "YEAR"); } public void testAddZero() throws Exception { - String input = "2001-07-04T12:08:56.235"; + String input = "1234-07-04T12:08:56.235"; for (String u : DateMathParser.CALENDAR_UNITS.keySet()) { assertAdd(input, input, 0, u); @@ -143,24 +140,24 @@ public class DateMathParserTest extends LuceneTestCase { public void testAdd() throws Exception { - String input = "2001-07-04T12:08:56.235"; + String input = "1234-07-04T12:08:56.235"; - assertAdd("2001-07-04T12:08:56.236", input, 1, "MILLISECOND"); - assertAdd("2001-07-04T12:08:57.235", input, 1, "SECOND"); - assertAdd("2001-07-04T12:09:56.235", input, 1, "MINUTE"); - assertAdd("2001-07-04T13:08:56.235", input, 1, "HOUR"); - assertAdd("2001-07-05T12:08:56.235", input, 1, "DAY"); - assertAdd("2001-08-04T12:08:56.235", input, 1, "MONTH"); - assertAdd("2002-07-04T12:08:56.235", input, 1, "YEAR"); + assertAdd("1234-07-04T12:08:56.236", input, 1, "MILLISECOND"); + assertAdd("1234-07-04T12:08:57.235", input, 1, "SECOND"); + assertAdd("1234-07-04T12:09:56.235", input, 1, "MINUTE"); + assertAdd("1234-07-04T13:08:56.235", input, 1, "HOUR"); + assertAdd("1234-07-05T12:08:56.235", input, 1, "DAY"); + assertAdd("1234-08-04T12:08:56.235", input, 1, "MONTH"); + assertAdd("1235-07-04T12:08:56.235", input, 1, "YEAR"); } public void testParseStatelessness() throws Exception { - DateMathParser p = new DateMathParser(UTC, Locale.ROOT); - p.setNow(parser.parse("2001-07-04T12:08:56.235")); + DateMathParser p = new DateMathParser(UTC); + setNow(p, "1234-07-04T12:08:56.235"); - String e = fmt.format(p.parseMath("")); + String e = fmt.format(p.parseMath("").toInstant()); Date trash = p.parseMath("+7YEARS"); trash = p.parseMath("/MONTH"); @@ -168,90 +165,89 @@ public class DateMathParserTest extends LuceneTestCase { Thread.currentThread(); Thread.sleep(5); - String a = fmt.format(p.parseMath("")); + String a =fmt.format(p.parseMath("").toInstant()); assertEquals("State of DateMathParser changed", e, a); } - + public void testParseMath() throws Exception { - DateMathParser p = new DateMathParser(UTC, Locale.ROOT); - p.setNow(parser.parse("2001-07-04T12:08:56.235")); + DateMathParser p = new DateMathParser(UTC); + setNow(p, "1234-07-04T12:08:56.235"); // No-Op - assertMath("2001-07-04T12:08:56.235", p, ""); + assertMath("1234-07-04T12:08:56.235", p, ""); // simple round - assertMath("2001-07-04T12:08:56.000", p, "/SECOND"); - assertMath("2001-07-04T12:08:00.000", p, "/MINUTE"); - assertMath("2001-07-04T12:00:00.000", p, "/HOUR"); - assertMath("2001-07-04T00:00:00.000", p, "/DAY"); - assertMath("2001-07-01T00:00:00.000", p, "/MONTH"); - assertMath("2001-01-01T00:00:00.000", p, "/YEAR"); + assertMath("1234-07-04T12:08:56.235", p, "/MILLIS"); // no change + assertMath("1234-07-04T12:08:56.000", p, "/SECOND"); + assertMath("1234-07-04T12:08:00.000", p, "/MINUTE"); + assertMath("1234-07-04T12:00:00.000", p, "/HOUR"); + assertMath("1234-07-04T00:00:00.000", p, "/DAY"); + assertMath("1234-07-01T00:00:00.000", p, "/MONTH"); + assertMath("1234-01-01T00:00:00.000", p, "/YEAR"); // simple addition - assertMath("2001-07-04T12:08:56.236", p, "+1MILLISECOND"); - assertMath("2001-07-04T12:08:57.235", p, "+1SECOND"); - assertMath("2001-07-04T12:09:56.235", p, "+1MINUTE"); - assertMath("2001-07-04T13:08:56.235", p, "+1HOUR"); - assertMath("2001-07-05T12:08:56.235", p, "+1DAY"); - assertMath("2001-08-04T12:08:56.235", p, 
"+1MONTH"); - assertMath("2002-07-04T12:08:56.235", p, "+1YEAR"); + assertMath("1234-07-04T12:08:56.236", p, "+1MILLISECOND"); + assertMath("1234-07-04T12:08:57.235", p, "+1SECOND"); + assertMath("1234-07-04T12:09:56.235", p, "+1MINUTE"); + assertMath("1234-07-04T13:08:56.235", p, "+1HOUR"); + assertMath("1234-07-05T12:08:56.235", p, "+1DAY"); + assertMath("1234-08-04T12:08:56.235", p, "+1MONTH"); + assertMath("1235-07-04T12:08:56.235", p, "+1YEAR"); // simple subtraction - assertMath("2001-07-04T12:08:56.234", p, "-1MILLISECOND"); - assertMath("2001-07-04T12:08:55.235", p, "-1SECOND"); - assertMath("2001-07-04T12:07:56.235", p, "-1MINUTE"); - assertMath("2001-07-04T11:08:56.235", p, "-1HOUR"); - assertMath("2001-07-03T12:08:56.235", p, "-1DAY"); - assertMath("2001-06-04T12:08:56.235", p, "-1MONTH"); - assertMath("2000-07-04T12:08:56.235", p, "-1YEAR"); + assertMath("1234-07-04T12:08:56.234", p, "-1MILLISECOND"); + assertMath("1234-07-04T12:08:55.235", p, "-1SECOND"); + assertMath("1234-07-04T12:07:56.235", p, "-1MINUTE"); + assertMath("1234-07-04T11:08:56.235", p, "-1HOUR"); + assertMath("1234-07-03T12:08:56.235", p, "-1DAY"); + assertMath("1234-06-04T12:08:56.235", p, "-1MONTH"); + assertMath("1233-07-04T12:08:56.235", p, "-1YEAR"); // simple '+/-' - assertMath("2001-07-04T12:08:56.235", p, "+1MILLISECOND-1MILLISECOND"); - assertMath("2001-07-04T12:08:56.235", p, "+1SECOND-1SECOND"); - assertMath("2001-07-04T12:08:56.235", p, "+1MINUTE-1MINUTE"); - assertMath("2001-07-04T12:08:56.235", p, "+1HOUR-1HOUR"); - assertMath("2001-07-04T12:08:56.235", p, "+1DAY-1DAY"); - assertMath("2001-07-04T12:08:56.235", p, "+1MONTH-1MONTH"); - assertMath("2001-07-04T12:08:56.235", p, "+1YEAR-1YEAR"); + assertMath("1234-07-04T12:08:56.235", p, "+1MILLISECOND-1MILLISECOND"); + assertMath("1234-07-04T12:08:56.235", p, "+1SECOND-1SECOND"); + assertMath("1234-07-04T12:08:56.235", p, "+1MINUTE-1MINUTE"); + assertMath("1234-07-04T12:08:56.235", p, "+1HOUR-1HOUR"); + assertMath("1234-07-04T12:08:56.235", p, "+1DAY-1DAY"); + assertMath("1234-07-04T12:08:56.235", p, "+1MONTH-1MONTH"); + assertMath("1234-07-04T12:08:56.235", p, "+1YEAR-1YEAR"); // simple '-/+' - assertMath("2001-07-04T12:08:56.235", p, "-1MILLISECOND+1MILLISECOND"); - assertMath("2001-07-04T12:08:56.235", p, "-1SECOND+1SECOND"); - assertMath("2001-07-04T12:08:56.235", p, "-1MINUTE+1MINUTE"); - assertMath("2001-07-04T12:08:56.235", p, "-1HOUR+1HOUR"); - assertMath("2001-07-04T12:08:56.235", p, "-1DAY+1DAY"); - assertMath("2001-07-04T12:08:56.235", p, "-1MONTH+1MONTH"); - assertMath("2001-07-04T12:08:56.235", p, "-1YEAR+1YEAR"); + assertMath("1234-07-04T12:08:56.235", p, "-1MILLISECOND+1MILLISECOND"); + assertMath("1234-07-04T12:08:56.235", p, "-1SECOND+1SECOND"); + assertMath("1234-07-04T12:08:56.235", p, "-1MINUTE+1MINUTE"); + assertMath("1234-07-04T12:08:56.235", p, "-1HOUR+1HOUR"); + assertMath("1234-07-04T12:08:56.235", p, "-1DAY+1DAY"); + assertMath("1234-07-04T12:08:56.235", p, "-1MONTH+1MONTH"); + assertMath("1234-07-04T12:08:56.235", p, "-1YEAR+1YEAR"); // more complex stuff - assertMath("2000-07-04T12:08:56.236", p, "+1MILLISECOND-1YEAR"); - assertMath("2000-07-04T12:08:57.235", p, "+1SECOND-1YEAR"); - assertMath("2000-07-04T12:09:56.235", p, "+1MINUTE-1YEAR"); - assertMath("2000-07-04T13:08:56.235", p, "+1HOUR-1YEAR"); - assertMath("2000-07-05T12:08:56.235", p, "+1DAY-1YEAR"); - assertMath("2000-08-04T12:08:56.235", p, "+1MONTH-1YEAR"); - assertMath("2000-07-04T12:08:56.236", p, "-1YEAR+1MILLISECOND"); - assertMath("2000-07-04T12:08:57.235", 
p, "-1YEAR+1SECOND"); - assertMath("2000-07-04T12:09:56.235", p, "-1YEAR+1MINUTE"); - assertMath("2000-07-04T13:08:56.235", p, "-1YEAR+1HOUR"); - assertMath("2000-07-05T12:08:56.235", p, "-1YEAR+1DAY"); - assertMath("2000-08-04T12:08:56.235", p, "-1YEAR+1MONTH"); - assertMath("2000-07-01T00:00:00.000", p, "-1YEAR+1MILLISECOND/MONTH"); - assertMath("2000-07-04T00:00:00.000", p, "-1YEAR+1SECOND/DAY"); - assertMath("2000-07-04T00:00:00.000", p, "-1YEAR+1MINUTE/DAY"); - assertMath("2000-07-04T13:00:00.000", p, "-1YEAR+1HOUR/HOUR"); - assertMath("2000-07-05T12:08:56.000", p, "-1YEAR+1DAY/SECOND"); - assertMath("2000-08-04T12:08:56.000", p, "-1YEAR+1MONTH/SECOND"); + assertMath("1233-07-04T12:08:56.236", p, "+1MILLISECOND-1YEAR"); + assertMath("1233-07-04T12:08:57.235", p, "+1SECOND-1YEAR"); + assertMath("1233-07-04T12:09:56.235", p, "+1MINUTE-1YEAR"); + assertMath("1233-07-04T13:08:56.235", p, "+1HOUR-1YEAR"); + assertMath("1233-07-05T12:08:56.235", p, "+1DAY-1YEAR"); + assertMath("1233-08-04T12:08:56.235", p, "+1MONTH-1YEAR"); + assertMath("1233-07-04T12:08:56.236", p, "-1YEAR+1MILLISECOND"); + assertMath("1233-07-04T12:08:57.235", p, "-1YEAR+1SECOND"); + assertMath("1233-07-04T12:09:56.235", p, "-1YEAR+1MINUTE"); + assertMath("1233-07-04T13:08:56.235", p, "-1YEAR+1HOUR"); + assertMath("1233-07-05T12:08:56.235", p, "-1YEAR+1DAY"); + assertMath("1233-08-04T12:08:56.235", p, "-1YEAR+1MONTH"); + assertMath("1233-07-01T00:00:00.000", p, "-1YEAR+1MILLISECOND/MONTH"); + assertMath("1233-07-04T00:00:00.000", p, "-1YEAR+1SECOND/DAY"); + assertMath("1233-07-04T00:00:00.000", p, "-1YEAR+1MINUTE/DAY"); + assertMath("1233-07-04T13:00:00.000", p, "-1YEAR+1HOUR/HOUR"); + assertMath("1233-07-05T12:08:56.000", p, "-1YEAR+1DAY/SECOND"); + assertMath("1233-08-04T12:08:56.000", p, "-1YEAR+1MONTH/SECOND"); // "tricky" cases - p.setNow(parser.parse("2006-01-31T17:09:59.999")); + setNow(p, "2006-01-31T17:09:59.999"); assertMath("2006-02-28T17:09:59.999", p, "+1MONTH"); assertMath("2008-02-29T17:09:59.999", p, "+25MONTH"); assertMath("2006-02-01T00:00:00.000", p, "/MONTH+35DAYS/MONTH"); assertMath("2006-01-31T17:10:00.000", p, "+3MILLIS/MINUTE"); - - } public void testParseMathTz() throws Exception { @@ -267,13 +263,14 @@ public class DateMathParserTest extends LuceneTestCase { // US, Positive Offset with DST TimeZone tz = TimeZone.getTimeZone(PLUS_TZS); - DateMathParser p = new DateMathParser(tz, Locale.ROOT); + DateMathParser p = new DateMathParser(tz); - p.setNow(parser.parse("2001-07-04T12:08:56.235")); + setNow(p, "2001-07-04T12:08:56.235"); // No-Op assertMath("2001-07-04T12:08:56.235", p, ""); - + assertMath("2001-07-04T12:08:56.235", p, "/MILLIS"); + assertMath("2001-07-04T12:08:56.000", p, "/SECOND"); assertMath("2001-07-04T12:08:00.000", p, "/MINUTE"); assertMath("2001-07-04T12:00:00.000", p, "/HOUR"); @@ -289,8 +286,8 @@ public class DateMathParserTest extends LuceneTestCase { // France, Negative Offset with DST tz = TimeZone.getTimeZone(NEG_TZS); - p = new DateMathParser(tz, Locale.ROOT); - p.setNow(parser.parse("2001-07-04T12:08:56.235")); + p = new DateMathParser(tz); + setNow(p, "2001-07-04T12:08:56.235"); assertMath("2001-07-04T12:08:56.000", p, "/SECOND"); assertMath("2001-07-04T12:08:00.000", p, "/MINUTE"); @@ -306,8 +303,8 @@ public class DateMathParserTest extends LuceneTestCase { public void testParseMathExceptions() throws Exception { - DateMathParser p = new DateMathParser(UTC, Locale.ROOT); - p.setNow(parser.parse("2001-07-04T12:08:56.235")); + DateMathParser p = new DateMathParser(UTC); + 
setNow(p, "1234-07-04T12:08:56.235"); Map badCommands = new HashMap<>(); badCommands.put("/", 1); @@ -373,7 +370,8 @@ public class DateMathParserTest extends LuceneTestCase { } private void assertFormat(final String expected, final long millis) { - assertEquals(expected, Instant.ofEpochMilli(millis).toString()); + assertEquals(expected, Instant.ofEpochMilli(millis).toString()); // assert same as ISO_INSTANT + assertEquals(millis, DateMathParser.parseMath(null, expected).getTime()); // assert DMP has same result } /** diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java index 4fed84e00f7..7b59d29d77c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java @@ -56,6 +56,8 @@ import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.ToleratedUpdateError; import org.apache.solr.common.cloud.Aliases; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.CollectionStatePredicate; +import org.apache.solr.common.cloud.CollectionStateWatcher; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocRouter; import org.apache.solr.common.cloud.ImplicitDocRouter; @@ -572,6 +574,40 @@ public class CloudSolrClient extends SolrClient { zkStateReader.getConfigManager().downloadConfigDir(configName, downloadPath); } + /** + * Block until a collection state matches a predicate, or a timeout + * + * Note that the predicate may be called again even after it has returned true, so + * implementors should avoid changing state within the predicate call itself. + * + * @param collection the collection to watch + * @param wait how long to wait + * @param unit the units of the wait parameter + * @param predicate a {@link CollectionStatePredicate} to check the collection state + * @throws InterruptedException on interrupt + * @throws TimeoutException on timeout + */ + public void waitForState(String collection, long wait, TimeUnit unit, CollectionStatePredicate predicate) + throws InterruptedException, TimeoutException { + connect(); + zkStateReader.waitForState(collection, wait, unit, predicate); + } + + /** + * Register a CollectionStateWatcher to be called when the cluster state for a collection changes + * + * Note that the watcher is unregistered after it has been called once. 
To make a watcher persistent, + * it should re-register itself in its {@link CollectionStateWatcher#onStateChanged(Set, DocCollection)} + * call + * + * @param collection the collection to watch + * @param watcher a watcher that will be called when the state changes + */ + public void registerCollectionStateWatcher(String collection, CollectionStateWatcher watcher) { + connect(); + zkStateReader.registerCollectionStateWatcher(collection, watcher); + } + private NamedList directUpdate(AbstractUpdateRequest request, String collection, ClusterState clusterState) throws SolrServerException { UpdateRequest updateRequest = (UpdateRequest) request; ModifiableSolrParams params = (ModifiableSolrParams) request.getParams(); diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterProperties.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterProperties.java new file mode 100644 index 00000000000..6645336f13b --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterProperties.java @@ -0,0 +1,126 @@ +package org.apache.solr.common.cloud; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.Utils; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; + +/** + * Interact with solr cluster properties + * + * Note that all methods on this class make calls to ZK on every invocation. 
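The two CloudSolrClient methods added above delegate to ZkStateReader. A minimal usage sketch, not part of this patch, assuming the single-argument zkHost constructor; the ZooKeeper address, collection name, and shard/replica counts are placeholders, and DocCollection.isFullyActive is the helper introduced further down in this patch:

import java.util.concurrent.TimeUnit;

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.cloud.DocCollection;

public class WaitForCollectionExample {
  public static void main(String[] args) throws Exception {
    // "localhost:9983" and "mycollection" are placeholders, not values from the patch.
    try (CloudSolrClient client = new CloudSolrClient("localhost:9983")) {
      client.connect();
      // Block until all 2 shards x 2 replicas are live and ACTIVE, or throw TimeoutException after 30s.
      client.waitForState("mycollection", 30, TimeUnit.SECONDS,
          (liveNodes, collectionState) -> DocCollection.isFullyActive(liveNodes, collectionState, 2, 2));
    }
  }
}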
For + * read-only eventually-consistent uses, clients should instead call + * {@link ZkStateReader#getClusterProperty(String, Object)} + */ +public class ClusterProperties { + + private final SolrZkClient client; + + /** + * Creates a ClusterProperties object using a provided SolrZkClient + */ + public ClusterProperties(SolrZkClient client) { + this.client = client; + } + + /** + * Read the value of a cluster property, returning a default if it is not set + * @param key the property name + * @param defaultValue the default value + * @param <T> the type of the property + * @return the property value + * @throws IOException if there is an error reading the value from the cluster + */ + @SuppressWarnings("unchecked") + public <T> T getClusterProperty(String key, T defaultValue) throws IOException { + T value = (T) getClusterProperties().get(key); + if (value == null) + return defaultValue; + return value; + } + + /** + * Return the cluster properties + * @throws IOException if there is an error reading properties from the cluster + */ + @SuppressWarnings("unchecked") + public Map<String, Object> getClusterProperties() throws IOException { + try { + return (Map<String, Object>) Utils.fromJSON(client.getData(ZkStateReader.CLUSTER_PROPS, null, new Stat(), true)); + } catch (KeeperException.NoNodeException e) { + return Collections.emptyMap(); + } catch (KeeperException | InterruptedException e) { + throw new IOException("Error reading cluster property", SolrZkClient.checkInterrupted(e)); + } + } + + /** + * This method sets a cluster property. + * + * @param propertyName The property name to be set. + * @param propertyValue The value of the property. + * @throws IOException if there is an error writing data to the cluster + */ + @SuppressWarnings("unchecked") + public void setClusterProperty(String propertyName, String propertyValue) throws IOException { + + if (!ZkStateReader.KNOWN_CLUSTER_PROPS.contains(propertyName)) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Not a known cluster property " + propertyName); + } + + for (; ; ) { + Stat s = new Stat(); + try { + if (client.exists(ZkStateReader.CLUSTER_PROPS, true)) { + Map<String, Object> properties = (Map<String, Object>) Utils.fromJSON(client.getData(ZkStateReader.CLUSTER_PROPS, null, s, true)); + if (propertyValue == null) { + //Don't update ZK unless absolutely necessary. + if (properties.get(propertyName) != null) { + properties.remove(propertyName); + client.setData(ZkStateReader.CLUSTER_PROPS, Utils.toJSON(properties), s.getVersion(), true); + } + } else { + //Don't update ZK unless absolutely necessary.
+ if (!propertyValue.equals(properties.get(propertyName))) { + properties.put(propertyName, propertyValue); + client.setData(ZkStateReader.CLUSTER_PROPS, Utils.toJSON(properties), s.getVersion(), true); + } + } + } else { + Map properties = new LinkedHashMap(); + properties.put(propertyName, propertyValue); + client.create(ZkStateReader.CLUSTER_PROPS, Utils.toJSON(properties), CreateMode.PERSISTENT, true); + } + } catch (KeeperException.BadVersionException | KeeperException.NodeExistsException e) { + //race condition + continue; + } catch (InterruptedException | KeeperException e) { + throw new IOException("Error setting cluster property", SolrZkClient.checkInterrupted(e)); + } + break; + } + } +} diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStatePredicate.java b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStatePredicate.java new file mode 100644 index 00000000000..0b0a28eeed0 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStatePredicate.java @@ -0,0 +1,42 @@ +package org.apache.solr.common.cloud; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/** + * Interface to determine if a collection state matches a required state + * + * @see ZkStateReader#waitForState(String, long, TimeUnit, CollectionStatePredicate) + */ +public interface CollectionStatePredicate { + + /** + * Check the collection state matches a required state + * + * Note that both liveNodes and collectionState should be consulted to determine + * the overall state. + * + * @param liveNodes the current set of live nodes + * @param collectionState the latest collection state, or null if the collection + * does not exist + */ + boolean matches(Set liveNodes, DocCollection collectionState); + +} diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStateWatcher.java b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStateWatcher.java new file mode 100644 index 00000000000..0bf66b012e8 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStateWatcher.java @@ -0,0 +1,42 @@ +package org.apache.solr.common.cloud; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
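A usage sketch for the ClusterProperties class above, not part of this patch; the ZooKeeper address and client timeout are placeholders, and an externally managed SolrZkClient could be passed in instead:

import org.apache.solr.common.cloud.ClusterProperties;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;

public class ClusterPropertiesExample {
  public static void main(String[] args) throws Exception {
    // Placeholder ZK connect string and client timeout.
    try (SolrZkClient zkClient = new SolrZkClient("localhost:9983", 30000)) {
      ClusterProperties props = new ClusterProperties(zkClient);
      // Every read goes to ZooKeeper; prefer ZkStateReader.getClusterProperty for cached reads.
      String urlScheme = props.getClusterProperty(ZkStateReader.URL_SCHEME, "http");
      System.out.println("urlScheme = " + urlScheme);
      // Only names in ZkStateReader.KNOWN_CLUSTER_PROPS are accepted; anything else is a BAD_REQUEST.
      props.setClusterProperty(ZkStateReader.URL_SCHEME, "https");
    }
  }
}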
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +import java.util.Set; + +/** + * Callback registered with {@link ZkStateReader#registerCollectionStateWatcher(String, CollectionStateWatcher)} + * and called whenever the collection state changes. + */ +public interface CollectionStateWatcher { + + /** + * Called when the collection we are registered against has a change of state + * + * Note that, due to the way Zookeeper watchers are implemented, a single call may be + * the result of several state changes + * + * A watcher is unregistered after it has been called once. To make a watcher persistent, + * implementors should re-register during this call. + * + * @param liveNodes the set of live nodes + * @param collectionState the new collection state + */ + void onStateChanged(Set liveNodes, DocCollection collectionState); + +} diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java index d30a73fa145..b5c65a6d847 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java @@ -22,6 +22,8 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Set; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -35,7 +37,8 @@ import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR; /** * Models a Collection in zookeeper (but that Java name is obviously taken, hence "DocCollection") */ -public class DocCollection extends ZkNodeProps { +public class DocCollection extends ZkNodeProps implements Iterable { + public static final String DOC_ROUTER = "router"; public static final String SHARDS = "shards"; public static final String STATE_FORMAT = "stateFormat"; @@ -217,4 +220,34 @@ public class DocCollection extends ZkNodeProps { if (slice == null) return null; return slice.getLeader(); } + + /** + * Check that all replicas in a collection are live + * + * @see CollectionStatePredicate + */ + public static boolean isFullyActive(Set liveNodes, DocCollection collectionState, + int expectedShards, int expectedReplicas) { + Objects.requireNonNull(liveNodes); + if (collectionState == null) + return false; + int activeShards = 0; + for (Slice slice : collectionState) { + int activeReplicas = 0; + for (Replica replica : slice) { + if (replica.isActive(liveNodes) == false) + return false; + activeReplicas++; + } + if (activeReplicas != expectedReplicas) + return false; + activeShards++; + } + return activeShards == expectedShards; + } + + @Override + public Iterator iterator() { + return slices.values().iterator(); + } } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java index 3a31d195658..7015dfbfdd5 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java @@ -16,13 +16,15 @@ */ package org.apache.solr.common.cloud; -import 
static org.apache.solr.common.cloud.ZkStateReader.*; - import java.util.Locale; import java.util.Map; +import java.util.Set; import org.noggit.JSONUtil; +import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP; +import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; + public class Replica extends ZkNodeProps { /** @@ -116,6 +118,10 @@ public class Replica extends ZkNodeProps { return state; } + public boolean isActive(Set liveNodes) { + return liveNodes.contains(this.nodeName) && this.state == State.ACTIVE; + } + @Override public String toString() { return name + ':' + JSONUtil.toJSON(propMap, -1); // small enough, keep it on one line (i.e. no indent) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java b/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java index 163561a996a..3ace17a6cd3 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java @@ -19,6 +19,7 @@ package org.apache.solr.common.cloud; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Locale; import java.util.Map; @@ -29,24 +30,29 @@ import org.noggit.JSONWriter; /** * A Slice contains immutable information about a logical shard (all replicas that share the same shard id). */ -public class Slice extends ZkNodeProps { +public class Slice extends ZkNodeProps implements Iterable { /** Loads multiple slices into a Map from a generic Map that probably came from deserialized JSON. */ public static Map loadAllFromMap(Map genericSlices) { if (genericSlices == null) return Collections.emptyMap(); - Map result = new LinkedHashMap<>(genericSlices.size()); - for (Map.Entry entry : genericSlices.entrySet()) { + Map result = new LinkedHashMap<>(genericSlices.size()); + for (Map.Entry entry : genericSlices.entrySet()) { String name = entry.getKey(); Object val = entry.getValue(); if (val instanceof Slice) { - result.put(name, (Slice)val); + result.put(name, (Slice) val); } else if (val instanceof Map) { - result.put(name, new Slice(name, null, (Map)val)); + result.put(name, new Slice(name, null, (Map) val)); } } return result; } + @Override + public Iterator iterator() { + return replicas.values().iterator(); + } + /** The slice's state. 
*/ public enum State { diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index ab031b0ab21..fe04d35e148 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -28,18 +28,24 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import org.apache.solr.common.Callable; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.Pair; import org.apache.solr.common.util.Utils; -import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; @@ -111,12 +117,10 @@ public class ZkStateReader implements Closeable { public static final String SHARD_LEADERS_ZKNODE = "leaders"; public static final String ELECTION_NODE = "election"; - - /** Collections we actively care about, and will try to keep watch on. */ - private final Set interestingCollections = Collections.newSetFromMap(new ConcurrentHashMap<>()); - + /** Collections tracked in the legacy (shared) state format, reflects the contents of clusterstate.json. */ private Map legacyCollectionStates = emptyMap(); + /** Last seen ZK version of clusterstate.json. */ private int legacyClusterStateVersion = 0; @@ -128,12 +132,29 @@ public class ZkStateReader implements Closeable { private volatile Set liveNodes = emptySet(); + private volatile Map clusterProperties = Collections.emptyMap(); + private final ZkConfigManager configManager; private ConfigData securityData; private final Runnable securityNodeListener; + private ConcurrentHashMap collectionWatches = new ConcurrentHashMap<>(); + + private final ExecutorService notifications = ExecutorUtil.newMDCAwareCachedThreadPool("watches"); + + private class CollectionWatch { + + int coreRefCount = 0; + Set stateWatchers = new HashSet<>(); + + public boolean canBeRemoved() { + return coreRefCount + stateWatchers.size() == 0; + } + + } + public static final Set KNOWN_CLUSTER_PROPS = unmodifiableSet(new HashSet<>(asList( LEGACY_CLOUD, URL_SCHEME, @@ -262,6 +283,7 @@ public class ZkStateReader implements Closeable { * a better design is possible. */ public void forceUpdateCollection(String collection) throws KeeperException, InterruptedException { + synchronized (getUpdateLock()) { if (clusterState == null) { return; @@ -295,6 +317,7 @@ public class ZkStateReader implements Closeable { } constructState(); } + } /** Refresh the set of live nodes. */ @@ -341,10 +364,11 @@ public class ZkStateReader implements Closeable { } // on reconnect of SolrZkClient force refresh and re-add watches. 
+ loadClusterProperties(); + refreshLiveNodes(new LiveNodeWatcher()); refreshLegacyClusterState(new LegacyClusterStateWatcher()); refreshStateFormat2Collections(); refreshCollectionList(new CollectionsChildWatcher()); - refreshLiveNodes(new LiveNodeWatcher()); synchronized (ZkStateReader.this.getUpdateLock()) { constructState(); @@ -458,7 +482,7 @@ public class ZkStateReader implements Closeable { this.clusterState = new ClusterState(liveNodes, result, legacyClusterStateVersion); LOG.debug("clusterStateSet: legacy [{}] interesting [{}] watched [{}] lazy [{}] total [{}]", legacyCollectionStates.keySet().size(), - interestingCollections.size(), + collectionWatches.keySet().size(), watchedCollectionStates.keySet().size(), lazyCollectionStates.keySet().size(), clusterState.getCollectionStates().size()); @@ -466,7 +490,7 @@ public class ZkStateReader implements Closeable { if (LOG.isTraceEnabled()) { LOG.trace("clusterStateSet: legacy [{}] interesting [{}] watched [{}] lazy [{}] total [{}]", legacyCollectionStates.keySet(), - interestingCollections, + collectionWatches.keySet(), watchedCollectionStates.keySet(), lazyCollectionStates.keySet(), clusterState.getCollectionStates()); @@ -476,8 +500,7 @@ public class ZkStateReader implements Closeable { /** * Refresh legacy (shared) clusterstate.json */ - private void refreshLegacyClusterState(Watcher watcher) - throws KeeperException, InterruptedException { + private void refreshLegacyClusterState(Watcher watcher) throws KeeperException, InterruptedException { try { final Stat stat = new Stat(); final byte[] data = zkClient.getData(CLUSTER_STATE, watcher, stat, true); @@ -487,6 +510,22 @@ public class ZkStateReader implements Closeable { // Nothing to do, someone else updated same or newer. return; } + Set liveNodes = this.liveNodes; // volatile read + for (Map.Entry watchEntry : this.collectionWatches.entrySet()) { + String coll = watchEntry.getKey(); + CollectionWatch collWatch = watchEntry.getValue(); + ClusterState.CollectionRef ref = this.legacyCollectionStates.get(coll); + if (ref == null) + continue; + // legacy collections are always in-memory + DocCollection oldState = ref.get(); + ClusterState.CollectionRef newRef = loadedData.getCollectionStates().get(coll); + DocCollection newState = newRef == null ? null : newRef.get(); + if (!collWatch.stateWatchers.isEmpty() + && !Objects.equals(oldState, newState)) { + notifyStateWatchers(liveNodes, coll, newState); + } + } this.legacyCollectionStates = loadedData.getCollectionStates(); this.legacyClusterStateVersion = stat.getVersion(); } @@ -503,9 +542,8 @@ public class ZkStateReader implements Closeable { * Refresh state format2 collections. */ private void refreshStateFormat2Collections() { - // It's okay if no format2 state.json exists, if one did not previous exist. - for (String coll : interestingCollections) { - new StateWatcher(coll).refreshAndWatch(watchedCollectionStates.containsKey(coll)); + for (String coll : collectionWatches.keySet()) { + new StateWatcher(coll).refreshAndWatch(); } } @@ -546,7 +584,7 @@ public class ZkStateReader implements Closeable { this.lazyCollectionStates.keySet().retainAll(children); for (String coll : children) { // We will create an eager collection for any interesting collections, so don't add to lazy. - if (!interestingCollections.contains(coll)) { + if (!collectionWatches.containsKey(coll)) { // Double check contains just to avoid allocating an object. 
LazyCollectionRef existing = lazyCollectionStates.get(coll); if (existing == null) { @@ -637,6 +675,7 @@ public class ZkStateReader implements Closeable { public void close() { this.closed = true; + notifications.shutdown(); if (closeClient) { zkClient.close(); } @@ -757,69 +796,47 @@ public class ZkStateReader implements Closeable { final byte[] data = zkClient.getData(ALIASES, null, null, true); this.aliases = ClusterState.load(data); } - - public Map getClusterProps() { - try { - if (getZkClient().exists(ZkStateReader.CLUSTER_PROPS, true)) { - return (Map) Utils.fromJSON(getZkClient().getData(ZkStateReader.CLUSTER_PROPS, null, new Stat(), true)) ; - } else { - return new LinkedHashMap(); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new SolrException(ErrorCode.SERVER_ERROR, "Thread interrupted. Error reading cluster properties", e); - } catch (KeeperException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, "Error reading cluster properties", e); - } + + @SuppressWarnings("unchecked") + public T getClusterProperty(String key, T defaultValue) { + T value = (T) clusterProperties.get(key); + if (value == null) + return defaultValue; + return value; } - /** - * This method sets a cluster property. - * - * @param propertyName The property name to be set. - * @param propertyValue The value of the property. - */ - public void setClusterProperty(String propertyName, String propertyValue) { - if (!KNOWN_CLUSTER_PROPS.contains(propertyName)) { - throw new SolrException(ErrorCode.BAD_REQUEST, "Not a known cluster property " + propertyName); - } + public Map getClusterProperties() { + return Collections.unmodifiableMap(clusterProperties); + } - for (; ; ) { - Stat s = new Stat(); - try { - if (getZkClient().exists(CLUSTER_PROPS, true)) { - Map properties = (Map) Utils.fromJSON(getZkClient().getData(CLUSTER_PROPS, null, s, true)); - if (propertyValue == null) { - //Don't update ZK unless absolutely necessary. - if (properties.get(propertyName) != null) { - properties.remove(propertyName); - getZkClient().setData(CLUSTER_PROPS, Utils.toJSON(properties), s.getVersion(), true); - } - } else { - //Don't update ZK unless absolutely necessary. 
- if (!propertyValue.equals(properties.get(propertyName))) { - properties.put(propertyName, propertyValue); - getZkClient().setData(CLUSTER_PROPS, Utils.toJSON(properties), s.getVersion(), true); - } - } - } else { - Map properties = new LinkedHashMap(); - properties.put(propertyName, propertyValue); - getZkClient().create(CLUSTER_PROPS, Utils.toJSON(properties), CreateMode.PERSISTENT, true); + private final Watcher clusterPropertiesWatcher = event -> { + // session events are not change events, and do not remove the watcher + if (Watcher.Event.EventType.None.equals(event.getType())) { + return; + } + loadClusterProperties(); + }; + + @SuppressWarnings("unchecked") + private void loadClusterProperties() { + try { + while (true) { + try { + byte[] data = zkClient.getData(ZkStateReader.CLUSTER_PROPS, clusterPropertiesWatcher, new Stat(), true); + this.clusterProperties = (Map) Utils.fromJSON(data); + LOG.info("Loaded cluster properties: {}", this.clusterProperties); + return; + } catch (KeeperException.NoNodeException e) { + this.clusterProperties = Collections.emptyMap(); + LOG.info("Loaded empty cluster properties"); + // set an exists watch, and if the node has been created since the last call, + // read the data again + if (zkClient.exists(ZkStateReader.CLUSTER_PROPS, clusterPropertiesWatcher, true) == null) + return; } - } catch (KeeperException.BadVersionException | KeeperException.NodeExistsException e) { - LOG.warn("Race condition while trying to set a new cluster prop on current version [{}]", s.getVersion()); - //race condition - continue; - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.error("Thread Interrupted. Error updating path [{}]", CLUSTER_PROPS, e); - throw new SolrException(ErrorCode.SERVER_ERROR, "Thread Interrupted. Error updating cluster property " + propertyName, e); - } catch (KeeperException e) { - LOG.error("Error updating path [{}]", CLUSTER_PROPS, e); - throw new SolrException(ErrorCode.SERVER_ERROR, "Error updating cluster property " + propertyName, e); } - break; + } catch (KeeperException | InterruptedException e) { + LOG.error("Error reading cluster properties from zookeeper", SolrZkClient.checkInterrupted(e)); } } @@ -863,10 +880,7 @@ public class ZkStateReader implements Closeable { final String hostAndPort = nodeName.substring(0,_offset); try { final String path = URLDecoder.decode(nodeName.substring(1+_offset), "UTF-8"); - String urlScheme = (String) getClusterProps().get(URL_SCHEME); - if(urlScheme == null) { - urlScheme = "http"; - } + String urlScheme = getClusterProperty(URL_SCHEME, "http"); return urlScheme + "://" + hostAndPort + (path.isEmpty() ? "" : ("/" + path)); } catch (UnsupportedEncodingException e) { throw new IllegalStateException("JVM Does not seem to support UTF-8", e); @@ -888,7 +902,7 @@ public class ZkStateReader implements Closeable { return; } - if (!interestingCollections.contains(coll)) { + if (!collectionWatches.containsKey(coll)) { // This collection is no longer interesting, stop watching. LOG.info("Uninteresting collection {}", coll); return; @@ -899,27 +913,22 @@ public class ZkStateReader implements Closeable { LOG.info("A cluster state change: [{}] for collection [{}] has occurred - updating... (live nodes size: [{}])", event, coll, liveNodesSize); - refreshAndWatch(true); + refreshAndWatch(); synchronized (getUpdateLock()) { constructState(); } + } /** * Refresh collection state from ZK and leave a watch for future changes. 
* As a side effect, updates {@link #clusterState} and {@link #watchedCollectionStates} * with the results of the refresh. - * - * @param expectExists if true, error if no state node exists */ - public void refreshAndWatch(boolean expectExists) { + public void refreshAndWatch() { try { DocCollection newState = fetchCollectionState(coll, this); updateWatchedCollection(coll, newState); - } catch (KeeperException.NoNodeException e) { - if (expectExists) { - LOG.warn("State node vanished for collection: [{}]", coll, e); - } } catch (KeeperException.SessionExpiredException | KeeperException.ConnectionLossException e) { LOG.warn("ZooKeeper watch triggered, but Solr cannot talk to ZK: [{}]", e.getMessage()); } catch (KeeperException e) { @@ -1071,32 +1080,190 @@ public class ZkStateReader implements Closeable { return COLLECTIONS_ZKNODE+"/"+coll + "/state.json"; } - public void addCollectionWatch(String coll) { - if (interestingCollections.add(coll)) { - LOG.info("addZkWatch [{}]", coll); - new StateWatcher(coll).refreshAndWatch(false); + /** + * Notify this reader that a local Core is a member of a collection, and so that collection + * state should be watched. + * + * Not a public API. This method should only be called from ZkController. + * + * The number of cores per-collection is tracked, and adding multiple cores from the same + * collection does not increase the number of watches. + * + * @param collection the collection that the core is a member of + * + * @see ZkStateReader#unregisterCore(String) + */ + public void registerCore(String collection) { + AtomicBoolean reconstructState = new AtomicBoolean(false); + collectionWatches.compute(collection, (k, v) -> { + if (v == null) { + reconstructState.set(true); + v = new CollectionWatch(); + } + v.coreRefCount++; + return v; + }); + if (reconstructState.get()) { + new StateWatcher(collection).refreshAndWatch(); synchronized (getUpdateLock()) { constructState(); } } } + /** + * Notify this reader that a local core that is a member of a collection has been closed. + * + * Not a public API. This method should only be called from ZkController. + * + * If no cores are registered for a collection, and there are no {@link CollectionStateWatcher}s + * for that collection either, the collection watch will be removed. + * + * @param collection the collection that the core belongs to + */ + public void unregisterCore(String collection) { + AtomicBoolean reconstructState = new AtomicBoolean(false); + collectionWatches.compute(collection, (k, v) -> { + if (v == null) + return null; + if (v.coreRefCount > 0) + v.coreRefCount--; + if (v.canBeRemoved()) { + watchedCollectionStates.remove(collection); + lazyCollectionStates.put(collection, new LazyCollectionRef(collection)); + reconstructState.set(true); + return null; + } + return v; + }); + if (reconstructState.get()) { + synchronized (getUpdateLock()) { + constructState(); + } + } + } + + /** + * Register a CollectionStateWatcher to be called when the state of a collection changes + * + * A given CollectionStateWatcher will be only called once. If you want to have a persistent watcher, + * it should register itself again in its {@link CollectionStateWatcher#onStateChanged(Set, DocCollection)} + * method. 
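As a concrete sketch of the re-registration idiom described in this javadoc (illustration only, not part of the patch):

import java.util.Set;

import org.apache.solr.common.cloud.CollectionStateWatcher;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ZkStateReader;

// A watcher that stays registered: watchers are removed after one notification,
// so it re-registers itself from onStateChanged().
public class PersistentLoggingWatcher implements CollectionStateWatcher {

  private final ZkStateReader reader;
  private final String collection;

  public PersistentLoggingWatcher(ZkStateReader reader, String collection) {
    this.reader = reader;
    this.collection = collection;
  }

  @Override
  public void onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
    System.out.println(collection + " changed: " + liveNodes.size()
        + " live nodes, state=" + collectionState);
    // Re-register to keep receiving future notifications.
    reader.registerCollectionStateWatcher(collection, this);
  }
}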
+ */ + public void registerCollectionStateWatcher(String collection, CollectionStateWatcher stateWatcher) { + AtomicBoolean watchSet = new AtomicBoolean(false); + collectionWatches.compute(collection, (k, v) -> { + if (v == null) { + v = new CollectionWatch(); + watchSet.set(true); + } + v.stateWatchers.add(stateWatcher); + return v; + }); + if (watchSet.get()) { + new StateWatcher(collection).refreshAndWatch(); + synchronized (getUpdateLock()) { + constructState(); + } + } + } + + /** + * Block until a CollectionStatePredicate returns true, or the wait times out + * + * Note that the predicate may be called again even after it has returned true, so + * implementors should avoid changing state within the predicate call itself. + * + * @param collection the collection to watch + * @param wait how long to wait + * @param unit the units of the wait parameter + * @param predicate the predicate to call on state changes + * @throws InterruptedException on interrupt + * @throws TimeoutException on timeout + */ + public void waitForState(final String collection, long wait, TimeUnit unit, CollectionStatePredicate predicate) + throws InterruptedException, TimeoutException { + + final CountDownLatch latch = new CountDownLatch(1); + + CollectionStateWatcher watcher = new CollectionStateWatcher() { + @Override + public void onStateChanged(Set liveNodes, DocCollection collectionState) { + if (predicate.matches(liveNodes, collectionState)) { + latch.countDown(); + } else { + registerCollectionStateWatcher(collection, this); + } + } + }; + registerCollectionStateWatcher(collection, watcher); + + try { + // check the current state + DocCollection dc = clusterState.getCollectionOrNull(collection); + if (predicate.matches(liveNodes, dc)) + return; + + // wait for the watcher predicate to return true, or time out + if (!latch.await(wait, unit)) + throw new TimeoutException(); + + } + finally { + removeCollectionStateWatcher(collection, watcher); + } + } + + /** + * Remove a watcher from a collection's watch list. + * + * This allows Zookeeper watches to be removed if there is no interest in the + * collection. 
+ * + * @param collection the collection + * @param watcher the watcher + */ + public void removeCollectionStateWatcher(String collection, CollectionStateWatcher watcher) { + collectionWatches.compute(collection, (k, v) -> { + if (v == null) + return null; + v.stateWatchers.remove(watcher); + if (v.canBeRemoved()) + return null; + return v; + }); + } + + /* package-private for testing */ + Set getStateWatchers(String collection) { + CollectionWatch watch = collectionWatches.get(collection); + if (watch == null) + return null; + return new HashSet<>(watch.stateWatchers); + } + + // returns true if the state has changed private void updateWatchedCollection(String coll, DocCollection newState) { + + Set liveNodes = this.liveNodes; // volatile read + if (newState == null) { LOG.info("Deleting data for [{}]", coll); watchedCollectionStates.remove(coll); + notifyStateWatchers(liveNodes, coll, null); return; } // CAS update loop while (true) { - if (!interestingCollections.contains(coll)) { + if (!collectionWatches.containsKey(coll)) { break; } DocCollection oldState = watchedCollectionStates.get(coll); if (oldState == null) { if (watchedCollectionStates.putIfAbsent(coll, newState) == null) { LOG.info("Add data for [{}] ver [{}]", coll, newState.getZNodeVersion()); + notifyStateWatchers(liveNodes, coll, newState); break; } } else { @@ -1106,27 +1273,18 @@ public class ZkStateReader implements Closeable { } if (watchedCollectionStates.replace(coll, oldState, newState)) { LOG.info("Updating data for [{}] from [{}] to [{}]", coll, oldState.getZNodeVersion(), newState.getZNodeVersion()); + notifyStateWatchers(liveNodes, coll, newState); break; } } } - // Resolve race with removeZKWatch. - if (!interestingCollections.contains(coll)) { + // Resolve race with unregisterCore. + if (!collectionWatches.containsKey(coll)) { watchedCollectionStates.remove(coll); LOG.info("Removing uninteresting collection [{}]", coll); } - } - - /** This is not a public API. 
Only used by ZkController */ - public void removeZKWatch(String coll) { - LOG.info("Removing watch for uninteresting collection [{}]", coll); - interestingCollections.remove(coll); - watchedCollectionStates.remove(coll); - lazyCollectionStates.put(coll, new LazyCollectionRef(coll)); - synchronized (getUpdateLock()) { - constructState(); - } + } public static class ConfigData { @@ -1142,4 +1300,45 @@ public class ZkStateReader implements Closeable { } } + + private void notifyStateWatchers(Set liveNodes, String collection, DocCollection collectionState) { + try { + notifications.submit(new Notification(liveNodes, collection, collectionState)); + } + catch (RejectedExecutionException e) { + if (closed == false) { + LOG.error("Couldn't run collection notifications for {}", collection, e); + } + } + } + + private class Notification implements Runnable { + + final Set liveNodes; + final String collection; + final DocCollection collectionState; + + private Notification(Set liveNodes, String collection, DocCollection collectionState) { + this.liveNodes = liveNodes; + this.collection = collection; + this.collectionState = collectionState; + } + + @Override + public void run() { + List watchers = new ArrayList<>(); + collectionWatches.compute(collection, (k, v) -> { + if (v == null) + return null; + watchers.addAll(v.stateWatchers); + v.stateWatchers.clear(); + return v; + }); + for (CollectionStateWatcher watcher : watchers) { + watcher.onStateChanged(liveNodes, collectionState); + } + } + + } + } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java index b8d9ac41b0a..5f307a8bcc0 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java @@ -19,16 +19,9 @@ package org.apache.solr.common.util; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; - -import java.util.Enumeration; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionHandler; @@ -153,6 +146,13 @@ public class ExecutorUtil { threadFactory); } + /** + * Create a cached thread pool using a named thread factory + */ + public static ExecutorService newMDCAwareCachedThreadPool(String name) { + return newMDCAwareCachedThreadPool(new SolrjNamedThreadFactory(name)); + } + /** * See {@link java.util.concurrent.Executors#newCachedThreadPool(ThreadFactory)} */ diff --git a/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java b/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java new file mode 100644 index 00000000000..057cf5f4df2 --- /dev/null +++ b/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java @@ -0,0 +1,249 @@ +package org.apache.solr.common.cloud; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
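Before the tests that follow, a sketch of ZkStateReader.waitForState with a hand-written CollectionStatePredicate (not part of the patch; the collection name is a placeholder). It waits until every shard has an elected leader, and relies on DocCollection now being Iterable over its Slices:

import java.util.concurrent.TimeUnit;

import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;

public class WaitForLeadersExample {

  /** Block until every shard of the collection has a leader, or time out after 60 seconds. */
  public static void waitForLeaders(ZkStateReader reader) throws Exception {
    reader.waitForState("mycollection", 60, TimeUnit.SECONDS, (liveNodes, state) -> {
      if (state == null) {
        return false; // collection does not exist (yet)
      }
      for (Slice slice : state) { // DocCollection implements Iterable<Slice> after this patch
        if (slice.getLeader() == null) {
          return false;
        }
      }
      return true;
    });
  }
}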
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.invoke.MethodHandles; +import java.util.HashMap; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.util.ExecutorUtil; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.hamcrest.core.Is.is; + +public class TestCollectionStateWatchers extends SolrCloudTestCase { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private static final int CLUSTER_SIZE = 4; + + private static final ExecutorService executor = ExecutorUtil.newMDCAwareCachedThreadPool("backgroundWatchers"); + + private static final int MAX_WAIT_TIMEOUT = 30; + + @BeforeClass + public static void startCluster() throws Exception { + configureCluster(CLUSTER_SIZE) + .addConfig("config", getFile("solrj/solr/collection1/conf").toPath()) + .configure(); + } + + @AfterClass + public static void shutdownBackgroundExecutors() { + executor.shutdown(); + } + + @Before + public void prepareCluster() throws Exception { + int missingServers = CLUSTER_SIZE - cluster.getJettySolrRunners().size(); + for (int i = 0; i < missingServers; i++) { + cluster.startJettySolrRunner(); + } + cluster.waitForAllNodes(30); + } + + private static Future waitInBackground(String collection, long timeout, TimeUnit unit, + CollectionStatePredicate predicate) { + return executor.submit(() -> { + try { + cluster.getSolrClient().waitForState(collection, timeout, unit, predicate); + } catch (InterruptedException | TimeoutException e) { + return Boolean.FALSE; + } + return Boolean.TRUE; + }); + } + + + @Test + public void testSimpleCollectionWatch() throws Exception { + + CloudSolrClient client = cluster.getSolrClient(); + CollectionAdminRequest.createCollection("testcollection", "config", 4, 1) + .processAndWait(client, MAX_WAIT_TIMEOUT); + + client.waitForState("testcollection", MAX_WAIT_TIMEOUT, TimeUnit.SECONDS, + (n, c) -> DocCollection.isFullyActive(n, c, 4, 1)); + + // shutdown a node and check that we get notified about the change + final AtomicInteger nodeCount = new AtomicInteger(0); + final CountDownLatch latch = new CountDownLatch(1); + client.registerCollectionStateWatcher("testcollection", (liveNodes, collectionState) -> { + // we can't just count liveNodes here, because that's updated by a separate watcher, + // and it may be the case that we're triggered by a 
node setting itself to DOWN before + // the liveNodes watcher is called + log.info("State changed: {}", collectionState); + for (Slice slice : collectionState) { + for (Replica replica : slice) { + if (replica.isActive(liveNodes)) + nodeCount.incrementAndGet(); + } + } + latch.countDown(); + }); + + cluster.stopJettySolrRunner(random().nextInt(cluster.getJettySolrRunners().size())); + assertTrue("CollectionStateWatcher was never notified of cluster change", latch.await(MAX_WAIT_TIMEOUT, TimeUnit.SECONDS)); + + assertThat(nodeCount.intValue(), is(3)); + + } + + @Test + public void testWaitForStateChecksCurrentState() throws Exception { + + CloudSolrClient client = cluster.getSolrClient(); + CollectionAdminRequest.createCollection("waitforstate", "config", 1, 1) + .processAndWait(client, MAX_WAIT_TIMEOUT); + + client.waitForState("waitforstate", MAX_WAIT_TIMEOUT, TimeUnit.SECONDS, + (n, c) -> DocCollection.isFullyActive(n, c, 1, 1)); + + // several goes, to check that we're not getting delayed state changes + for (int i = 0; i < 10; i++) { + try { + client.waitForState("waitforstate", 1, TimeUnit.SECONDS, (n, c) -> DocCollection.isFullyActive(n, c, 1, 1)); + } + catch (TimeoutException e) { + fail("waitForState should return immediately if the predicate is already satisfied"); + } + } + + } + + @Test + public void testCanWaitForNonexistantCollection() throws Exception { + + Future future = waitInBackground("delayed", MAX_WAIT_TIMEOUT, TimeUnit.SECONDS, + (n, c) -> DocCollection.isFullyActive(n, c, 1, 1)); + + CollectionAdminRequest.createCollection("delayed", "config", 1, 1) + .processAndWait(cluster.getSolrClient(), MAX_WAIT_TIMEOUT); + + assertTrue("waitForState was not triggered by collection creation", future.get()); + + } + + @Test + public void testPredicateFailureTimesOut() throws Exception { + CloudSolrClient client = cluster.getSolrClient(); + expectThrows(TimeoutException.class, () -> { + client.waitForState("nosuchcollection", 1, TimeUnit.SECONDS, ((liveNodes, collectionState) -> false)); + }); + Set watchers = client.getZkStateReader().getStateWatchers("nosuchcollection"); + assertTrue("Watchers for collection should be removed after timeout", + watchers == null || watchers.size() == 0); + + } + + @Test + public void testWaitForStateWatcherIsRetainedOnPredicateFailure() throws Exception { + + CloudSolrClient client = cluster.getSolrClient(); + CollectionAdminRequest.createCollection("falsepredicate", "config", 4, 1) + .processAndWait(client, MAX_WAIT_TIMEOUT); + + client.waitForState("falsepredicate", MAX_WAIT_TIMEOUT, TimeUnit.SECONDS, + (n, c) -> DocCollection.isFullyActive(n, c, 4, 1)); + + final CountDownLatch firstCall = new CountDownLatch(1); + + // stop a node, then add a watch waiting for all nodes to be back up + JettySolrRunner node1 = cluster.stopJettySolrRunner(random().nextInt(cluster.getJettySolrRunners().size())); + + Future future = waitInBackground("falsepredicate", MAX_WAIT_TIMEOUT, TimeUnit.SECONDS, (liveNodes, collectionState) -> { + firstCall.countDown(); + return DocCollection.isFullyActive(liveNodes, collectionState, 4, 1); + }); + + // first, stop another node; the watch should not be fired after this! 
+ JettySolrRunner node2 = cluster.stopJettySolrRunner(random().nextInt(cluster.getJettySolrRunners().size())); + + // now start them both back up + cluster.startJettySolrRunner(node1); + assertTrue("CollectionStateWatcher not called after 30 seconds", firstCall.await(MAX_WAIT_TIMEOUT, TimeUnit.SECONDS)); + cluster.startJettySolrRunner(node2); + + Boolean result = future.get(); + assertTrue("Did not see a fully active cluster after 30 seconds", result); + + } + + @Test + public void testWatcherIsRemovedAfterTimeout() { + CloudSolrClient client = cluster.getSolrClient(); + assertTrue("There should be no watchers for a non-existent collection!", + client.getZkStateReader().getStateWatchers("no-such-collection") == null); + + expectThrows(TimeoutException.class, () -> { + client.waitForState("no-such-collection", 10, TimeUnit.MILLISECONDS, (n, c) -> DocCollection.isFullyActive(n, c, 1, 1)); + }); + + Set watchers = client.getZkStateReader().getStateWatchers("no-such-collection"); + assertTrue("Watchers for collection should be removed after timeout", + watchers == null || watchers.size() == 0); + + } + + @Test + public void testDeletionsTriggerWatches() throws Exception { + cluster.createCollection("tobedeleted", 1, 1, "config", new HashMap<>()); + Future future = waitInBackground("tobedeleted", MAX_WAIT_TIMEOUT, TimeUnit.SECONDS, (l, c) -> c == null); + + CollectionAdminRequest.deleteCollection("tobedeleted").process(cluster.getSolrClient()); + + assertTrue("CollectionStateWatcher not notified of delete call after 30 seconds", future.get()); + } + + @Test + public void testWatchesWorkForStateFormat1() throws Exception { + + final CloudSolrClient client = cluster.getSolrClient(); + + Future future = waitInBackground("stateformat1", 10, TimeUnit.SECONDS, + (n, c) -> DocCollection.isFullyActive(n, c, 1, 1)); + + CollectionAdminRequest.createCollection("stateformat1", "config", 1, 1).setStateFormat(1) + .processAndWait(client, MAX_WAIT_TIMEOUT); + assertTrue("CollectionStateWatcher not notified of stateformat=1 collection creation", future.get()); + + Future migrated + = waitInBackground("stateformat1", MAX_WAIT_TIMEOUT, TimeUnit.SECONDS, + (n, c) -> c != null && c.getStateFormat() == 2); + + CollectionAdminRequest.migrateCollectionFormat("stateformat1").processAndWait(client, MAX_WAIT_TIMEOUT); + assertTrue("CollectionStateWatcher did not persist over state format migration", migrated.get()); + + } + +} diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java index d23b37cd4b8..f51116246eb 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java @@ -27,6 +27,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.SortedMap; import java.util.concurrent.Callable; import java.util.concurrent.CopyOnWriteArrayList; @@ -181,8 +182,8 @@ public class MiniSolrCloudCluster { */ public MiniSolrCloudCluster(int numServers, Path baseDir, String solrXml, JettyConfig jettyConfig, ZkTestServer zkTestServer) throws Exception { - this.baseDir = baseDir; - this.jettyConfig = jettyConfig; + this.baseDir = Objects.requireNonNull(baseDir); + this.jettyConfig = Objects.requireNonNull(jettyConfig); Files.createDirectories(baseDir); @@ -194,8 +195,7 @@ public class MiniSolrCloudCluster { } 
this.zkServer = zkTestServer; - try(SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), - AbstractZkTestCase.TIMEOUT, AbstractZkTestCase.TIMEOUT, null)) { + try (SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT)) { zkClient.makePath("/solr/solr.xml", solrXml.getBytes(Charset.defaultCharset()), true); if (jettyConfig.sslConfig != null && jettyConfig.sslConfig.isSSLMode()) { zkClient.makePath("/solr" + ZkStateReader.CLUSTER_PROPS, "{'urlScheme':'https'}".getBytes(Charsets.UTF_8), true); @@ -222,12 +222,17 @@ public class MiniSolrCloudCluster { throw startupError; } - try (SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), - AbstractZkTestCase.TIMEOUT, 45000, null)) { + waitForAllNodes(numServers, 60); + + solrClient = buildSolrClient(); + } + + private void waitForAllNodes(int numServers, int timeout) throws IOException, InterruptedException { + try (SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT)) { int numliveNodes = 0; - int retries = 60; + int retries = timeout; String liveNodesPath = "/solr/live_nodes"; - // Wait up to 60 seconds for number of live_nodes to match up number of servers + // Wait up to {timeout} seconds for number of live_nodes to match up number of servers do { if (zkClient.exists(liveNodesPath, true)) { numliveNodes = zkClient.getChildren(liveNodesPath, null, true).size(); @@ -244,8 +249,13 @@ public class MiniSolrCloudCluster { Thread.sleep(1000); } while (numliveNodes != numServers); } + catch (KeeperException e) { + throw new IOException("Error communicating with zookeeper", e); + } + } - solrClient = buildSolrClient(); + public void waitForAllNodes(int timeout) throws IOException, InterruptedException { + waitForAllNodes(jettys.size(), timeout); } private String newNodeName() { @@ -348,7 +358,13 @@ public class MiniSolrCloudCluster { return jetty; } - protected JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception { + /** + * Add a previously stopped node back to the cluster + * @param jetty a {@link JettySolrRunner} previously returned by {@link #stopJettySolrRunner(int)} + * @return the started node + * @throws Exception on error + */ + public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception { jetty.start(); jettys.add(jetty); return jetty;
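Finally, a small test sketch (not part of the patch) exercising the now-public MiniSolrCloudCluster.startJettySolrRunner(JettySolrRunner) together with waitForAllNodes; the cluster size and config path mirror the pattern used in TestCollectionStateWatchers above:

import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.junit.BeforeClass;
import org.junit.Test;

public class RestartNodeExample extends SolrCloudTestCase {

  @BeforeClass
  public static void setupCluster() throws Exception {
    configureCluster(2)
        .addConfig("config", getFile("solrj/solr/collection1/conf").toPath())
        .configure();
  }

  @Test
  public void restartFirstNode() throws Exception {
    // Stop node 0, add it back, then block until live_nodes matches the node count again.
    JettySolrRunner node = cluster.stopJettySolrRunner(0);
    cluster.startJettySolrRunner(node);
    cluster.waitForAllNodes(30);
  }
}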