diff --git a/dev-tools/scripts/addVersion.py b/dev-tools/scripts/addVersion.py
index 112da45adb9..630e86f6cb3 100644
--- a/dev-tools/scripts/addVersion.py
+++ b/dev-tools/scripts/addVersion.py
@@ -48,28 +48,36 @@ def add_constant(new_version, deprecate):
last = buffer[-1]
if last.strip() != '@Deprecated':
spaces = ' ' * (len(last) - len(last.lstrip()) - 1)
- buffer[-1] = spaces + (' * @deprecated (%s) Use latest\n' % new_version)
- buffer.append(spaces + ' */\n')
- buffer.append(spaces + '@Deprecated\n')
+ del buffer[-1] # Remove comment closer line
+ if (len(buffer) >= 4 and re.search('for Lucene.\s*$', buffer[-1]) != None):
+        del buffer[-3:] # drop the trailing lines '<p> / Use this to get the latest ... / ... for Lucene.'
+ buffer.append(( '{0} * @deprecated ({1}) Use latest\n'
+ + '{0} */\n'
+ + '{0}@Deprecated\n').format(spaces, new_version))
def buffer_constant(buffer, line):
spaces = ' ' * (len(line) - len(line.lstrip()))
- buffer.append('\n' + spaces + '/**\n')
- buffer.append(spaces + ' * Match settings and bugs in Lucene\'s %s release.\n' % new_version)
+ buffer.append(( '\n{0}/**\n'
+ + '{0} * Match settings and bugs in Lucene\'s {1} release.\n')
+ .format(spaces, new_version))
if deprecate:
- buffer.append(spaces + ' * @deprecated Use latest\n')
- buffer.append(spaces + ' */\n')
+ buffer.append('%s * @deprecated Use latest\n' % spaces)
+ else:
+      buffer.append(( '{0} * <p>\n'
+                    + '{0} * Use this to get the latest &amp; greatest settings, bug\n'
+                    + '{0} * fixes, etc, for Lucene.\n').format(spaces))
+ buffer.append('%s */\n' % spaces)
if deprecate:
- buffer.append(spaces + '@Deprecated\n')
- buffer.append(spaces + 'public static final Version %s = new Version(%d, %d, %d);\n' %
- (new_version.constant, new_version.major, new_version.minor, new_version.bugfix))
+ buffer.append('%s@Deprecated\n' % spaces)
+ buffer.append('{0}public static final Version {1} = new Version({2}, {3}, {4});\n'.format
+ (spaces, new_version.constant, new_version.major, new_version.minor, new_version.bugfix))
class Edit(object):
found = -1
def __call__(self, buffer, match, line):
if new_version.constant in line:
return None # constant already exists
- # outter match is just to find lines declaring version constants
+ # outer match is just to find lines declaring version constants
match = prev_matcher.search(line)
if match is not None:
ensure_deprecated(buffer) # old version should be deprecated
@@ -166,38 +174,26 @@ def check_solr_version_tests():
def read_config():
parser = argparse.ArgumentParser(description='Add a new version')
parser.add_argument('version', type=Version.parse)
- parser.add_argument('-c', '--changeid', type=str, help='Git ChangeId (commit hash) for downstream version change to merge')
c = parser.parse_args()
c.branch_type = find_branch_type()
c.matching_branch = c.version.is_bugfix_release() and c.branch_type == BranchType.release or \
c.version.is_minor_release() and c.branch_type == BranchType.stable or \
- c.version.is_major_release() and c.branch_type == BranchType.major
+ c.version.is_major_release() and c.branch_type == BranchType.unstable
print ("branch_type is %s " % c.branch_type)
- if c.changeid and c.version.is_major_release():
- parser.error('Cannot use --changeid for major release')
- if c.changeid and c.matching_branch:
- parser.error('Cannot use --changeid on branch that new version will originate on')
- if c.version.is_bugfix_release() and c.branch_type in [BranchType.major, BranchType.stable] and not c.changeid:
- parser.error('Adding bugfix release on master or stable branch requires --changeid')
- if c.version.is_minor_release() and c.branch_type in [BranchType.major] and not c.changeid:
- parser.error('Adding minor release on master branch requires --changeid')
return c
def main():
- c = read_config()
-
- if c.changeid:
- cherry_pick_change(c.changeid)
+ c = read_config()
print('\nAdding new version %s' % c.version)
update_changes('lucene/CHANGES.txt', c.version)
update_changes('solr/CHANGES.txt', c.version)
add_constant(c.version, not c.matching_branch)
- if not c.changeid:
+ if c.matching_branch:
print('\nUpdating latest version')
update_build_version(c.version)
update_latest_constant(c.version)
diff --git a/dev-tools/scripts/scriptutil.py b/dev-tools/scripts/scriptutil.py
index f10bf971b6f..84c39cd9516 100644
--- a/dev-tools/scripts/scriptutil.py
+++ b/dev-tools/scripts/scriptutil.py
@@ -94,11 +94,11 @@ def update_file(filename, line_re, edit):
f.write(''.join(buffer))
return True
-# branch types are "release", "stable" and "major"
+# branch types are "release", "stable" and "unstable"
class BranchType(Enum):
- major = 1
- stable = 2
- release = 3
+ unstable = 1
+ stable = 2
+ release = 3
def find_branch_type():
output = subprocess.check_output('git status', shell=True)
@@ -110,22 +110,17 @@ def find_branch_type():
raise Exception('git status missing branch name')
if branchName == b'master':
- return BranchType.major
+ return BranchType.unstable
if re.match(r'branch_(\d+)x', branchName.decode('UTF-8')):
return BranchType.stable
if re.match(r'branch_(\d+)_(\d+)', branchName.decode('UTF-8')):
return BranchType.release
- raise Exception('Cannot run bumpVersion.py on feature branch')
+ raise Exception('Cannot run %s on feature branch' % sys.argv[0].rsplit('/', 1)[-1])
version_prop_re = re.compile('version\.base=(.*)')
def find_current_version():
return version_prop_re.search(open('lucene/version.properties').read()).group(1)
-def cherry_pick_change(changeid):
- print('\nCherry-picking downstream change %s...' % changeid, end='')
- run('git cherry-pick %s' % changeid)
- print('done')
-
if __name__ == '__main__':
print('This is only a support module, it cannot be run')
sys.exit(1)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 791adadbf68..97d5b966731 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -22,6 +22,10 @@ New Features
* LUCENE-7234: Added InetAddressPoint.nextDown/nextUp to easily generate range
queries with excluded bounds. (Adrien Grand)
+* LUCENE-7278: Spatial-extras DateRangePrefixTree's Calendar is now configurable, to
+ e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to
+ DateTimeFormatter.ISO_INSTANT. (David Smiley)
+
API Changes
* LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils
@@ -133,6 +137,9 @@ Other
* LUCENE-7263: Make queryparser/xml/CoreParser's SpanQueryBuilderFactory
accessible to deriving classes. (Daniel Collins via Christine Poerschke)
+======================= Lucene 6.0.1 =======================
+(No Changes)
+
======================= Lucene 6.0.0 =======================
System Requirements
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java
index b0c292b0348..f359369a0c9 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/Placeholder.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs;
-
/** Remove this file when adding back compat codecs */
public class Placeholder {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
index 001439ce14d..19d6e3bbe90 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
@@ -108,7 +108,7 @@ public class Lucene50Codec extends Codec {
}
@Override
- public final SegmentInfoFormat segmentInfoFormat() {
+ public SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
new file mode 100644
index 00000000000..9c5453f65b5
--- /dev/null
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene50;
+
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter; // javadocs
+import org.apache.lucene.index.SegmentInfo; // javadocs
+import org.apache.lucene.index.SegmentInfos; // javadocs
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataOutput; // javadocs
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Version;
+
+/**
+ * Lucene 5.0 Segment info format.
+ * @deprecated Only for reading old 5.0-6.0 segments
+ */
+@Deprecated
+public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
+
+ /** Sole constructor. */
+ public Lucene50SegmentInfoFormat() {
+ }
+
+ @Override
+ public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
+ try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
+ Throwable priorE = null;
+ SegmentInfo si = null;
+ try {
+ int format = CodecUtil.checkIndexHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
+ Lucene50SegmentInfoFormat.VERSION_START,
+ Lucene50SegmentInfoFormat.VERSION_CURRENT,
+ segmentID, "");
+ final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
+
+ final int docCount = input.readInt();
+ if (docCount < 0) {
+ throw new CorruptIndexException("invalid docCount: " + docCount, input);
+ }
+ final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+
+      final Map<String,String> diagnostics;
+      final Set<String> files;
+      final Map<String,String> attributes;
+
+ if (format >= VERSION_SAFE_MAPS) {
+ diagnostics = input.readMapOfStrings();
+ files = input.readSetOfStrings();
+ attributes = input.readMapOfStrings();
+ } else {
+ diagnostics = Collections.unmodifiableMap(input.readStringStringMap());
+ files = Collections.unmodifiableSet(input.readStringSet());
+ attributes = Collections.unmodifiableMap(input.readStringStringMap());
+ }
+
+ si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
+ si.setFiles(files);
+ } catch (Throwable exception) {
+ priorE = exception;
+ } finally {
+ CodecUtil.checkFooter(input, priorE);
+ }
+ return si;
+ }
+ }
+
+ @Override
+ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+ throw new UnsupportedOperationException("this codec can only be used for reading");
+ }
+
+ /** File extension used to store {@link SegmentInfo}. */
+ public final static String SI_EXTENSION = "si";
+ static final String CODEC_NAME = "Lucene50SegmentInfo";
+ static final int VERSION_START = 0;
+ static final int VERSION_SAFE_MAPS = 1;
+ static final int VERSION_CURRENT = VERSION_SAFE_MAPS;
+}
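The new format above is deliberately read-only. A minimal sketch of how it could be exercised, assuming an existing 5.x/6.0 segment is on disk; the directory, segment name, and segment ID below are placeholders:

```java
import java.io.IOException;

import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

class ReadOnlySegmentInfoSketch {
  static SegmentInfo readOldSegment(Directory dir, String segName, byte[] segmentID) throws IOException {
    SegmentInfoFormat format = new Lucene50SegmentInfoFormat();
    SegmentInfo si = format.read(dir, segName, segmentID, IOContext.READONCE);
    // format.write(dir, si, IOContext.DEFAULT) would throw UnsupportedOperationException:
    // old segments can be read, but never written, through this codec.
    return si;
  }
}
```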
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java
index 2dde0cf6d90..d982d3b9ce0 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54Codec.java
@@ -51,7 +51,9 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
*
* @see org.apache.lucene.codecs.lucene54 package documentation for file format details.
* @lucene.experimental
+ * @deprecated Only for 5.x back compat
*/
+@Deprecated
public class Lucene54Codec extends Codec {
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene50FieldInfosFormat();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java
similarity index 98%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java
rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java
index 9f0d546740d..32c17527deb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60Codec.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs.lucene60;
-
import java.util.Objects;
import org.apache.lucene.codecs.Codec;
@@ -51,7 +50,9 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
* @see org.apache.lucene.codecs.lucene60 package documentation for file format details.
*
* @lucene.experimental
+ * @deprecated Only for 6.0 back compat
*/
+@Deprecated
public class Lucene60Codec extends Codec {
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat();
@@ -149,7 +150,7 @@ public class Lucene60Codec extends Codec {
/** Returns the docvalues format that should be used for writing
* new segments of field.
*
- * The default implementation always returns "Lucene50".
+ * The default implementation always returns "Lucene54".
*
* WARNING: if you subclass, you are responsible for index
* backwards compatibility: future version of Lucene are only
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html
new file mode 100644
index 00000000000..6b4e234826d
--- /dev/null
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/package.html
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+Lucene 6.0 file format.
+
+
diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 7f66de899e7..71aa938e21e 100644
--- a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -16,3 +16,4 @@
org.apache.lucene.codecs.lucene50.Lucene50Codec
org.apache.lucene.codecs.lucene53.Lucene53Codec
org.apache.lucene.codecs.lucene54.Lucene54Codec
+org.apache.lucene.codecs.lucene60.Lucene60Codec
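Registering Lucene60Codec here is what keeps 6.0-era indexes readable now that the class lives in backward-codecs. A small, purely illustrative sketch of the SPI lookup that relies on this services file:

```java
import org.apache.lucene.codecs.Codec;

class BackCompatCodecLookup {
  public static void main(String[] args) {
    // Resolved through META-INF/services/org.apache.lucene.codecs.Codec:
    Codec old = Codec.forName("Lucene60");
    System.out.println(old.getClass().getName());
  }
}
```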
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java
index 359e2ec3d22..8fdeb2041d2 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWCodec.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene50;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
/**
* Codec for testing 5.0 index format
@@ -26,9 +27,15 @@ import org.apache.lucene.codecs.NormsFormat;
@Deprecated
final class Lucene50RWCodec extends Lucene50Codec {
private final NormsFormat normsFormat = new Lucene50RWNormsFormat();
+ private final SegmentInfoFormat segmentInfoFormat = new Lucene50RWSegmentInfoFormat();
@Override
public NormsFormat normsFormat() {
return normsFormat;
}
+
+ @Override
+ public SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfoFormat;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java
similarity index 76%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
rename to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java
index 68aacc62db8..0a373b1dc76 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWSegmentInfoFormat.java
@@ -37,43 +37,14 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Version;
/**
- * Lucene 5.0 Segment info format.
- *
- * Files:
- *
- * .si : Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, Footer
- *
- * Data types:
- *
- * Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
- * SegSize --> {@link DataOutput#writeInt Int32}
- * SegVersion --> {@link DataOutput#writeString String}
- * Files --> {@link DataOutput#writeSetOfStrings Set<String>}
- * Diagnostics,Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}
- * IsCompoundFile --> {@link DataOutput#writeByte Int8}
- * Footer --> {@link CodecUtil#writeFooter CodecFooter}
- *
- * Field Descriptions:
- *
- * SegVersion is the code version that created the segment.
- * SegSize is the number of documents contained in the segment index.
- * IsCompoundFile records whether the segment is written as a compound file or
- * not. If this is -1, the segment is not a compound file. If it is 1, the segment
- * is a compound file.
- * The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
- * for each segment it creates. It includes metadata like the current Lucene
- * version, OS, Java version, why the segment was created (merge, flush,
- * addIndexes), etc.
- * Files is a list of files referred to by this segment.
- *
- *
- * @see SegmentInfos
- * @lucene.experimental
+ * Read-write version of 5.0 SegmentInfoFormat for testing
+ * @deprecated for test purposes only
*/
-public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
+@Deprecated
+public class Lucene50RWSegmentInfoFormat extends Lucene50SegmentInfoFormat {
/** Sole constructor. */
- public Lucene50SegmentInfoFormat() {
+ public Lucene50RWSegmentInfoFormat() {
}
@Override
@@ -109,7 +80,7 @@ public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
attributes = Collections.unmodifiableMap(input.readStringStringMap());
}
- si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes);
+ si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
si.setFiles(files);
} catch (Throwable exception) {
priorE = exception;
@@ -124,6 +95,8 @@ public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
+ assert si.getIndexSort() == null;
+
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
// Only add the file once we've successfully created it, else IFD assert can trip:
si.addFile(fileName);
@@ -153,6 +126,7 @@ public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
}
output.writeSetOfStrings(files);
output.writeMapOfStrings(si.getAttributes());
+
CodecUtil.writeFooter(output);
}
}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
index 74486d6c38c..df8a1b49404 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
@@ -29,7 +29,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
@@ -139,7 +139,7 @@ public class CreateIndexTask extends PerfTask {
if (defaultCodec == null && postingsFormat != null) {
try {
final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat);
- iwConf.setCodec(new Lucene60Codec() {
+ iwConf.setCodec(new Lucene62Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postingsFormatChosen;
diff --git a/lucene/classification/build.xml b/lucene/classification/build.xml
index 3ddb9bd26e6..704cae8973d 100644
--- a/lucene/classification/build.xml
+++ b/lucene/classification/build.xml
@@ -28,7 +28,6 @@
-
@@ -37,17 +36,16 @@
-
+
-
-
diff --git a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
index fce786bf1e9..c1c8ad19ee6 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
@@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -38,7 +39,6 @@ import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.GroupingSearch;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.uninverting.UninvertingReader;
/**
* Utility class for creating training / test / cross validation indexes from the original index.
@@ -68,7 +68,7 @@ public class DatasetSplitter {
* @param crossValidationIndex a {@link Directory} used to write the cross validation index
* @param analyzer {@link Analyzer} used to create the new docs
* @param termVectors {@code true} if term vectors should be kept
- * @param classFieldName names of the field used as the label for classification
+ * @param classFieldName name of the field used as the label for classification; this must be indexed with sorted doc values
* @param fieldNames names of fields that need to be put in the new indexes or null if all should be used
* @throws IOException if any writing operation fails on any of the indexes
*/
@@ -80,30 +80,23 @@ public class DatasetSplitter {
IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));
- // try to get the exact no. of existing classes
- Terms terms = originalIndex.terms(classFieldName);
- long noOfClasses = -1;
- if (terms != null) {
- noOfClasses = terms.size();
-
- }
- if (noOfClasses == -1) {
- noOfClasses = 10000; // fallback
+ // get the exact no. of existing classes
+ SortedDocValues classValues = originalIndex.getSortedDocValues(classFieldName);
+ if (classValues == null) {
+ throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values");
}
-    HashMap<String,UninvertingReader.Type> mapping = new HashMap<>();
- mapping.put(classFieldName, UninvertingReader.Type.SORTED);
- UninvertingReader uninvertingReader = new UninvertingReader(originalIndex, mapping);
+ int noOfClasses = classValues.getValueCount();
try {
- IndexSearcher indexSearcher = new IndexSearcher(uninvertingReader);
+ IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
GroupingSearch gs = new GroupingSearch(classFieldName);
gs.setGroupSort(Sort.INDEXORDER);
gs.setSortWithinGroup(Sort.INDEXORDER);
gs.setAllGroups(true);
gs.setGroupDocsLimit(originalIndex.maxDoc());
- TopGroups topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, (int) noOfClasses);
+ TopGroups topGroups = gs.search(indexSearcher, new MatchAllDocsQuery(), 0, noOfClasses);
// set the type to be indexed, stored, with term vectors
FieldType ft = new FieldType(TextField.TYPE_STORED);
@@ -156,7 +149,7 @@ public class DatasetSplitter {
testWriter.close();
cvWriter.close();
trainingWriter.close();
- uninvertingReader.close();
+ originalIndex.close();
}
}
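DatasetSplitter now reads the class labels straight from sorted doc values instead of uninverting the field, so callers must index the class field with doc values up front. A hedged sketch of the document setup this assumes (the field name "category" is hypothetical), which the test change below also illustrates:

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.util.BytesRef;

class LabeledDocFactory {
  static Document newLabeledDoc(String label) {
    Document doc = new Document();
    doc.add(new StringField("category", label, Field.Store.YES));        // indexed + stored label
    doc.add(new SortedDocValuesField("category", new BytesRef(label)));  // now required by DatasetSplitter
    return doc;
  }
}
```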
diff --git a/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java b/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java
index 0b6f077cdfd..fdd4b0bb4a8 100644
--- a/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java
+++ b/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java
@@ -70,7 +70,9 @@ public class DataSplitterTest extends LuceneTestCase {
doc = new Document();
doc.add(new Field(idFieldName, "id" + Integer.toString(i), ft));
doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft));
- doc.add(new Field(classFieldName, Integer.toString(rnd.nextInt(10)), ft));
+ String className = Integer.toString(rnd.nextInt(10));
+ doc.add(new Field(classFieldName, className, ft));
+ doc.add(new SortedDocValuesField(classFieldName, new BytesRef(className)));
indexWriter.addDocument(doc);
}
@@ -89,13 +91,11 @@ public class DataSplitterTest extends LuceneTestCase {
super.tearDown();
}
-
@Test
public void testSplitOnAllFields() throws Exception {
assertSplit(originalIndex, 0.1, 0.1);
}
-
@Test
public void testSplitOnSomeFields() throws Exception {
assertSplit(originalIndex, 0.2, 0.35, idFieldName, textFieldName);
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
index 3b026bedacd..20235528dca 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
@@ -36,6 +36,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
private IndexOutput out;
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final SegmentWriteState writeState;
+ final String segment;
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
@@ -49,6 +50,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
public SimpleTextFieldsWriter(SegmentWriteState writeState) throws IOException {
final String fileName = SimpleTextPostingsFormat.getPostingsFileName(writeState.segmentInfo.name, writeState.segmentSuffix);
+ segment = writeState.segmentInfo.name;
out = writeState.directory.createOutput(fileName, writeState.context);
this.writeState = writeState;
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
index 0823a888040..146e92a6a29 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
@@ -31,6 +31,8 @@ import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -59,6 +61,11 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_ID = new BytesRef(" id ");
+ final static BytesRef SI_SORT = new BytesRef(" sort ");
+ final static BytesRef SI_SORT_FIELD = new BytesRef(" field ");
+ final static BytesRef SI_SORT_TYPE = new BytesRef(" type ");
+ final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse ");
+ final static BytesRef SI_SORT_MISSING = new BytesRef(" missing ");
public static final String SI_EXTENSION = "si";
@@ -137,10 +144,119 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
+ ", got: " + StringHelper.idToString(id), input);
}
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT);
+ final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch));
+ SortField[] sortField = new SortField[numSortFields];
+ for (int i = 0; i < numSortFields; ++i) {
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_FIELD);
+ final String field = readString(SI_SORT_FIELD.length, scratch);
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE);
+ final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
+
+ final SortField.Type type;
+ switch (typeAsString) {
+ case "string":
+ type = SortField.Type.STRING;
+ break;
+ case "long":
+ type = SortField.Type.LONG;
+ break;
+ case "int":
+ type = SortField.Type.INT;
+ break;
+ case "double":
+ type = SortField.Type.DOUBLE;
+ break;
+ case "float":
+ type = SortField.Type.FLOAT;
+ break;
+ default:
+ throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
+ }
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_REVERSE);
+ final boolean reverse = Boolean.parseBoolean(readString(SI_SORT_REVERSE.length, scratch));
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_SORT_MISSING);
+ final String missingLastAsString = readString(SI_SORT_MISSING.length, scratch);
+ final Object missingValue;
+ switch (type) {
+ case STRING:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ case "first":
+ missingValue = SortField.STRING_FIRST;
+ break;
+ case "last":
+ missingValue = SortField.STRING_LAST;
+ break;
+ default:
+ throw new CorruptIndexException("unable to parse missing string: " + typeAsString, input);
+ }
+ break;
+ case LONG:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Long.parseLong(missingLastAsString);
+ break;
+ }
+ break;
+ case INT:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Integer.parseInt(missingLastAsString);
+ break;
+ }
+ break;
+ case DOUBLE:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Double.parseDouble(missingLastAsString);
+ break;
+ }
+ break;
+ case FLOAT:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Float.parseFloat(missingLastAsString);
+ break;
+ }
+ break;
+ default:
+ throw new AssertionError();
+ }
+ sortField[i] = new SortField(field, type, reverse);
+ if (missingValue != null) {
+ sortField[i].setMissingValue(missingValue);
+ }
+ }
+ Sort indexSort = sortField.length == 0 ? null : new Sort(sortField);
+
SimpleTextUtil.checkFooter(input);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
- isCompoundFile, null, Collections.unmodifiableMap(diagnostics), id, Collections.unmodifiableMap(attributes));
+ isCompoundFile, null, Collections.unmodifiableMap(diagnostics),
+ id, Collections.unmodifiableMap(attributes), indexSort);
info.setFiles(files);
return info;
}
@@ -223,6 +339,62 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
SimpleTextUtil.write(output, new BytesRef(si.getId()));
SimpleTextUtil.writeNewline(output);
+ Sort indexSort = si.getIndexSort();
+ SimpleTextUtil.write(output, SI_SORT);
+ final int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
+ SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch);
+ SimpleTextUtil.writeNewline(output);
+ for (int i = 0; i < numSortFields; ++i) {
+ final SortField sortField = indexSort.getSort()[i];
+
+ SimpleTextUtil.write(output, SI_SORT_FIELD);
+ SimpleTextUtil.write(output, sortField.getField(), scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ SimpleTextUtil.write(output, SI_SORT_TYPE);
+ final String sortType;
+ switch (sortField.getType()) {
+ case STRING:
+ sortType = "string";
+ break;
+ case LONG:
+ sortType = "long";
+ break;
+ case INT:
+ sortType = "int";
+ break;
+ case DOUBLE:
+ sortType = "double";
+ break;
+ case FLOAT:
+ sortType = "float";
+ break;
+ default:
+ throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
+ }
+ SimpleTextUtil.write(output, sortType, scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ SimpleTextUtil.write(output, SI_SORT_REVERSE);
+ SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ SimpleTextUtil.write(output, SI_SORT_MISSING);
+ final Object missingValue = sortField.getMissingValue();
+ final String missing;
+ if (missingValue == null) {
+ missing = "null";
+ } else if (missingValue == SortField.STRING_FIRST) {
+ missing = "first";
+ } else if (missingValue == SortField.STRING_LAST) {
+ missing = "last";
+ } else {
+ missing = missingValue.toString();
+ }
+ SimpleTextUtil.write(output, missing, scratch);
+ SimpleTextUtil.writeNewline(output);
+ }
+
SimpleTextUtil.writeChecksum(output, scratch);
}
}
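The new SI_SORT section persists the index sort as one block per SortField. A sketch of a Sort this format could round-trip, with the approximate SimpleText lines the write path above would emit shown as comments (the field name "timestamp" is hypothetical):

```java
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

class IndexSortSketch {
  static Sort timestampSort() {
    SortField byTime = new SortField("timestamp", SortField.Type.LONG, true); // reverse = true
    byTime.setMissingValue(Long.MIN_VALUE);
    return new Sort(byTime);
    // Roughly serialized by the write path above as:
    //     sort 1
    //       field timestamp
    //       type long
    //       reverse true
    //       missing -9223372036854775808
  }
}
```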
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
index 00259b89711..b59114a65e2 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
@@ -143,7 +143,6 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
} else {
write(TYPE_STRING);
newLine();
-
write(VALUE);
write(field.stringValue());
newLine();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
index 5d704ca017d..442445c2237 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
@@ -57,7 +57,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
}
// TODO: should we use this, or maybe a system property is better?
- static Codec defaultCodec = LOADER.lookup("Lucene60");
+ static Codec defaultCodec = LOADER.lookup("Lucene62");
}
private final String name;
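Bumping the default codec means newly flushed segments are written with the Lucene62 format unless a codec is set explicitly on the writer config. A trivial check, for illustration only:

```java
import org.apache.lucene.codecs.Codec;

class DefaultCodecCheck {
  public static void main(String[] args) {
    // SPI name of the codec used for new segments when none is configured explicitly:
    System.out.println(Codec.getDefault().getName()); // "Lucene62" after this change
  }
}
```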
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 90abf2ad44b..427b520aa4a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
@@ -25,12 +24,13 @@ import java.util.List;
import java.util.NoSuchElementException;
import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocIDMerger;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
-import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
@@ -44,6 +44,8 @@ import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Abstract API that consumes numeric, binary and
* sorted docvalues. Concrete implementations of this
@@ -240,6 +242,32 @@ public abstract class DocValuesConsumer implements Closeable {
}
}
}
+
+ /** Tracks state of one numeric sub-reader that we are merging */
+ private static class NumericDocValuesSub extends DocIDMerger.Sub {
+
+ private final NumericDocValues values;
+ private final Bits docsWithField;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, Bits docsWithField, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.docsWithField = docsWithField;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
/**
* Merges the numeric docvalues from toMerge.
@@ -248,20 +276,23 @@ public abstract class DocValuesConsumer implements Closeable {
* an Iterable that merges and filters deleted documents on the fly.
*/
public void mergeNumericField(final FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge, final List<Bits> docsWithField) throws IOException {
-
addNumericField(fieldInfo,
new Iterable() {
@Override
public Iterator iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<NumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+   subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
+ }
+ final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
boolean nextHasValue;
- int currentMaxDoc;
- NumericDocValues currentValues;
- Bits currentLiveDocs;
- Bits currentDocsWithField;
boolean nextIsSet;
@Override
@@ -276,7 +307,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -285,43 +316,46 @@ public abstract class DocValuesConsumer implements Closeable {
}
private boolean setNext() {
- while (true) {
- if (readerUpto == toMerge.size()) {
- return false;
- }
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < toMerge.size()) {
- currentValues = toMerge.get(readerUpto);
- currentDocsWithField = docsWithField.get(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- nextValue = currentValues.get(docIDUpto);
- if (nextValue == 0 && currentDocsWithField.get(docIDUpto) == false) {
- nextHasValue = false;
- } else {
- nextHasValue = true;
- }
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ NumericDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
}
+ nextIsSet = true;
+ nextValue = sub.values.get(sub.docID);
+ nextHasValue = nextValue != 0 || sub.docsWithField.get(sub.docID);
+ return true;
}
};
}
});
}
+ /** Tracks state of one binary sub-reader that we are merging */
+ private static class BinaryDocValuesSub extends DocIDMerger.Sub {
+
+ private final BinaryDocValues values;
+ private final Bits docsWithField;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public BinaryDocValuesSub(MergeState.DocMap docMap, BinaryDocValues values, Bits docsWithField, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.docsWithField = docsWithField;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the binary docvalues from toMerge.
*
@@ -329,20 +363,23 @@ public abstract class DocValuesConsumer implements Closeable {
* an Iterable that merges and filters deleted documents on the fly.
*/
public void mergeBinaryField(FieldInfo fieldInfo, final MergeState mergeState, final List<BinaryDocValues> toMerge, final List<Bits> docsWithField) throws IOException {
-
addBinaryField(fieldInfo,
new Iterable() {
@Override
public Iterator iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<BinaryDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+   subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
+ }
+ final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator() {
- int readerUpto = -1;
- int docIDUpto;
BytesRef nextValue;
BytesRef nextPointer; // points to null if missing, or nextValue
- int currentMaxDoc;
- BinaryDocValues currentValues;
- Bits currentLiveDocs;
- Bits currentDocsWithField;
boolean nextIsSet;
@Override
@@ -357,7 +394,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public BytesRef next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -367,42 +404,49 @@ public abstract class DocValuesConsumer implements Closeable {
private boolean setNext() {
while (true) {
- if (readerUpto == toMerge.size()) {
- return false;
+ BinaryDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
+ }
+ nextIsSet = true;
+ if (sub.docsWithField.get(sub.docID)) {
+ nextPointer = nextValue = sub.values.get(sub.docID);
+ } else {
+ nextPointer = null;
+ }
+ return true;
}
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < toMerge.size()) {
- currentValues = toMerge.get(readerUpto);
- currentDocsWithField = docsWithField.get(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- if (currentDocsWithField.get(docIDUpto)) {
- nextValue = currentValues.get(docIDUpto);
- nextPointer = nextValue;
- } else {
- nextPointer = null;
- }
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
}
- }
};
}
});
}
+ /** Tracks state of one sorted numeric sub-reader that we are merging */
+ private static class SortedNumericDocValuesSub extends DocIDMerger.Sub {
+
+ private final SortedNumericDocValues values;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public SortedNumericDocValuesSub(MergeState.DocMap docMap, SortedNumericDocValues values, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ values.setDocument(docID);
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the sorted docvalues from toMerge.
*
@@ -410,21 +454,24 @@ public abstract class DocValuesConsumer implements Closeable {
* iterables that filter deleted documents.
*/
public void mergeSortedNumericField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedNumericDocValues> toMerge) throws IOException {
- final int numReaders = toMerge.size();
- final SortedNumericDocValues dvs[] = toMerge.toArray(new SortedNumericDocValues[numReaders]);
- // step 3: add field
addSortedNumericField(fieldInfo,
// doc -> value count
new Iterable() {
@Override
public Iterator iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedNumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+   subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
+ }
+ final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator() {
- int readerUpto = -1;
- int docIDUpto;
int nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
boolean nextIsSet;
@Override
@@ -439,7 +486,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -449,30 +496,13 @@ public abstract class DocValuesConsumer implements Closeable {
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
+ SortedNumericDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
return false;
}
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- SortedNumericDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- nextValue = dv.count();
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ nextIsSet = true;
+ nextValue = sub.values.count();
+ return true;
}
}
};
@@ -482,15 +512,21 @@ public abstract class DocValuesConsumer implements Closeable {
new Iterable() {
@Override
public Iterator iterator() {
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedNumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+   subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
+ }
+ final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
boolean nextIsSet;
int valueUpto;
int valueLength;
+ SortedNumericDocValuesSub current;
@Override
public boolean hasNext() {
@@ -504,7 +540,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -514,38 +550,21 @@ public abstract class DocValuesConsumer implements Closeable {
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
- return false;
- }
if (valueUpto < valueLength) {
- nextValue = dvs[readerUpto].valueAt(valueUpto);
+ nextValue = current.values.valueAt(valueUpto);
valueUpto++;
nextIsSet = true;
return true;
}
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
+ current = docIDMerger.next();
+ if (current == null) {
+ return false;
}
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- assert docIDUpto < currentMaxDoc;
- SortedNumericDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- valueUpto = 0;
- valueLength = dv.count();
- docIDUpto++;
- continue;
- }
-
- docIDUpto++;
+ valueUpto = 0;
+ valueLength = current.values.count();
+ continue;
}
}
};
@@ -554,6 +573,32 @@ public abstract class DocValuesConsumer implements Closeable {
);
}
+ /** Tracks state of one sorted sub-reader that we are merging */
+ private static class SortedDocValuesSub extends DocIDMerger.Sub {
+
+ private final SortedDocValues values;
+ private int docID = -1;
+ private final int maxDoc;
+ private final LongValues map;
+
+ public SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values, int maxDoc, LongValues map) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ this.map = map;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the sorted docvalues from toMerge.
*
@@ -608,7 +653,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public BytesRef next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
int segmentNumber = map.getFirstSegmentNumber(currentOrd);
@@ -629,13 +674,17 @@ public abstract class DocValuesConsumer implements Closeable {
new Iterable() {
@Override
public Iterator iterator() {
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+   subs.add(new SortedDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
+ }
+ final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator() {
- int readerUpto = -1;
- int docIDUpto;
int nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
- LongValues currentMap;
boolean nextIsSet;
@Override
@@ -650,7 +699,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -661,30 +710,15 @@ public abstract class DocValuesConsumer implements Closeable {
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
+ SortedDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
return false;
}
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentMap = map.getGlobalOrds(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- int segOrd = dvs[readerUpto].getOrd(docIDUpto);
- nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd);
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ nextIsSet = true;
+ int segOrd = sub.values.getOrd(sub.docID);
+ nextValue = segOrd == -1 ? -1 : (int) sub.map.get(segOrd);
+ return true;
}
}
};
@@ -693,6 +727,37 @@ public abstract class DocValuesConsumer implements Closeable {
);
}
+ /** Tracks state of one sorted set sub-reader that we are merging */
+ private static class SortedSetDocValuesSub extends DocIDMerger.Sub {
+
+ private final SortedSetDocValues values;
+ int docID = -1;
+ private final int maxDoc;
+ private final LongValues map;
+
+ public SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values, int maxDoc, LongValues map) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ this.map = map;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "SortedSetDocValuesSub(docID=" + docID + " mappedDocID=" + mappedDocID + " values=" + values + ")";
+ }
+ }
+
/**
* Merges the sortedset docvalues from toMerge.
*
@@ -700,14 +765,12 @@ public abstract class DocValuesConsumer implements Closeable {
* an Iterable that merges ordinals and values and filters deleted documents .
*/
public void mergeSortedSetField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException {
- final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]);
- final int numReaders = mergeState.maxDocs.length;
// step 1: iterate thru each sub and mark terms still in use
- TermsEnum liveTerms[] = new TermsEnum[dvs.length];
+ TermsEnum liveTerms[] = new TermsEnum[toMerge.size()];
long[] weights = new long[liveTerms.length];
for (int sub = 0; sub < liveTerms.length; sub++) {
- SortedSetDocValues dv = dvs[sub];
+ SortedSetDocValues dv = toMerge.get(sub);
Bits liveDocs = mergeState.liveDocs[sub];
int maxDoc = mergeState.maxDocs[sub];
if (liveDocs == null) {
@@ -748,12 +811,12 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public BytesRef next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
int segmentNumber = map.getFirstSegmentNumber(currentOrd);
long segmentOrd = map.getFirstSegmentOrd(currentOrd);
- final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
+ final BytesRef term = toMerge.get(segmentNumber).lookupOrd(segmentOrd);
currentOrd++;
return term;
}
@@ -769,12 +832,18 @@ public abstract class DocValuesConsumer implements Closeable {
new Iterable() {
@Override
public Iterator iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedSetDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+   subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
+ }
+ final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator() {
- int readerUpto = -1;
- int docIDUpto;
int nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
boolean nextIsSet;
@Override
@@ -789,7 +858,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -800,33 +869,18 @@ public abstract class DocValuesConsumer implements Closeable {
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
+ SortedSetDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
return false;
}
-
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
+ sub.values.setDocument(sub.docID);
+ nextValue = 0;
+ while (sub.values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
+ nextValue++;
}
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- SortedSetDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- nextValue = 0;
- while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
- nextValue++;
- }
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ //System.out.println(" doc " + sub + " -> ord count = " + nextValue);
+ nextIsSet = true;
+ return true;
}
}
};
@@ -836,13 +890,18 @@ public abstract class DocValuesConsumer implements Closeable {
new Iterable() {
@Override
public Iterator iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<SortedSetDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+   subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
+ }
+ final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
- int currentMaxDoc;
- Bits currentLiveDocs;
- LongValues currentMap;
boolean nextIsSet;
long ords[] = new long[8];
int ordUpto;
@@ -860,7 +919,7 @@ public abstract class DocValuesConsumer implements Closeable {
@Override
public Number next() {
- if (!hasNext()) {
+ if (hasNext() == false) {
throw new NoSuchElementException();
}
assert nextIsSet;
@@ -871,10 +930,6 @@ public abstract class DocValuesConsumer implements Closeable {
private boolean setNext() {
while (true) {
- if (readerUpto == numReaders) {
- return false;
- }
-
if (ordUpto < ordLength) {
nextValue = ords[ordUpto];
ordUpto++;
@@ -882,35 +937,22 @@ public abstract class DocValuesConsumer implements Closeable {
return true;
}
- if (docIDUpto == currentMaxDoc) {
- readerUpto++;
- if (readerUpto < numReaders) {
- currentMap = map.getGlobalOrds(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- currentMaxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- assert docIDUpto < currentMaxDoc;
- SortedSetDocValues dv = dvs[readerUpto];
- dv.setDocument(docIDUpto);
- ordUpto = ordLength = 0;
- long ord;
- while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
- if (ordLength == ords.length) {
- ords = ArrayUtil.grow(ords, ordLength+1);
- }
- ords[ordLength] = currentMap.get(ord);
- ordLength++;
- }
- docIDUpto++;
- continue;
+ SortedSetDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
}
+ sub.values.setDocument(sub.docID);
- docIDUpto++;
+ ordUpto = ordLength = 0;
+ long ord;
+ while ((ord = sub.values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ if (ordLength == ords.length) {
+ ords = ArrayUtil.grow(ords, ordLength+1);
+ }
+ ords[ordLength] = sub.map.get(ord);
+ ordLength++;
+ }
+ continue;
}
}
};
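All of the merge paths above follow the same shape: wrap each segment's values in a DocIDMerger.Sub, then pull documents back in merged (and, when an index sort is configured, re-sorted) order. A condensed sketch of that pattern with a hypothetical Sub that only tracks doc IDs:

```java
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.search.DocIdSetIterator;

class DocIDMergerSketch {

  /** Hypothetical sub-reader: walks 0..maxDoc-1 and lets DocIDMerger handle the doc-ID remapping. */
  static class SimpleSub extends DocIDMerger.Sub {
    private final int maxDoc;
    private int docID = -1;

    SimpleSub(MergeState.DocMap docMap, int maxDoc) {
      super(docMap);
      this.maxDoc = maxDoc;
    }

    @Override
    public int nextDoc() {
      docID++;
      return docID == maxDoc ? DocIdSetIterator.NO_MORE_DOCS : docID;
    }
  }

  static void drain(MergeState mergeState) {
    List<SimpleSub> subs = new ArrayList<>();
    for (int i = 0; i < mergeState.maxDocs.length; i++) {
      subs.add(new SimpleSub(mergeState.docMaps[i], mergeState.maxDocs[i]));
    }
    DocIDMerger<SimpleSub> merger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
    for (SimpleSub sub = merger.next(); sub != null; sub = merger.next()) {
      int mergedDocID = sub.mappedDocID; // where this document lands in the merged segment
    }
  }
}
```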
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
index b771aabf43a..39d39022a75 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
@@ -24,6 +23,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
+import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
@@ -31,6 +31,8 @@ import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.Bits;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Abstract API that consumes normalization values.
* Concrete implementations of this
@@ -98,6 +100,30 @@ public abstract class NormsConsumer implements Closeable {
}
}
+ /** Tracks state of one numeric sub-reader that we are merging */
+ private static class NumericDocValuesSub extends DocIDMerger.Sub {
+
+ private final NumericDocValues values;
+ private int docID = -1;
+ private final int maxDoc;
+
+ public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values, int maxDoc) {
+ super(docMap);
+ this.values = values;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/**
* Merges the norms from <code>toMerge</code>.
*
@@ -111,13 +137,18 @@ public abstract class NormsConsumer implements Closeable {
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
+
+ // We must make a new DocIDMerger for each iterator:
+ List<NumericDocValuesSub> subs = new ArrayList<>();
+ assert mergeState.docMaps.length == toMerge.size();
+ for(int i=0;i<toMerge.size();i++) {
+ subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
return new Iterator<Number>() {
- int readerUpto = -1;
- int docIDUpto;
long nextValue;
- int maxDoc;
- NumericDocValues currentValues;
- Bits currentLiveDocs;
boolean nextIsSet;
@Override
@@ -141,31 +172,13 @@ public abstract class NormsConsumer implements Closeable {
}
private boolean setNext() {
- while (true) {
- if (readerUpto == toMerge.size()) {
- return false;
- }
-
- if (currentValues == null || docIDUpto == maxDoc) {
- readerUpto++;
- if (readerUpto < toMerge.size()) {
- currentValues = toMerge.get(readerUpto);
- currentLiveDocs = mergeState.liveDocs[readerUpto];
- maxDoc = mergeState.maxDocs[readerUpto];
- }
- docIDUpto = 0;
- continue;
- }
-
- if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
- nextIsSet = true;
- nextValue = currentValues.get(docIDUpto);
- docIDUpto++;
- return true;
- }
-
- docIDUpto++;
+ NumericDocValuesSub sub = docIDMerger.next();
+ if (sub == null) {
+ return false;
}
+ nextIsSet = true;
+ nextValue = sub.values.get(sub.docID);
+ return true;
}
};
}
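The rewritten iterators in this patch all rely on the same look-ahead idiom: hasNext() lazily calls a private setNext() that either stages the next value and flips nextIsSet, or reports exhaustion, and next() hands out the staged value. A minimal stand-alone sketch of that idiom, skipping null entries instead of deleted documents (illustrative, not Lucene code):

import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

class LookAheadIterator implements Iterator<String> {
  private final Iterator<String> in;
  private String nextValue;
  private boolean nextIsSet;

  LookAheadIterator(List<String> values) {
    this.in = values.iterator();
  }

  @Override
  public boolean hasNext() {
    return nextIsSet || setNext();
  }

  @Override
  public String next() {
    if (hasNext() == false) {
      throw new NoSuchElementException();
    }
    nextIsSet = false;          // consume the staged value
    return nextValue;
  }

  private boolean setNext() {
    while (in.hasNext()) {
      String candidate = in.next();
      if (candidate != null) {  // skip "deleted" entries
        nextValue = candidate;
        nextIsSet = true;
        return true;
      }
    }
    return false;
  }
}

Constructed over a list such as ("a", null, "b") it yields "a" and then "b", mirroring how the merge iterators above silently skip documents the DocIDMerger filters out.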
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
index 43b4416fedb..05084db6ca1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
@@ -76,7 +76,6 @@ public abstract class PointsWriter implements Closeable {
}
MergeState.DocMap docMap = mergeState.docMaps[i];
- int docBase = mergeState.docBase[i];
pointsReader.intersect(fieldInfo.name,
new IntersectVisitor() {
@Override
@@ -90,7 +89,7 @@ public abstract class PointsWriter implements Closeable {
int newDocID = docMap.get(docID);
if (newDocID != -1) {
// Not deleted:
- mergedVisitor.visit(docBase + newDocID, packedValue);
+ mergedVisitor.visit(newDocID, packedValue);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
index b8cff117e5f..26652aa8231 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
@@ -20,10 +20,13 @@ import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
@@ -33,6 +36,8 @@ import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Codec API for writing stored fields:
*
@@ -73,6 +78,30 @@ public abstract class StoredFieldsWriter implements Closeable {
* check that this is the case to detect the JRE bug described
* in LUCENE-1282. */
public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
+
+ private static class StoredFieldsMergeSub extends DocIDMerger.Sub {
+ private final StoredFieldsReader reader;
+ private final int maxDoc;
+ private final MergeVisitor visitor;
+ int docID = -1;
+
+ public StoredFieldsMergeSub(MergeVisitor visitor, MergeState.DocMap docMap, StoredFieldsReader reader, int maxDoc) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.reader = reader;
+ this.visitor = visitor;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
/** Merges in the stored fields from the readers in
* <code>mergeState</code>. The default implementation skips
@@ -82,23 +111,26 @@ public abstract class StoredFieldsWriter implements Closeable {
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
- int docCount = 0;
- for (int i=0;i<mergeState.storedFieldsReaders.length;i++) {
+ List<StoredFieldsMergeSub> subs = new ArrayList<>();
+ for(int i=0;i<mergeState.storedFieldsReaders.length;i++) {
+ StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i];
+ storedFieldsReader.checkIntegrity();
+ subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
+ int docCount = 0;
+ while (true) {
+ StoredFieldsMergeSub sub = docIDMerger.next();
+ if (sub == null) {
+ break;
}
+ assert sub.mappedDocID == docCount;
+ startDocument();
+ sub.reader.visitDocument(sub.docID, sub.visitor);
+ finishDocument();
+ docCount++;
}
finish(mergeState.mergeFieldInfos, docCount);
return docCount;
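The loop above leans on the assertion that merged doc IDs come back dense and in order: each live document's mapped ID is the number of live documents written before it. A toy sketch of building such a doc-ID map from a single segment's liveDocs (plain arrays; in the real merge the maps come from MergeState.docMaps and already account for earlier segments):

class DocMapSketch {
  // newDocIDs[d] = mapped ID for live doc d, or -1 if d is deleted
  static int[] buildDocMap(boolean[] liveDocs) {
    int[] newDocIDs = new int[liveDocs.length];
    int next = 0;
    for (int d = 0; d < liveDocs.length; d++) {
      newDocIDs[d] = liveDocs[d] ? next++ : -1;
    }
    return newDocIDs;
  }

  public static void main(String[] args) {
    int[] map = buildDocMap(new boolean[] {true, false, true, true});
    System.out.println(java.util.Arrays.toString(map)); // prints [0, -1, 1, 2]
  }
}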
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
index 1aff7379d37..5756d5beb87 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
@@ -16,16 +16,18 @@
*/
package org.apache.lucene.codecs;
-
import java.io.Closeable;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Iterator;
+import java.util.List;
-import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
@@ -34,6 +36,8 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Codec API for writing term vectors:
*
@@ -160,6 +164,28 @@ public abstract class TermVectorsWriter implements Closeable {
}
}
+ private static class TermVectorsMergeSub extends DocIDMerger.Sub {
+ private final TermVectorsReader reader;
+ private final int maxDoc;
+ int docID = -1;
+
+ public TermVectorsMergeSub(MergeState.DocMap docMap, TermVectorsReader reader, int maxDoc) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.reader = reader;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+ }
+
/** Merges in the term vectors from the readers in
* <code>mergeState</code>. The default implementation skips
* over deleted documents, and uses {@link #startDocument(int)},
@@ -170,32 +196,35 @@ public abstract class TermVectorsWriter implements Closeable {
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
+
+ List<TermVectorsMergeSub> subs = new ArrayList<>();
+ for(int i=0;i<mergeState.termVectorsReaders.length;i++) {
+ TermVectorsReader reader = mergeState.termVectorsReaders[i];
+ if (reader != null) {
+ reader.checkIntegrity();
+ }
+ subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
+ }
+
+ final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+
int docCount = 0;
- int numReaders = mergeState.maxDocs.length;
- for (int i = 0; i < numReaders; i++) {
- int maxDoc = mergeState.maxDocs[i];
- Bits liveDocs = mergeState.liveDocs[i];
- TermVectorsReader termVectorsReader = mergeState.termVectorsReaders[i];
- if (termVectorsReader != null) {
- termVectorsReader.checkIntegrity();
+ while (true) {
+ TermVectorsMergeSub sub = docIDMerger.next();
+ if (sub == null) {
+ break;
}
- for (int docID=0;docID<maxDoc;docID++) {
* Each segment index maintains the following:
*
*
- * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}.
+ * {@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment info}.
* This contains metadata about a segment, such as the number of documents,
* what files it uses,
*
@@ -235,7 +235,7 @@
* file.
*
*
- * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info}
+ * {@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment Info}
* .si
* Stores metadata about a segment
*
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
index 2b1e13dfedf..63308c422b3 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
@@ -123,6 +123,13 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
@Override
public void merge(MergeState mergeState) throws IOException {
+ if (mergeState.segmentInfo.getIndexSort() != null) {
+ // TODO: can we gain back some optimizations even if the index is sorted? E.g. if the sort results in large chunks of contiguous docs from one sub
+ // being copied over...?
+ super.merge(mergeState);
+ return;
+ }
+
for(PointsReader reader : mergeState.pointsReaders) {
if (reader instanceof Lucene60PointsReader == false) {
// We can only bulk merge when all to-be-merged segments use our format:
@@ -171,7 +178,6 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
singleValuePerDoc)) {
List<BKDReader> bkdReaders = new ArrayList<>();
List<MergeState.DocMap> docMaps = new ArrayList<>();
- List<Integer> docIDBases = new ArrayList<>();
for(int i=0;i<mergeState.pointsReaders.length;i++) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
- * Apache Lucene - Index File Formats
- *
- *
- * Introduction
- *
- *
This document defines the index file formats used in this version of Lucene.
- * If you are using a different version of Lucene, please consult the copy of
- * docs/
that was distributed with
- * the version you are using.
- *
Apache Lucene is written in Java, but several efforts are underway to write
- * versions of
- * Lucene in other programming languages . If these versions are to remain
- * compatible with Apache Lucene, then a language-independent definition of the
- * Lucene index format is required. This document thus attempts to provide a
- * complete and independent definition of the Apache Lucene file formats.
- *
As Lucene evolves, this document should evolve. Versions of Lucene in
- * different programming languages should endeavor to agree on file formats, and
- * generate new versions of this document.
- *
- *
- * Definitions
- *
- *
The fundamental concepts in Lucene are index, document, field and term.
- *
An index contains a sequence of documents.
- *
- * A document is a sequence of fields.
- * A field is a named sequence of terms.
- * A term is a sequence of bytes.
- *
- *
The same sequence of bytes in two different fields is considered a different
- * term. Thus terms are represented as a pair: the string naming the field, and the
- * bytes within the field.
- *
- *
Inverted Indexing
- *
The index stores statistics about terms in order to make term-based search
- * more efficient. Lucene's index falls into the family of indexes known as an
- * inverted index. This is because it can list, for a term, the documents
- * that contain it. This is the inverse of the natural relationship, in which
- * documents list terms.
- *
- *
Types of Fields
- *
In Lucene, fields may be stored , in which case their text is stored
- * in the index literally, in a non-inverted manner. Fields that are inverted are
- * called indexed . A field may be both stored and indexed.
- *
The text of a field may be tokenized into terms to be indexed, or the
- * text of a field may be used literally as a term to be indexed. Most fields are
- * tokenized, but sometimes it is useful for certain identifier fields to be
- * indexed literally.
- *
See the {@link org.apache.lucene.document.Field Field}
- * java docs for more information on Fields.
- *
- *
Segments
- *
Lucene indexes may be composed of multiple sub-indexes, or segments .
- * Each segment is a fully independent index, which could be searched separately.
- * Indexes evolve by:
- *
- * Creating new segments for newly added documents.
- * Merging existing segments.
- *
- *
Searches may involve multiple segments and/or multiple indexes, each index
- * potentially composed of a set of segments.
- *
- *
Document Numbers
- *
Internally, Lucene refers to documents by an integer document number .
- * The first document added to an index is numbered zero, and each subsequent
- * document added gets a number one greater than the previous.
- *
Note that a document's number may change, so caution should be taken when
- * storing these numbers outside of Lucene. In particular, numbers may change in
- * the following situations:
- *
- *
- * The numbers stored in each segment are unique only within the segment, and
- * must be converted before they can be used in a larger context. The standard
- * technique is to allocate each segment a range of values, based on the range of
- * numbers used in that segment. To convert a document number from a segment to an
- * external value, the segment's base document number is added. To convert
- * an external value back to a segment-specific value, the segment is identified
- * by the range that the external value is in, and the segment's base value is
- * subtracted. For example two five document segments might be combined, so that
- * the first segment has a base value of zero, and the second of five. Document
- * three from the second segment would have an external value of eight.
- *
- *
- * When documents are deleted, gaps are created in the numbering. These are
- * eventually removed as the index evolves through merging. Deleted documents are
- * dropped when segments are merged. A freshly-merged segment thus has no gaps in
- * its numbering.
- *
- *
- *
- *
- * Index Structure Overview
- *
- *
Each segment index maintains the following:
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment info}.
- * This contains metadata about a segment, such as the number of documents,
- * what files it uses,
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Field names}.
- * This contains the set of field names used in the index.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Stored Field values}.
- * This contains, for each document, a list of attribute-value pairs, where the attributes
- * are field names. These are used to store auxiliary information about the document, such as
- * its title, url, or an identifier to access a database. The set of stored fields are what is
- * returned for each hit when searching. This is keyed by document number.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term dictionary}.
- * A dictionary containing all of the terms used in all of the
- * indexed fields of all of the documents. The dictionary also contains the number
- * of documents which contain the term, and pointers to the term's frequency and
- * proximity data.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Frequency data}.
- * For each term in the dictionary, the numbers of all the
- * documents that contain that term, and the frequency of the term in that
- * document, unless frequencies are omitted (IndexOptions.DOCS_ONLY)
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Proximity data}.
- * For each term in the dictionary, the positions that the
- * term occurs in each document. Note that this will not exist if all fields in
- * all documents omit position data.
- *
- *
- * {@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Normalization factors}.
- * For each field in each document, a value is stored
- * that is multiplied into the score for hits on that field.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vectors}.
- * For each field in each document, the term vector (sometimes
- * called document vector) may be stored. A term vector consists of term text and
- * term frequency. To add Term Vectors to your index see the
- * {@link org.apache.lucene.document.Field Field} constructors
- *
- *
- * {@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-document values}.
- * Like stored values, these are also keyed by document
- * number, but are generally intended to be loaded into main memory for fast
- * access. Whereas stored values are generally intended for summary results from
- * searches, per-document values are useful for things like scoring factors.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}.
- * An optional file indicating which documents are live.
- *
- *
- * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}.
- * Optional pair of files, recording dimensionally indexed fields, to enable fast
- * numeric range filtering and large numeric values like BigInteger and BigDecimal (1D)
- * and geographic shape intersection (2D, 3D).
- *
- *
- *
Details on each of these are provided in their linked pages.
- *
- *
- * File Naming
- *
- *
All files belonging to a segment have the same name with varying extensions.
- * The extensions correspond to the different file formats described below. When
- * using the Compound File format (default in 1.4 and greater) these files (except
- * for the Segment info file, the Lock file, and Deleted documents file) are collapsed
- * into a single .cfs file (see below for details)
- *
Typically, all segments in an index are stored in a single directory,
- * although this is not required.
- *
As of version 2.1 (lock-less commits), file names are never re-used.
- * That is, when any file is saved
- * to the Directory it is given a never before used filename. This is achieved
- * using a simple generations approach. For example, the first segments file is
- * segments_1, then segments_2, etc. The generation is a sequential long integer
- * represented in alpha-numeric (base 36) form.
- *
- *
- * Summary of File Extensions
- *
- *
The following table summarizes the names and extensions of the files in
- * Lucene:
- *
- *
- * Name
- * Extension
- * Brief Description
- *
- *
- * {@link org.apache.lucene.index.SegmentInfos Segments File}
- * segments_N
- * Stores information about a commit point
- *
- *
- * Lock File
- * write.lock
- * The Write lock prevents multiple IndexWriters from writing to the same
- * file.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat Segment Info}
- * .si
- * Stores metadata about a segment
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat Compound File}
- * .cfs, .cfe
- * An optional "virtual" file consisting of all the other index files for
- * systems that frequently run out of file handles.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50FieldInfosFormat Fields}
- * .fnm
- * Stores information about the fields
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Index}
- * .fdx
- * Contains pointers to field data
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Data}
- * .fdt
- * The stored fields for documents
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Dictionary}
- * .tim
- * The term dictionary, stores term info
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Term Index}
- * .tip
- * The index into the Term Dictionary
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Frequencies}
- * .doc
- * Contains the list of docs which contain each term along with frequency
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Positions}
- * .pos
- * Stores position information about where a term occurs in the index
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat Payloads}
- * .pay
- * Stores additional per-position metadata information such as character offsets and user payloads
- *
- *
- * {@link org.apache.lucene.codecs.lucene53.Lucene53NormsFormat Norms}
- * .nvd, .nvm
- * Encodes length and boost factors for docs and fields
- *
- *
- * {@link org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat Per-Document Values}
- * .dvd, .dvm
- * Encodes additional scoring factors or other per-document information.
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Index}
- * .tvx
- * Stores offset into the document data file
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Documents}
- * .tvd
- * Contains information about each document that has term vectors
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat Term Vector Fields}
- * .tvf
- * The field level info about term vectors
- *
- *
- * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents}
- * .liv
- * Info about what files are live
- *
- *
- * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}
- * .dii, .dim
- * Holds indexed points, if any
- *
- *
- *
- *
- * Lock File
- * The write lock, which is stored in the index directory by default, is named
- * "write.lock". If the lock directory is different from the index directory then
- * the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix
- * derived from the full path to the index directory. When this file is present, a
- * writer is currently modifying the index (adding or removing documents). This
- * lock file ensures that only one writer is modifying the index at a time.
- *
- * History
- * Compatibility notes are provided in this document, describing how file
- * formats have changed from prior versions:
- *
- * In version 2.1, the file format was changed to allow lock-less commits (ie,
- * no more commit lock). The change is fully backwards compatible: you can open a
- * pre-2.1 index for searching or adding/deleting of docs. When the new segments
- * file is saved (committed), it will be written in the new file format (meaning
- * no specific "upgrade" process is needed). But note that once a commit has
- * occurred, pre-2.1 Lucene will not be able to read the index.
- * In version 2.3, the file format was changed to allow segments to share a
- * single set of doc store (vectors & stored fields) files. This allows for
- * faster indexing in certain cases. The change is fully backwards compatible (in
- * the same way as the lock-less commits change in 2.1).
- * In version 2.4, Strings are now written as true UTF-8 byte sequence, not
- * Java's modified UTF-8. See
- * LUCENE-510 for details.
- * In version 2.9, an optional opaque Map<String,String> CommitUserData
- * may be passed to IndexWriter's commit methods (and later retrieved), which is
- * recorded in the segments_N file. See
- * LUCENE-1382 for details. Also,
- * diagnostics were added to each segment written recording details about why it
- * was written (due to flush, merge; which OS/JRE was used; etc.). See issue
- * LUCENE-1654 for details.
- * In version 3.0, compressed fields are no longer written to the index (they
- * can still be read, but on merge the new segment will write them, uncompressed).
- * See issue LUCENE-1960
- * for details.
- * In version 3.1, segments records the code version that created them. See
- * LUCENE-2720 for details.
- * Additionally segments track explicitly whether or not they have term vectors.
- * See LUCENE-2811
- * for details.
- * In version 3.2, numeric fields are written as natively to stored fields
- * file, previously they were stored in text format only.
- * In version 3.4, fields can omit position data while still indexing term
- * frequencies.
- * In version 4.0, the format of the inverted index became extensible via
- * the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage
- * ({@code DocValues}) was introduced. Normalization factors need no longer be a
- * single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}.
- * Terms need not be unicode strings, they can be any byte sequence. Term offsets
- * can optionally be indexed into the postings lists. Payloads can be stored in the
- * term vectors.
- * In version 4.1, the format of the postings list changed to use either
- * of FOR compression or variable-byte encoding, depending upon the frequency
- * of the term. Terms appearing only once were changed to inline directly into
- * the term dictionary. Stored fields are compressed by default.
- * In version 4.2, term vectors are compressed by default. DocValues has
- * a new multi-valued type (SortedSet), that can be used for faceting/grouping/joining
- * on multi-valued fields.
- * In version 4.5, DocValues were extended to explicitly represent missing values.
- * In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
- * allow updating NumericDocValues fields.
- * In version 4.8, checksum footers were added to the end of each index file
- * for improved data integrity. Specifically, the last 8 bytes of every index file
- * contain the zlib-crc32 checksum of the file.
- * In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric)
- * that is suitable for faceting/sorting/analytics.
- * In version 5.4, DocValues have been improved to store more information on disk:
- * addresses for binary fields and ord indexes for multi-valued fields.
- * In version 6.0, Points were added, for multi-dimensional range/distance search.
- *
- *
- *
- * Limitations
- *
- *
Lucene uses a Java int
to refer to
- * document numbers, and the index file format uses an Int32
- * on-disk to store document numbers. This is a limitation
- * of both the index file format and the current implementation. Eventually these
- * should be replaced with either UInt64
values, or
- * better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.
- *
+ * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene62}
+ * for an overview of the index format.
*/
package org.apache.lucene.codecs.lucene60;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java
new file mode 100644
index 00000000000..50710752694
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62Codec.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene62;
+
+import java.util.Objects;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PointsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
+import org.apache.lucene.codecs.lucene53.Lucene53NormsFormat;
+import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat;
+import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+
+/**
+ * Implements the Lucene 6.2 index format, with configurable per-field postings
+ * and docvalues formats.
+ *
+ * If you want to reuse functionality of this codec in another codec, extend
+ * {@link FilterCodec}.
+ *
+ * @see org.apache.lucene.codecs.lucene60 package documentation for file format details.
+ *
+ * @lucene.experimental
+ */
+public class Lucene62Codec extends Codec {
+ private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
+ private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat();
+ private final SegmentInfoFormat segmentInfosFormat = new Lucene62SegmentInfoFormat();
+ private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
+ private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
+
+ private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return Lucene62Codec.this.getPostingsFormatForField(field);
+ }
+ };
+
+ private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return Lucene62Codec.this.getDocValuesFormatForField(field);
+ }
+ };
+
+ private final StoredFieldsFormat storedFieldsFormat;
+
+ /**
+ * Instantiates a new codec.
+ */
+ public Lucene62Codec() {
+ this(Mode.BEST_SPEED);
+ }
+
+ /**
+ * Instantiates a new codec, specifying the stored fields compression
+ * mode to use.
+ * @param mode stored fields compression mode to use for newly
+ * flushed/merged segments.
+ */
+ public Lucene62Codec(Mode mode) {
+ super("Lucene62");
+ this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode));
+ }
+
+ @Override
+ public final StoredFieldsFormat storedFieldsFormat() {
+ return storedFieldsFormat;
+ }
+
+ @Override
+ public final TermVectorsFormat termVectorsFormat() {
+ return vectorsFormat;
+ }
+
+ @Override
+ public final PostingsFormat postingsFormat() {
+ return postingsFormat;
+ }
+
+ @Override
+ public final FieldInfosFormat fieldInfosFormat() {
+ return fieldInfosFormat;
+ }
+
+ @Override
+ public final SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfosFormat;
+ }
+
+ @Override
+ public final LiveDocsFormat liveDocsFormat() {
+ return liveDocsFormat;
+ }
+
+ @Override
+ public final CompoundFormat compoundFormat() {
+ return compoundFormat;
+ }
+
+ @Override
+ public final PointsFormat pointsFormat() {
+ return new Lucene60PointsFormat();
+ }
+
+ /** Returns the postings format that should be used for writing
+ * new segments of <code>field</code>.
+ *
+ * The default implementation always returns "Lucene50".
+ *
+ * WARNING: if you subclass, you are responsible for index
+ * backwards compatibility: future version of Lucene are only
+ * guaranteed to be able to read the default implementation.
+ */
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return defaultFormat;
+ }
+
+ /** Returns the docvalues format that should be used for writing
+ * new segments of <code>field</code>.
+ *
+ * The default implementation always returns "Lucene54".
+ *
+ * WARNING: if you subclass, you are responsible for index
+ * backwards compatibility: future version of Lucene are only
+ * guaranteed to be able to read the default implementation.
+ */
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return defaultDVFormat;
+ }
+
+ @Override
+ public final DocValuesFormat docValuesFormat() {
+ return docValuesFormat;
+ }
+
+ private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
+ private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene54");
+
+ private final NormsFormat normsFormat = new Lucene53NormsFormat();
+
+ @Override
+ public final NormsFormat normsFormat() {
+ return normsFormat;
+ }
+}
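A sketch of wiring the new codec into an IndexWriterConfig, assuming an Analyzer instance is at hand; the per-field override shown is illustrative only, since "Lucene50" is already the default postings format:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.index.IndexWriterConfig;

class CodecSetup {
  static IndexWriterConfig newConfig(Analyzer analyzer) {
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    // trade a little indexing speed for smaller stored fields
    iwc.setCodec(new Lucene62Codec(Mode.BEST_COMPRESSION) {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        // route every field to the default format; a real subclass would branch on the field name
        return PostingsFormat.forName("Lucene50");
      }
    });
    return iwc;
  }
}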
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
new file mode 100644
index 00000000000..fe78572680c
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene62;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter; // javadocs
+import org.apache.lucene.index.SegmentInfo; // javadocs
+import org.apache.lucene.index.SegmentInfos; // javadocs
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataOutput; // javadocs
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Version;
+
+/**
+ * Lucene 6.2 Segment info format.
+ *
+ * Files:
+ *
+ * .si : Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, IndexSort, Footer
+ *
+ * Data types:
+ *
+ * Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ * SegSize --> {@link DataOutput#writeInt Int32}
+ * SegVersion --> {@link DataOutput#writeString String}
+ * Files --> {@link DataOutput#writeSetOfStrings Set<String>}
+ * Diagnostics,Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}
+ * IsCompoundFile --> {@link DataOutput#writeByte Int8}
+ * IndexSort --> {@link DataOutput#writeVInt Int32} count, followed by {@code count} SortField
+ * SortField --> {@link DataOutput#writeString String} field name, followed by {@link DataOutput#writeVInt Int32} sort type ID,
+ * followed by {@link DataOutput#writeByte Int8} indicating reversed sort, followed by a type-specific encoding of the optional missing value
+ * Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ * Field Descriptions:
+ *
+ * SegVersion is the code version that created the segment.
+ * SegSize is the number of documents contained in the segment index.
+ * IsCompoundFile records whether the segment is written as a compound file or
+ * not. If this is -1, the segment is not a compound file. If it is 1, the segment
+ * is a compound file.
+ * The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
+ * for each segment it creates. It includes metadata like the current Lucene
+ * version, OS, Java version, why the segment was created (merge, flush,
+ * addIndexes), etc.
+ * Files is a list of files referred to by this segment.
+ *
+ *
+ * @see SegmentInfos
+ * @lucene.experimental
+ */
+public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
+
+ /** Sole constructor. */
+ public Lucene62SegmentInfoFormat() {
+ }
+
+ @Override
+ public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
+ try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
+ Throwable priorE = null;
+ SegmentInfo si = null;
+ try {
+ int format = CodecUtil.checkIndexHeader(input, Lucene62SegmentInfoFormat.CODEC_NAME,
+ Lucene62SegmentInfoFormat.VERSION_START,
+ Lucene62SegmentInfoFormat.VERSION_CURRENT,
+ segmentID, "");
+ final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
+
+ final int docCount = input.readInt();
+ if (docCount < 0) {
+ throw new CorruptIndexException("invalid docCount: " + docCount, input);
+ }
+ final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+
+ final Map<String,String> diagnostics = input.readMapOfStrings();
+ final Set<String> files = input.readSetOfStrings();
+ final Map<String,String> attributes = input.readMapOfStrings();
+
+ int numSortFields = input.readVInt();
+ Sort indexSort;
+ if (numSortFields > 0) {
+ SortField[] sortFields = new SortField[numSortFields];
+ for(int i=0;i= 5 but got: " + version.major + " segment=" + si);
+ }
+ // Write the Lucene version that created this segment, since 3.1
+ output.writeInt(version.major);
+ output.writeInt(version.minor);
+ output.writeInt(version.bugfix);
+ assert version.prerelease == 0;
+ output.writeInt(si.maxDoc());
+
+ output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+ output.writeMapOfStrings(si.getDiagnostics());
+ Set<String> files = si.files();
+ for (String file : files) {
+ if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
+ throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
+ }
+ }
+ output.writeSetOfStrings(files);
+ output.writeMapOfStrings(si.getAttributes());
+
+ Sort indexSort = si.getIndexSort();
+ int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
+ output.writeVInt(numSortFields);
+ for (int i = 0; i < numSortFields; ++i) {
+ SortField sortField = indexSort.getSort()[i];
+ output.writeString(sortField.getField());
+ int sortTypeID;
+ switch (sortField.getType()) {
+ case STRING:
+ sortTypeID = 0;
+ break;
+ case LONG:
+ sortTypeID = 1;
+ break;
+ case INT:
+ sortTypeID = 2;
+ break;
+ case DOUBLE:
+ sortTypeID = 3;
+ break;
+ case FLOAT:
+ sortTypeID = 4;
+ break;
+ default:
+ throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
+ }
+ output.writeVInt(sortTypeID);
+ output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
+
+ // write missing value
+ Object missingValue = sortField.getMissingValue();
+ if (missingValue == null) {
+ output.writeByte((byte) 0);
+ } else {
+ switch(sortField.getType()) {
+ case STRING:
+ if (missingValue == SortField.STRING_LAST) {
+ output.writeByte((byte) 1);
+ } else if (missingValue == SortField.STRING_FIRST) {
+ output.writeByte((byte) 2);
+ } else {
+ throw new AssertionError("unrecognized missing value for STRING field \"" + sortField.getField() + "\": " + missingValue);
+ }
+ break;
+ case LONG:
+ output.writeByte((byte) 1);
+ output.writeLong(((Long) missingValue).longValue());
+ break;
+ case INT:
+ output.writeByte((byte) 1);
+ output.writeInt(((Integer) missingValue).intValue());
+ break;
+ case DOUBLE:
+ output.writeByte((byte) 1);
+ output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
+ break;
+ case FLOAT:
+ output.writeByte((byte) 1);
+ output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue()));
+ break;
+ default:
+ throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
+ }
+ }
+ }
+
+ CodecUtil.writeFooter(output);
+ }
+ }
+
+ /** File extension used to store {@link SegmentInfo}. */
+ public final static String SI_EXTENSION = "si";
+ static final String CODEC_NAME = "Lucene62SegmentInfo";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+}
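For readers tracing the IndexSort block documented above: each sort field is written as its name, a small type ID, one byte for the reverse flag, and then the optional missing value. A stand-alone mirror of the type-ID mapping used by write() (covering only the five types this format supports; a sketch, not the shipped code):

import org.apache.lucene.search.SortField;

class SortTypeIds {
  // mirrors the switch in Lucene62SegmentInfoFormat.write(): 0=STRING, 1=LONG, 2=INT, 3=DOUBLE, 4=FLOAT
  static int sortTypeID(SortField.Type type) {
    switch (type) {
      case STRING: return 0;
      case LONG:   return 1;
      case INT:    return 2;
      case DOUBLE: return 3;
      case FLOAT:  return 4;
      default:
        throw new IllegalArgumentException("index sorting does not support type: " + type);
    }
  }
}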
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java
new file mode 100644
index 00000000000..2fe2dc74b4a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Components from the Lucene 6.2 index format
+ * See {@link org.apache.lucene.codecs.lucene62} for an overview
+ * of the index format.
+ */
+
+package org.apache.lucene.codecs.lucene62;
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 89b36efa2d3..9dee2d14e1c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -43,6 +43,9 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -217,6 +220,9 @@ public final class CheckIndex implements Closeable {
/** Status for testing of PointValues (null if PointValues could not be tested). */
public PointsStatus pointsStatus;
+
+ /** Status of index sort */
+ public IndexSortStatus indexSortStatus;
}
/**
@@ -374,6 +380,18 @@ public final class CheckIndex implements Closeable {
/** Exception thrown during doc values test (null on success) */
public Throwable error = null;
}
+
+ /**
+ * Status from testing index sort
+ */
+ public static final class IndexSortStatus {
+ IndexSortStatus() {
+ }
+
+ /** Exception thrown during index sort test (null on success) */
+ public Throwable error = null;
+ }
+
}
/** Create a new CheckIndex on the directory. */
@@ -632,6 +650,7 @@ public final class CheckIndex implements Closeable {
int toLoseDocCount = info.info.maxDoc();
SegmentReader reader = null;
+ Sort previousIndexSort = null;
try {
msg(infoStream, " version=" + (version == null ? "3.0" : version));
@@ -642,6 +661,17 @@ public final class CheckIndex implements Closeable {
msg(infoStream, " compound=" + info.info.getUseCompoundFile());
segInfoStat.compound = info.info.getUseCompoundFile();
msg(infoStream, " numFiles=" + info.files().size());
+ Sort indexSort = info.info.getIndexSort();
+ if (indexSort != null) {
+ msg(infoStream, " sort=" + indexSort);
+ if (previousIndexSort != null) {
+ if (previousIndexSort.equals(indexSort) == false) {
+ throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
+ }
+ } else {
+ previousIndexSort = indexSort;
+ }
+ }
segInfoStat.numFiles = info.files().size();
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB));
@@ -722,6 +752,9 @@ public final class CheckIndex implements Closeable {
// Test PointValues
segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast);
+ // Test index sort
+ segInfoStat.indexSortStatus = testSort(reader, indexSort, infoStream, failFast);
+
// Rethrow the first exception we encountered
// This will cause stats for failed segments to be incremented properly
if (segInfoStat.liveDocStatus.error != null) {
@@ -790,6 +823,72 @@ public final class CheckIndex implements Closeable {
return result;
}
+
+ /**
+ * Tests index sort order.
+ * @lucene.experimental
+ */
+ public static Status.IndexSortStatus testSort(CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException {
+ // This segment claims its documents are sorted according to the incoming sort ... let's make sure:
+
+ long startNS = System.nanoTime();
+
+ Status.IndexSortStatus status = new Status.IndexSortStatus();
+
+ if (sort != null) {
+ if (infoStream != null) {
+ infoStream.print(" test: index sort..........");
+ }
+
+ SortField fields[] = sort.getSort();
+ final int reverseMul[] = new int[fields.length];
+ final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];
+
+ LeafReaderContext readerContext = new LeafReaderContext(reader);
+
+ for (int i = 0; i < fields.length; i++) {
+ reverseMul[i] = fields[i].getReverse() ? -1 : 1;
+ comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext);
+ }
+
+ int maxDoc = reader.maxDoc();
+
+ try {
+
+ for(int docID=1;docID < maxDoc;docID++) {
+
+ int cmp = 0;
+
+ for (int i = 0; i < comparators.length; i++) {
+ // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
+ // the segments are always the same here...
+ comparators[i].copy(0, docID-1);
+ comparators[i].setBottom(0);
+ cmp = reverseMul[i] * comparators[i].compareBottom(docID);
+ if (cmp != 0) {
+ break;
+ }
+ }
+
+ if (cmp > 0) {
+ throw new RuntimeException("segment has indexSort=" + sort + " but docID=" + (docID-1) + " sorts after docID=" + docID);
+ }
+ }
+ msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime()-startNS)));
+ } catch (Throwable e) {
+ if (failFast) {
+ IOUtils.reThrow(e);
+ }
+ msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+ }
+
+ return status;
+ }
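A sketch of driving this check from application code and surfacing the new per-segment status, assuming a Directory named dir; the field names follow CheckIndex.Status and the additions above, but treat the snippet as illustrative rather than canonical:

import java.io.IOException;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;

class VerifyIndexSort {
  static void verify(Directory dir) throws IOException {
    try (CheckIndex checker = new CheckIndex(dir)) {
      checker.setInfoStream(System.out);
      CheckIndex.Status status = checker.checkIndex();
      for (CheckIndex.Status.SegmentInfoStatus segment : status.segmentInfos) {
        if (segment.indexSortStatus != null && segment.indexSortStatus.error != null) {
          throw new RuntimeException("segment " + segment.name + " violates its index sort",
                                     segment.indexSortStatus.error);
        }
      }
    }
  }
}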
/**
* Test live docs.
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
new file mode 100644
index 00000000000..07c9e725270
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSetIterator; // javadocs
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.PriorityQueue;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/** Utility class to help merging documents from sub-readers according to either simple
+ * concatenated (unsorted) order, or by a specified index-time sort, skipping
+ * deleted documents and remapping non-deleted documents. */
+
+public class DocIDMerger<T extends DocIDMerger.Sub> {
+
+ private final List<T> subs;
+
+ // Used when indexSort != null:
+ private final PriorityQueue<T> queue;
+ private boolean first;
+
+ // Used for simple concatenation, when the index is not sorted:
+ private T current;
+ private int nextIndex;
+
+ /** Represents one sub-reader being merged */
+ public static abstract class Sub {
+ /** Mapped doc ID */
+ public int mappedDocID;
+
+ final MergeState.DocMap docMap;
+
+ /** Sole constructor */
+ public Sub(MergeState.DocMap docMap) {
+ this.docMap = docMap;
+ }
+
+ /** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */
+ public abstract int nextDoc();
+ }
+
+ /** Construct this from the provided subs, specifying the maximum sub count */
+ public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) {
+ this.subs = subs;
+
+ if (indexIsSorted && maxCount > 1) {
+ queue = new PriorityQueue<T>(maxCount) {
+ @Override
+ protected boolean lessThan(Sub a, Sub b) {
+ assert a.mappedDocID != b.mappedDocID;
+ return a.mappedDocID < b.mappedDocID;
+ }
+ };
+ } else {
+ // We simply concatenate
+ queue = null;
+ }
+
+ reset();
+ }
+
+ /** Construct this from the provided subs */
+ public DocIDMerger(List<T> subs, boolean indexIsSorted) {
+ this(subs, subs.size(), indexIsSorted);
+ }
+
+ /** Reuse API, currently only used by postings during merge */
+ public void reset() {
+ if (queue != null) {
+ // caller may not have fully consumed the queue:
+ queue.clear();
+ for(T sub : subs) {
+ while (true) {
+ int docID = sub.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ // all docs in this sub were deleted; do not add it to the queue!
+ break;
+ }
+
+ int mappedDocID = sub.docMap.get(docID);
+ if (mappedDocID == -1) {
+ // doc was deleted
+ continue;
+ } else {
+ sub.mappedDocID = mappedDocID;
+ queue.add(sub);
+ break;
+ }
+ }
+ }
+ first = true;
+ } else if (subs.size() > 0) {
+ current = subs.get(0);
+ nextIndex = 1;
+ } else {
+ current = null;
+ nextIndex = 0;
+ }
+ }
+
+ /** Returns null when done */
+ public T next() {
+ // Loop until we find a non-deleted document
+ if (queue != null) {
+ T top = queue.top();
+ if (top == null) {
+ // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
+ return null;
+ }
+
+ if (first == false) {
+ while (true) {
+ int docID = top.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ queue.pop();
+ top = queue.top();
+ break;
+ }
+ int mappedDocID = top.docMap.get(docID);
+ if (mappedDocID == -1) {
+ // doc was deleted
+ continue;
+ } else {
+ top.mappedDocID = mappedDocID;
+ top = queue.updateTop();
+ break;
+ }
+ }
+ }
+
+ first = false;
+
+ return top;
+
+ } else {
+ while (true) {
+ if (current == null) {
+ // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
+ return null;
+ }
+ int docID = current.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ if (nextIndex == subs.size()) {
+ current = null;
+ return null;
+ }
+ current = subs.get(nextIndex);
+ nextIndex++;
+ continue;
+ }
+ int mappedDocID = current.docMap.get(docID);
+ if (mappedDocID == -1) {
+ // doc is deleted
+ continue;
+ }
+
+ current.mappedDocID = mappedDocID;
+ return current;
+ }
+ }
+ }
+}
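To make the queue path concrete: when the index is sorted, every sub contributes doc IDs already remapped by its docMap, and the merger interleaves the subs by mapped doc ID while dropping deletions. A toy, Lucene-free model of that interleaving (plain arrays instead of Sub and DocMap, purely illustrative):

import java.util.PriorityQueue;

class DocIDInterleaveSketch {
  public static void main(String[] args) {
    // mapped (merged-segment) doc IDs contributed by two sub-readers; -1 marks a deleted doc
    int[][] mappedDocIDs = { {0, 3, -1, 5}, {1, 2, 4} };

    // smallest mapped doc ID on top, mirroring DocIDMerger's lessThan()
    PriorityQueue<int[]> queue = new PriorityQueue<>(
        (a, b) -> Integer.compare(mappedDocIDs[a[0]][a[1]], mappedDocIDs[b[0]][b[1]]));
    for (int sub = 0; sub < mappedDocIDs.length; sub++) {
      int pos = advance(mappedDocIDs[sub], -1);
      if (pos < mappedDocIDs[sub].length) {
        queue.add(new int[] {sub, pos});
      }
    }

    while (queue.isEmpty() == false) {
      int[] top = queue.poll();
      System.out.println("doc " + mappedDocIDs[top[0]][top[1]] + " from sub " + top[0]);
      int pos = advance(mappedDocIDs[top[0]], top[1]);
      if (pos < mappedDocIDs[top[0]].length) {
        queue.add(new int[] {top[0], pos});
      }
    }
  }

  // next live (non-deleted) position after pos, or length when the sub is exhausted
  static int advance(int[] docs, int pos) {
    pos++;
    while (pos < docs.length && docs[pos] == -1) {
      pos++;
    }
    return pos;
  }
}

Run as-is it prints docs 0 through 5 in order, each tagged with the sub it came from, which is the ordering the codec-level merge loops above depend on.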
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValues.java b/lucene/core/src/java/org/apache/lucene/index/DocValues.java
index feceb3bd3ff..4de42387042 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValues.java
@@ -210,7 +210,7 @@ public final class DocValues {
(expected.length == 1
? "(expected=" + expected[0]
: "(expected one of " + Arrays.toString(expected)) + "). " +
- "Use UninvertingReader or index with docvalues.");
+ "Re-index with correct docvalues type.");
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
index 65d6a144e74..3e8a2270297 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@@ -178,7 +178,7 @@ class DocumentsWriterPerThread {
pendingUpdates.clear();
deleteSlice = deleteQueue.newSlice();
- segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
assert numDocsInRAM == 0;
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java
index c35dc6719c9..13b6e8d6d4d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java
@@ -25,6 +25,7 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -101,6 +102,11 @@ public abstract class FilterCodecReader extends CodecReader {
return in.maxDoc();
}
+ @Override
+ public Sort getIndexSort() {
+ return in.getIndexSort();
+ }
+
@Override
public void addCoreClosedListener(CoreClosedListener listener) {
in.addCoreClosedListener(listener);
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
index f273dba6fc9..886c12a6836 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
@@ -22,6 +22,7 @@ import java.util.Iterator;
import java.util.Objects;
import org.apache.lucene.search.QueryCache;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -471,6 +472,12 @@ public abstract class FilterLeafReader extends LeafReader {
return in.getDocsWithField(field);
}
+ @Override
+ public Sort getIndexSort() {
+ ensureOpen();
+ return in.getIndexSort();
+ }
+
@Override
public void checkIntegrity() throws IOException {
ensureOpen();
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 2b45b6b38ab..159f5917c42 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.index;
-
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -32,8 +31,8 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
-import java.util.Map;
import java.util.Map.Entry;
+import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@@ -49,6 +48,7 @@ import org.apache.lucene.index.FieldInfos.FieldNumbers;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -937,6 +937,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// NOTE: this is correct even for an NRT reader because we'll pull FieldInfos even for the un-committed segments:
globalFieldNumberMap = getFieldNumberMap();
+ validateIndexSort();
+
config.getFlushPolicy().init(config);
docWriter = new DocumentsWriter(this, config, directoryOrig, directory);
eventQueue = docWriter.eventQueue();
@@ -1000,6 +1002,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
}
}
+ /** Confirms that the incoming index sort (if any) matches the existing index sort (if any). This is unfortunately just best effort,
+ * because it could be that the old index only has flushed segments, which do not record an index sort. */
+ private void validateIndexSort() {
+ Sort indexSort = config.getIndexSort();
+ if (indexSort != null) {
+ for(SegmentCommitInfo info : segmentInfos) {
+ Sort segmentIndexSort = info.info.getIndexSort();
+ if (segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
+ throw new IllegalArgumentException("cannot change previous indexSort=" + segmentIndexSort + " (from segment=" + info + ") to new indexSort=" + indexSort);
+ }
+ }
+ }
+ }
+
// reads latest field infos for the commit
// this is used on IW init and addIndexes(Dir) to create/update the global field map.
// TODO: fix tests abusing this method!
@@ -2472,7 +2488,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @throws IllegalArgumentException if addIndexes would cause
- * the index to exceed {@link #MAX_DOCS}
+ * the index to exceed {@link #MAX_DOCS}, or if the incoming
+ * index sort does not match this index's index sort
*/
public void addIndexes(Directory... dirs) throws IOException {
ensureOpen();
@@ -2481,6 +2498,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
List locks = acquireWriteLocks(dirs);
+ Sort indexSort = config.getIndexSort();
+
boolean successTop = false;
try {
@@ -2513,6 +2532,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
for (SegmentCommitInfo info : sis) {
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
+ Sort segmentIndexSort = info.info.getIndexSort();
+
+ if (indexSort != null && segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
+ // TODO: we could make this smarter, e.g. if the incoming indexSort is congruent with our sort ("starts with") then it's OK
+ throw new IllegalArgumentException("cannot change index sort from " + segmentIndexSort + " to " + indexSort);
+ }
+
String newSegName = newSegmentName();
if (infoStream.isEnabled("IW")) {
@@ -2609,6 +2635,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// long so we can detect int overflow:
long numDocs = 0;
+ Sort indexSort = config.getIndexSort();
+
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at addIndexes(CodecReader...)");
@@ -2618,6 +2646,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
String mergedName = newSegmentName();
for (CodecReader leaf : readers) {
numDocs += leaf.numDocs();
+ Sort leafIndexSort = leaf.getIndexSort();
+ if (indexSort != null && leafIndexSort != null && indexSort.equals(leafIndexSort) == false) {
+ throw new IllegalArgumentException("cannot change index sort from " + leafIndexSort + " to " + indexSort);
+ }
}
// Best-effort up front check:
@@ -2630,7 +2662,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
SegmentInfo info = new SegmentInfo(directoryOrig, Version.LATEST, mergedName, -1,
- false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());
SegmentMerger merger = new SegmentMerger(Arrays.asList(readers), info, infoStream, trackingDir,
globalFieldNumberMap,
@@ -2715,7 +2747,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// Same SI as before but we change directory and name
SegmentInfo newInfo = new SegmentInfo(directoryOrig, info.info.getVersion(), segName, info.info.maxDoc(),
info.info.getUseCompoundFile(), info.info.getCodec(),
- info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes());
+ info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes(), info.info.getIndexSort());
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getDelGen(),
info.getFieldInfosGen(), info.getDocValuesGen());
@@ -3243,16 +3275,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
private static class MergedDeletesAndUpdates {
ReadersAndUpdates mergedDeletesAndUpdates = null;
- MergePolicy.DocMap docMap = null;
boolean initializedWritableLiveDocs = false;
MergedDeletesAndUpdates() {}
- final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, boolean initWritableLiveDocs) throws IOException {
+ final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, boolean initWritableLiveDocs) throws IOException {
if (mergedDeletesAndUpdates == null) {
mergedDeletesAndUpdates = readerPool.get(merge.info, true);
- docMap = merge.getDocMap(mergeState);
- assert docMap.isConsistent(merge.info.info.maxDoc());
}
if (initWritableLiveDocs && !initializedWritableLiveDocs) {
mergedDeletesAndUpdates.initWritableLiveDocs();
@@ -3262,18 +3291,18 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
}
- private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto,
+ private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState,
MergedDeletesAndUpdates holder, String[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates,
- DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc) throws IOException {
+ DocValuesFieldUpdates.Iterator[] updatesIters, int segment, int curDoc) throws IOException {
int newDoc = -1;
for (int idx = 0; idx < mergingFields.length; idx++) {
DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx];
if (updatesIter.doc() == curDoc) { // document has an update
if (holder.mergedDeletesAndUpdates == null) {
- holder.init(readerPool, merge, mergeState, false);
+ holder.init(readerPool, merge, false);
}
if (newDoc == -1) { // map once per all field updates, but only if there are any updates
- newDoc = holder.docMap.map(docUpto);
+ newDoc = mergeState.docMaps[segment].get(curDoc);
}
DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx];
dvUpdates.add(newDoc, updatesIter.value());
@@ -3306,13 +3335,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// Carefully merge deletes that occurred after we
// started merging:
- int docUpto = 0;
long minGen = Long.MAX_VALUE;
// Lazy init (only when we find a delete to carry over):
final MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates();
final DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container();
-
+
+ assert sourceSegments.size() == mergeState.docMaps.length;
for (int i = 0; i < sourceSegments.size(); i++) {
SegmentCommitInfo info = sourceSegments.get(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
@@ -3375,21 +3404,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// since we started the merge, so we
// must merge them:
for (int j = 0; j < maxDoc; j++) {
- if (!prevLiveDocs.get(j)) {
- assert !currentLiveDocs.get(j);
- } else {
- if (!currentLiveDocs.get(j)) {
- if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) {
- holder.init(readerPool, merge, mergeState, true);
- }
- holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto));
- if (mergingFields != null) { // advance all iters beyond the deleted document
- skipDeletedDoc(updatesIters, j);
- }
- } else if (mergingFields != null) {
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
+ if (prevLiveDocs.get(j) == false) {
+ // if the document was deleted before, it better still be deleted!
+ assert currentLiveDocs.get(j) == false;
+ } else if (currentLiveDocs.get(j) == false) {
+ // the document was deleted while we were merging:
+ if (holder.mergedDeletesAndUpdates == null || holder.initializedWritableLiveDocs == false) {
+ holder.init(readerPool, merge, true);
}
- docUpto++;
+ holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j)));
+ if (mergingFields != null) { // advance all iters beyond the deleted document
+ skipDeletedDoc(updatesIters, j);
+ }
+ } else if (mergingFields != null) {
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
}
}
} else if (mergingFields != null) {
@@ -3397,50 +3425,38 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
for (int j = 0; j < maxDoc; j++) {
if (prevLiveDocs.get(j)) {
// document isn't deleted, check if any of the fields have an update to it
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
- // advance docUpto for every non-deleted document
- docUpto++;
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
} else {
// advance all iters beyond the deleted document
skipDeletedDoc(updatesIters, j);
}
}
- } else {
- docUpto += info.info.maxDoc() - info.getDelCount() - rld.getPendingDeleteCount();
}
} else if (currentLiveDocs != null) {
assert currentLiveDocs.length() == maxDoc;
// This segment had no deletes before but now it
// does:
for (int j = 0; j < maxDoc; j++) {
- if (!currentLiveDocs.get(j)) {
+ if (currentLiveDocs.get(j) == false) {
if (holder.mergedDeletesAndUpdates == null || !holder.initializedWritableLiveDocs) {
- holder.init(readerPool, merge, mergeState, true);
+ holder.init(readerPool, merge, true);
}
- holder.mergedDeletesAndUpdates.delete(holder.docMap.map(docUpto));
+ holder.mergedDeletesAndUpdates.delete(mergeState.docMaps[i].get(mergeState.leafDocMaps[i].get(j)));
if (mergingFields != null) { // advance all iters beyond the deleted document
skipDeletedDoc(updatesIters, j);
}
} else if (mergingFields != null) {
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
}
- docUpto++;
}
} else if (mergingFields != null) {
// no deletions before or after, but there were updates
for (int j = 0; j < maxDoc; j++) {
- maybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
- // advance docUpto for every non-deleted document
- docUpto++;
+ maybeApplyMergedDVUpdates(merge, mergeState, holder, mergingFields, dvFieldUpdates, updatesIters, i, j);
}
- } else {
- // No deletes or updates before or after
- docUpto += info.info.maxDoc();
}
}
- assert docUpto == merge.info.info.maxDoc();
-
if (mergedDVUpdates.any()) {
// System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates);
boolean success = false;
@@ -3881,7 +3897,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// ConcurrentMergePolicy we keep deterministic segment
// names.
final String mergeSegmentName = newSegmentName();
- SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, mergeSegmentName, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());
Map details = new HashMap<>();
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
details.put("mergeFactor", Integer.toString(merge.segments.size()));
@@ -4082,10 +4098,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
}
// System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());
-
- // we pass merge.getMergeReaders() instead of merge.readers to allow the
- // OneMerge to return a view over the actual segments to merge
- final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
+
+ // Let the merge wrap readers
+ List mergeReaders = new ArrayList<>();
+ for (SegmentReader reader : merge.readers) {
+ mergeReaders.add(merge.wrapForMerge(reader));
+ }
+ final SegmentMerger merger = new SegmentMerger(mergeReaders,
merge.info.info, infoStream, dirWrapper,
globalFieldNumberMap,
context);
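For context on the checks above, here is a rough, illustrative sketch (not part of the patch) of the behavior they enforce: once a merged segment records an index sort, reopening the index with a conflicting sort, or adding its segments via addIndexes into a writer with a conflicting sort, is rejected. The field names, analyzer, and RAMDirectory below are arbitrary assumptions; freshly flushed segments carry no sort yet, so the forceMerge is what makes the conflict detectable.

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    // Illustrative sketch only; assumes the APIs added in this patch plus standard Lucene classes.
    public class IndexSortValidationSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG)));
        IndexWriter writer = new IndexWriter(dir, iwc);
        for (long ts = 0; ts < 2; ts++) {
          Document doc = new Document();
          doc.add(new NumericDocValuesField("timestamp", ts));
          writer.addDocument(doc);
          writer.commit();              // two flushed segments, still unsorted (sort == null)
        }
        writer.forceMerge(1);           // the merged segment records config.getIndexSort()
        writer.close();

        // Reopening with a different sort now trips validateIndexSort();
        // addIndexes(Directory...) applies the same per-segment check.
        IndexWriterConfig conflicting = new IndexWriterConfig(new StandardAnalyzer());
        conflicting.setIndexSort(new Sort(new SortField("title", SortField.Type.STRING)));
        try {
          new IndexWriter(dir, conflicting);
        } catch (IllegalArgumentException expected) {
          System.out.println("rejected: " + expected.getMessage());
        }
      }
    }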
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
index a90d625e305..e2957d74316 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
@@ -18,16 +18,19 @@ package org.apache.lucene.index;
import java.io.PrintStream;
+import java.util.EnumSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.PrintStreamInfoStream;
-import org.apache.lucene.util.SetOnce;
import org.apache.lucene.util.SetOnce.AlreadySetException;
+import org.apache.lucene.util.SetOnce;
/**
* Holds all the configuration that is used to create an {@link IndexWriter}.
@@ -439,6 +442,26 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
return this;
}
+ /** We only allow sorting on these types */
+ private static final EnumSet ALLOWED_INDEX_SORT_TYPES = EnumSet.of(SortField.Type.STRING,
+ SortField.Type.LONG,
+ SortField.Type.INT,
+ SortField.Type.DOUBLE,
+ SortField.Type.FLOAT);
+
+ /**
+ * Set the {@link Sort} order to use when merging segments. Note that newly flushed segments will remain unsorted.
+ */
+ public IndexWriterConfig setIndexSort(Sort sort) {
+ for(SortField sortField : sort.getSort()) {
+ if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) {
+ throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField);
+ }
+ }
+ this.indexSort = sort;
+ return this;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder(super.toString());
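A smaller sketch of the type restriction above: setIndexSort accepts only STRING, LONG, INT, DOUBLE and FLOAT sort fields and rejects anything else up front. The field name and analyzer are arbitrary assumptions; this is not part of the patch.

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;

    // Illustrative sketch only: the per-SortField type check in setIndexSort().
    public class IndexSortTypeCheckSketch {
      public static void main(String[] args) {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());

        // Accepted: INT is in ALLOWED_INDEX_SORT_TYPES (reverse order is fine too).
        iwc.setIndexSort(new Sort(new SortField("popularity", SortField.Type.INT, true)));

        // Rejected: SCORE is not an allowed index sort type.
        try {
          iwc.setIndexSort(new Sort(SortField.FIELD_SCORE));
        } catch (IllegalArgumentException expected) {
          System.out.println(expected.getMessage());   // "invalid SortField type: ..."
        }
      }
    }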
diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
index 9622d4e2f85..44e61e2787f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/** {@code LeafReader} is an abstract class, providing an interface for accessing an
@@ -312,4 +313,7 @@ public abstract class LeafReader extends IndexReader {
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
+
+ /** Returns null if this leaf is unsorted, or the {@link Sort} that it was sorted by */
+ public abstract Sort getIndexSort();
}
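Since every LeafReader now reports its sort, callers can inspect it per segment. A minimal sketch, assuming an already-built index directory (not part of the patch):

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.store.Directory;

    // Illustrative sketch only: per-segment sorts can differ while an index transitions,
    // because freshly flushed segments report null until they are merged.
    public class InspectIndexSortSketch {
      static void printLeafSorts(Directory dir) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
          for (LeafReaderContext ctx : reader.leaves()) {
            Sort leafSort = ctx.reader().getIndexSort();   // null => this leaf is unsorted
            System.out.println("leaf " + ctx.ord + " sort=" + leafSort);
          }
        }
      }
    }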
diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
index 1a0002c73f7..cec70c099aa 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
@@ -23,6 +23,7 @@ import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
@@ -94,6 +95,9 @@ public class LiveIndexWriterConfig {
/** True if calls to {@link IndexWriter#close()} should first do a commit. */
protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;
+ /** The sort order to use to write merged segments. */
+ protected Sort indexSort = null;
+
// used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer) {
this.analyzer = analyzer;
@@ -445,6 +449,14 @@ public class LiveIndexWriterConfig {
return commitOnClose;
}
+ /**
+ * Returns the index-time {@link Sort} order, or null if no index sort is in use.
+ * Merged segments will be written in this order.
+ */
+ public Sort getIndexSort() {
+ return indexSort;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -467,6 +479,7 @@ public class LiveIndexWriterConfig {
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
sb.append("commitOnClose=").append(getCommitOnClose()).append("\n");
+ sb.append("indexSort=").append(getIndexSort()).append("\n");
return sb.toString();
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
index a06c34f3c80..166878d3f8f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
@@ -18,8 +18,11 @@ package org.apache.lucene.index;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.index.MultiPostingsEnum.EnumWithSlice;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/**
@@ -30,52 +33,66 @@ import org.apache.lucene.util.BytesRef;
*/
final class MappingMultiPostingsEnum extends PostingsEnum {
- private MultiPostingsEnum.EnumWithSlice[] subs;
- int numSubs;
- int upto;
- MergeState.DocMap currentMap;
- PostingsEnum current;
- int currentBase;
- int doc = -1;
- private MergeState mergeState;
MultiPostingsEnum multiDocsAndPositionsEnum;
final String field;
+ final DocIDMerger docIDMerger;
+ private MappingPostingsSub current;
+ private final MappingPostingsSub[] allSubs;
+ private final List subs = new ArrayList<>();
+
+ private static class MappingPostingsSub extends DocIDMerger.Sub {
+ public PostingsEnum postings;
+
+ public MappingPostingsSub(MergeState.DocMap docMap) {
+ super(docMap);
+ }
+
+ @Override
+ public int nextDoc() {
+ try {
+ return postings.nextDoc();
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
+ }
+ }
/** Sole constructor. */
- public MappingMultiPostingsEnum(String field, MergeState mergeState) {
+ public MappingMultiPostingsEnum(String field, MergeState mergeState) throws IOException {
this.field = field;
- this.mergeState = mergeState;
+ allSubs = new MappingPostingsSub[mergeState.fieldsProducers.length];
+ for(int i=0;i<allSubs.length;i++) {
+ allSubs[i] = new MappingPostingsSub(mergeState.docMaps[i]);
+ }
+ this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.segmentInfo.getIndexSort() != null);
}
- MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) {
- this.numSubs = postingsEnum.getNumSubs();
- this.subs = postingsEnum.getSubs();
- upto = -1;
- doc = -1;
- current = null;
+ MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException {
this.multiDocsAndPositionsEnum = postingsEnum;
+ MultiPostingsEnum.EnumWithSlice[] subsArray = postingsEnum.getSubs();
+ int count = postingsEnum.getNumSubs();
+ subs.clear();
+ for(int i=0;i<count;i++) {
+ MappingPostingsSub sub = allSubs[subsArray[i].slice.readerIndex];
+ sub.postings = subsArray[i].postingsEnum;
+ subs.add(sub);
+ }
+ docIDMerger.reset();
+ return this;
+ }
- throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + doc,
- mergeState.fieldsProducers[upto].toString());
+ throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + current.mappedDocID,
+ current.postings.toString());
}
return pos;
}
@Override
public int startOffset() throws IOException {
- return current.startOffset();
+ return current.postings.startOffset();
}
@Override
public int endOffset() throws IOException {
- return current.endOffset();
+ return current.postings.endOffset();
}
@Override
public BytesRef getPayload() throws IOException {
- return current.getPayload();
+ return current.postings.getPayload();
}
@Override
public long cost() {
long cost = 0;
- for (EnumWithSlice enumWithSlice : subs) {
- cost += enumWithSlice.postingsEnum.cost();
+ for (MappingPostingsSub sub : subs) {
+ cost += sub.postings.cost();
}
return cost;
}
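The rewrite above replaces the slice/upto bookkeeping with DocIDMerger subs, each of which yields doc IDs already mapped into the merged segment's doc ID space. A standalone sketch (plain Java, not Lucene code) of that k-way interleaving idea:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;
    import java.util.PriorityQueue;

    // Standalone sketch: when the merged segment is sorted, per-segment doc ID streams
    // must be interleaved by their mapped (merged-segment) doc IDs, smallest first.
    public class KWayDocIDMergeSketch {
      static int[] merge(List<int[]> mappedDocIDsPerSub) {
        PriorityQueue<int[]> pq = new PriorityQueue<>(
            // queue entries are {subIndex, positionWithinSub}, ordered by next mapped doc ID
            Comparator.comparingInt((int[] e) -> mappedDocIDsPerSub.get(e[0])[e[1]]));
        int total = 0;
        for (int i = 0; i < mappedDocIDsPerSub.size(); i++) {
          total += mappedDocIDsPerSub.get(i).length;
          if (mappedDocIDsPerSub.get(i).length > 0) {
            pq.add(new int[] {i, 0});
          }
        }
        int[] merged = new int[total];
        int upto = 0;
        while (!pq.isEmpty()) {
          int[] top = pq.poll();
          int[] sub = mappedDocIDsPerSub.get(top[0]);
          merged[upto++] = sub[top[1]];
          if (top[1] + 1 < sub.length) {
            pq.add(new int[] {top[0], top[1] + 1});
          }
        }
        return merged;
      }

      public static void main(String[] args) {
        List<int[]> subs = new ArrayList<>();
        subs.add(new int[] {0, 3, 5});   // mapped doc IDs from segment 0
        subs.add(new int[] {1, 2, 4});   // mapped doc IDs from segment 1
        System.out.println(Arrays.toString(merge(subs)));   // [0, 1, 2, 3, 4, 5]
      }
    }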
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
index 1d67c4a0abc..c42b052d288 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
@@ -58,31 +58,6 @@ import org.apache.lucene.util.FixedBitSet;
*/
public abstract class MergePolicy {
- /** A map of doc IDs. */
- public static abstract class DocMap {
- /** Sole constructor, typically invoked from sub-classes constructors. */
- protected DocMap() {}
-
- /** Return the new doc ID according to its old value. */
- public abstract int map(int old);
-
- /** Useful from an assert. */
- boolean isConsistent(int maxDoc) {
- final FixedBitSet targets = new FixedBitSet(maxDoc);
- for (int i = 0; i < maxDoc; ++i) {
- final int target = map(i);
- if (target < 0 || target >= maxDoc) {
- assert false : "out of range: " + target + " not in [0-" + maxDoc + "[";
- return false;
- } else if (targets.get(target)) {
- assert false : target + " is already taken (" + i + ")";
- return false;
- }
- }
- return true;
- }
- }
-
/** OneMerge provides the information necessary to perform
* an individual primitive merge operation, resulting in
* a single new segment. The merge spec includes the
@@ -140,25 +115,11 @@ public abstract class MergePolicy {
public void mergeFinished() throws IOException {
}
- /** Expert: Get the list of readers to merge. Note that this list does not
- * necessarily match the list of segments to merge and should only be used
- * to feed SegmentMerger to initialize a merge. When a {@link OneMerge}
- * reorders doc IDs, it must override {@link #getDocMap} too so that
- * deletes that happened during the merge can be applied to the newly
- * merged segment. */
- public List getMergeReaders() throws IOException {
- if (readers == null) {
- throw new IllegalStateException("IndexWriter has not initialized readers from the segment infos yet");
- }
- final List readers = new ArrayList<>(this.readers.size());
- for (SegmentReader reader : this.readers) {
- if (reader.numDocs() > 0) {
- readers.add(reader);
- }
- }
- return Collections.unmodifiableList(readers);
+ /** Wrap the reader in order to add/remove information to the merged segment. */
+ public CodecReader wrapForMerge(CodecReader reader) throws IOException {
+ return reader;
}
-
+
/**
* Expert: Sets the {@link SegmentCommitInfo} of the merged segment.
* Allows sub-classes to e.g. set diagnostics properties.
@@ -175,20 +136,6 @@ public abstract class MergePolicy {
return info;
}
- /** Expert: If {@link #getMergeReaders()} reorders document IDs, this method
- * must be overridden to return a mapping from the natural doc ID
- * (the doc ID that would result from a natural merge) to the actual doc
- * ID. This mapping is used to apply deletions that happened during the
- * merge to the new segment. */
- public DocMap getDocMap(MergeState mergeState) {
- return new DocMap() {
- @Override
- public int map(int docID) {
- return docID;
- }
- };
- }
-
/** Record that an exception occurred while executing
* this merge */
synchronized void setException(Throwable error) {
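With getMergeReaders() and getDocMap() removed, a OneMerge now customizes merging by wrapping each incoming reader instead. A hypothetical subclass, purely for illustration (it only logs and returns the reader unchanged, so doc IDs stay stable and no doc map is needed):

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.CodecReader;
    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.index.SegmentCommitInfo;

    // Hypothetical sketch: a OneMerge that observes each reader before it is merged.
    // Returning a FilterCodecReader subclass here would let a merge add or strip information.
    class LoggingOneMerge extends MergePolicy.OneMerge {
      LoggingOneMerge(List<SegmentCommitInfo> segments) {
        super(segments);
      }

      @Override
      public CodecReader wrapForMerge(CodecReader reader) throws IOException {
        System.out.println("merging reader with maxDoc=" + reader.maxDoc());
        return reader;
      }
    }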
diff --git a/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
similarity index 95%
rename from lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java
rename to lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
index dba5c913f00..2401d0fa3ee 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/MergeReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
@@ -1,3 +1,5 @@
+package org.apache.lucene.index;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -14,7 +16,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
import java.io.IOException;
@@ -23,18 +24,19 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
-/** this is a hack to make SortingMP fast! */
+/** This is a hack to make index sorting fast, with a {@link LeafReader} that always returns merge instances when you ask for the codec readers. */
class MergeReaderWrapper extends LeafReader {
- final SegmentReader in;
+ final CodecReader in;
final FieldsProducer fields;
final NormsProducer norms;
final DocValuesProducer docValues;
final StoredFieldsReader store;
final TermVectorsReader vectors;
- MergeReaderWrapper(SegmentReader in) throws IOException {
+ MergeReaderWrapper(CodecReader in) throws IOException {
this.in = in;
FieldsProducer fields = in.getPostingsReader();
@@ -256,4 +258,9 @@ class MergeReaderWrapper extends LeafReader {
public String toString() {
return "MergeReaderWrapper(" + in + ")";
}
+
+ @Override
+ public Sort getIndexSort() {
+ return in.getIndexSort();
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java
index 7242785e101..3723f19476f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java
@@ -18,7 +18,10 @@ package org.apache.lucene.index;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Locale;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
@@ -26,6 +29,7 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
@@ -36,6 +40,12 @@ import org.apache.lucene.util.packed.PackedLongValues;
* @lucene.experimental */
public class MergeState {
+ /** Maps document IDs from old segments to document IDs in the new segment */
+ public final DocMap[] docMaps;
+
+ // Only used by IW when it must remap deletes that arrived against the merging segments while a merge was running:
+ final DocMap[] leafDocMaps;
+
/** {@link SegmentInfo} of the newly merged segment. */
public final SegmentInfo segmentInfo;
@@ -60,18 +70,12 @@ public class MergeState {
/** Live docs for each reader */
public final Bits[] liveDocs;
- /** Maps docIDs around deletions. */
- public final DocMap[] docMaps;
-
/** Postings to merge */
public final FieldsProducer[] fieldsProducers;
/** Point readers to merge */
public final PointsReader[] pointsReaders;
- /** New docID base per reader. */
- public final int[] docBase;
-
/** Max docs per reader */
public final int[] maxDocs;
@@ -79,11 +83,15 @@ public class MergeState {
public final InfoStream infoStream;
/** Sole constructor. */
- MergeState(List readers, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
+ MergeState(List originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
+
+ this.infoStream = infoStream;
+
+ final Sort indexSort = segmentInfo.getIndexSort();
+ int numReaders = originalReaders.size();
+ leafDocMaps = new DocMap[numReaders];
+ List readers = maybeSortReaders(originalReaders, segmentInfo);
- int numReaders = readers.size();
- docMaps = new DocMap[numReaders];
- docBase = new int[numReaders];
maxDocs = new int[numReaders];
fieldsProducers = new FieldsProducer[numReaders];
normsProducers = new NormsProducer[numReaders];
@@ -94,6 +102,7 @@ public class MergeState {
fieldInfos = new FieldInfos[numReaders];
liveDocs = new Bits[numReaders];
+ int numDocs = 0;
for(int i=0;i readers) throws IOException {
- final int numReaders = maxDocs.length;
+ private DocMap[] buildDocMaps(List readers, Sort indexSort) throws IOException {
- // Remap docIDs
- int docBase = 0;
- for(int i=0;i 0;
- }
-
- /** Creates a {@link DocMap} instance appropriate for
- * this reader. */
- public static DocMap build(CodecReader reader) {
- final int maxDoc = reader.maxDoc();
- if (!reader.hasDeletions()) {
- return new NoDelDocMap(maxDoc);
- }
- final Bits liveDocs = reader.getLiveDocs();
- return build(maxDoc, liveDocs);
- }
-
- static DocMap build(final int maxDoc, final Bits liveDocs) {
- assert liveDocs != null;
- final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
- int del = 0;
- for (int i = 0; i < maxDoc; ++i) {
- docMapBuilder.add(i - del);
- if (!liveDocs.get(i)) {
- ++del;
+ final PackedLongValues delDocMap;
+ if (liveDocs != null) {
+ delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
+ } else {
+ delDocMap = null;
}
- }
- final PackedLongValues docMap = docMapBuilder.build();
- final int numDeletedDocs = del;
- assert docMap.size() == maxDoc;
- return new DocMap() {
- @Override
- public int get(int docID) {
- if (!liveDocs.get(docID)) {
- return -1;
+ final int docBase = totalDocs;
+ docMaps[i] = new DocMap() {
+ @Override
+ public int get(int docID) {
+ if (liveDocs == null) {
+ return docBase + docID;
+ } else if (liveDocs.get(docID)) {
+ return docBase + (int) delDocMap.get(docID);
+ } else {
+ return -1;
+ }
}
- return (int) docMap.get(docID);
- }
+ };
+ totalDocs += reader.numDocs();
+ }
- @Override
- public int maxDoc() {
- return maxDoc;
- }
+ return docMaps;
- @Override
- public int numDeletedDocs() {
- return numDeletedDocs;
- }
- };
+ } else {
+ // do a merge sort of the incoming leaves:
+ long t0 = System.nanoTime();
+ DocMap[] result = MultiSorter.sort(indexSort, readers);
+ long t1 = System.nanoTime();
+ if (infoStream.isEnabled("SM")) {
+ infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0));
+ }
+ return result;
}
}
- private static final class NoDelDocMap extends DocMap {
+ private List maybeSortReaders(List originalReaders, SegmentInfo segmentInfo) throws IOException {
- private final int maxDoc;
-
- NoDelDocMap(int maxDoc) {
- this.maxDoc = maxDoc;
+ // Default to identity:
+ for(int i=0;i<originalReaders.size();i++) {
+ leafDocMaps[i] = new DocMap() {
+ @Override
+ public int get(int docID) {
+ return docID;
+ }
+ };
+ }
+
+ Sort indexSort = segmentInfo.getIndexSort();
+ if (indexSort == null) {
+ return originalReaders;
+ }
+
+ final Sorter sorter = new Sorter(indexSort);
+ List<CodecReader> readers = new ArrayList<>(originalReaders.size());
+
+ for (CodecReader leaf : originalReaders) {
+ Sort segmentSort = leaf.getIndexSort();
+
+ if (segmentSort == null) {
+ // TODO: fix IW to also sort when flushing? It's somewhat tricky because of stored fields and term vectors, which write "live"
+ // to their index files on each indexed document:
+
+ // This segment was written by flush, so documents are not yet sorted, so we sort them now:
+ long t0 = System.nanoTime();
+ Sorter.DocMap sortDocMap = sorter.sort(leaf);
+ long t1 = System.nanoTime();
+ double msec = (t1-t0)/1000000.0;
+
+ if (sortDocMap != null) {
+ if (infoStream.isEnabled("SM")) {
+ infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted; wrapping for sort %s now (%.2f msec to sort)", leaf, indexSort, msec));
+ }
+ leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap));
+ leafDocMaps[readers.size()] = new DocMap() {
+ @Override
+ public int get(int docID) {
+ return sortDocMap.oldToNew(docID);
+ }
+ };
+ } else {
+ if (infoStream.isEnabled("SM")) {
+ infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted, but is already accidentally in sort %s order (%.2f msec to sort)", leaf, indexSort, msec));
+ }
+ }
+
+ } else {
+ if (segmentSort.equals(indexSort) == false) {
+ throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort + " but to-be-merged segment has sort=" + segmentSort);
+ }
+ if (infoStream.isEnabled("SM")) {
+ infoStream.message("SM", "segment " + leaf + " already sorted");
+ }
+ }
+
+ readers.add(leaf);
}
- @Override
- public int numDeletedDocs() {
- return 0;
+ return readers;
+ }
+
+ /** A map of doc IDs. */
+ public static abstract class DocMap {
+ /** Sole constructor */
+ public DocMap() {
}
+
+ /** Return the mapped docID or -1 if the given doc is not mapped. */
+ public abstract int get(int docID);
+ }
+
+ static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
+ final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
+ int del = 0;
+ for (int i = 0; i < maxDoc; ++i) {
+ docMapBuilder.add(i - del);
+ if (liveDocs.get(i) == false) {
+ ++del;
+ }
+ }
+ return docMapBuilder.build();
}
}
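For the unsorted case above, buildDocMaps squeezes deletes out of each segment and adds a running docBase, encoded compactly with PackedLongValues. A standalone sketch of the same mapping using a plain int array (not Lucene code):

    import java.util.Arrays;

    // Standalone sketch: deletes are removed from a segment's doc ID space and a running
    // docBase is added, which is what the PackedLongValues-based DocMap above encodes.
    public class DeleteAwareDocMapSketch {
      /** Returns oldDocID -> newDocID in the merged segment, or -1 when the old doc is deleted. */
      static int[] buildDocMap(boolean[] liveDocs, int docBase) {
        int[] map = new int[liveDocs.length];
        int newDocID = 0;
        for (int oldDocID = 0; oldDocID < liveDocs.length; oldDocID++) {
          map[oldDocID] = liveDocs[oldDocID] ? docBase + newDocID++ : -1;
        }
        return map;
      }

      public static void main(String[] args) {
        boolean[] liveDocs = {true, false, true, true};
        // This segment starts at docBase=10 in the merged segment; doc 1 is deleted:
        System.out.println(Arrays.toString(buildDocMap(liveDocs, 10)));   // [10, -1, 11, 12]
      }
    }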
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java
index 54563254162..062fc303c09 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiPostingsEnum.java
@@ -57,7 +57,7 @@ public final class MultiPostingsEnum extends PostingsEnum {
return this.parent == parent;
}
- /** Rre-use and reset this instance on the provided slices. */
+ /** Re-use and reset this instance on the provided slices. */
public MultiPostingsEnum reset(final EnumWithSlice[] subs, final int numSubs) {
this.numSubs = numSubs;
for(int i=0;i<numSubs;i++) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
new file mode 100644
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+ final class MultiSorter {
+
+ static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {
+
+ // TODO: optimize if only 1 reader is incoming, though that's a rare case
+
+ SortField fields[] = sort.getSort();
+ final CrossReaderComparator[] comparators = new CrossReaderComparator[fields.length];
+ for(int i=0;i queue = new PriorityQueue(leafCount) {
+ @Override
+ public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
+ for(int i=0;i readers, SortField sortField) throws IOException {
+ switch(sortField.getType()) {
+
+ case STRING:
+ {
+ // this uses the efficient segment-local ordinal map:
+ MultiReader multiReader = new MultiReader(readers.toArray(new LeafReader[readers.size()]));
+ final SortedDocValues sorted = MultiDocValues.getSortedValues(multiReader, sortField.getField());
+ final int[] docStarts = new int[readers.size()];
+ List leaves = multiReader.leaves();
+ for(int i=0;i values = new ArrayList<>();
+ List docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final long missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Long) sortField.getMissingValue();
+ } else {
+ missingValue = 0;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ long valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = values.get(readerIndexA).get(docIDA);
+ } else {
+ valueA = missingValue;
+ }
+
+ long valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = values.get(readerIndexB).get(docIDB);
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Long.compare(valueA, valueB);
+ }
+ };
+ }
+
+ case INT:
+ {
+ List values = new ArrayList<>();
+ List docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final int missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Integer) sortField.getMissingValue();
+ } else {
+ missingValue = 0;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ int valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = (int) values.get(readerIndexA).get(docIDA);
+ } else {
+ valueA = missingValue;
+ }
+
+ int valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = (int) values.get(readerIndexB).get(docIDB);
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Integer.compare(valueA, valueB);
+ }
+ };
+ }
+
+ case DOUBLE:
+ {
+ List values = new ArrayList<>();
+ List docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final double missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Double) sortField.getMissingValue();
+ } else {
+ missingValue = 0.0;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ double valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA));
+ } else {
+ valueA = missingValue;
+ }
+
+ double valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB));
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Double.compare(valueA, valueB);
+ }
+ };
+ }
+
+ case FLOAT:
+ {
+ List values = new ArrayList<>();
+ List docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final float missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Float) sortField.getMissingValue();
+ } else {
+ missingValue = 0.0f;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ float valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = Float.intBitsToFloat((int) values.get(readerIndexA).get(docIDA));
+ } else {
+ valueA = missingValue;
+ }
+
+ float valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = Float.intBitsToFloat((int) values.get(readerIndexB).get(docIDB));
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Float.compare(valueA, valueB);
+ }
+ };
+ }
+
+ default:
+ throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());
+ }
+ }
+}
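The DOUBLE and FLOAT branches above decode the raw doc-values bits before comparing; comparing the stored longs directly would misorder negative values. A standalone worked example (plain Java, not Lucene code):

    // Standalone sketch: why the comparators above call Double.longBitsToDouble
    // (and Float.intBitsToFloat) before comparing.
    public class SortBitsDecodeSketch {
      public static void main(String[] args) {
        long bitsNeg1 = Double.doubleToLongBits(-1.0);
        long bitsNeg2 = Double.doubleToLongBits(-2.0);

        // Comparing raw bits misorders negatives: -2.0 would sort after -1.0.
        System.out.println(Long.compare(bitsNeg2, bitsNeg1) > 0);   // true (wrong order)

        // Decoding first restores numeric order, as MultiSorter's DOUBLE comparator does.
        System.out.println(Double.compare(Double.longBitsToDouble(bitsNeg2),
                                          Double.longBitsToDouble(bitsNeg1)) < 0);   // true
      }
    }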
diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
index 532265f59c7..d85ff2d0fa2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
@@ -26,6 +26,7 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/** An {@link LeafReader} which reads multiple, parallel indexes. Each index
@@ -55,6 +56,7 @@ public class ParallelLeafReader extends LeafReader {
private final boolean closeSubReaders;
private final int maxDoc, numDocs;
private final boolean hasDeletions;
+ private final Sort indexSort;
private final SortedMap fieldToReader = new TreeMap<>();
private final SortedMap tvFieldToReader = new TreeMap<>();
@@ -100,8 +102,18 @@ public class ParallelLeafReader extends LeafReader {
// TODO: make this read-only in a cleaner way?
FieldInfos.Builder builder = new FieldInfos.Builder();
+
+ Sort indexSort = null;
+
// build FieldInfos and fieldToReader map:
for (final LeafReader reader : this.parallelReaders) {
+ Sort leafIndexSort = reader.getIndexSort();
+ if (indexSort == null) {
+ indexSort = leafIndexSort;
+ } else if (leafIndexSort != null && indexSort.equals(leafIndexSort) == false) {
+ throw new IllegalArgumentException("cannot combine LeafReaders that have different index sorts: saw both sort=" + indexSort + " and " + leafIndexSort);
+ }
+
final FieldInfos readerFieldInfos = reader.getFieldInfos();
for (FieldInfo fieldInfo : readerFieldInfos) {
// NOTE: first reader having a given field "wins":
@@ -115,6 +127,7 @@ public class ParallelLeafReader extends LeafReader {
}
}
fieldInfos = builder.finish();
+ this.indexSort = indexSort;
// build Fields instance
for (final LeafReader reader : this.parallelReaders) {
@@ -423,4 +436,10 @@ public class ParallelLeafReader extends LeafReader {
ensureOpen();
return parallelReaders;
}
+
+ @Override
+ public Sort getIndexSort() {
+ return indexSort;
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
index bed84589576..ec12365e958 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -28,6 +28,7 @@ import java.util.Set;
import java.util.regex.Matcher;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.StringHelper;
@@ -69,6 +70,8 @@ public final class SegmentInfo {
private final Map attributes;
+ private final Sort indexSort;
+
// Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
@@ -93,7 +96,7 @@ public final class SegmentInfo {
*/
public SegmentInfo(Directory dir, Version version, String name, int maxDoc,
boolean isCompoundFile, Codec codec, Map diagnostics,
- byte[] id, Map attributes) {
+ byte[] id, Map attributes, Sort indexSort) {
assert !(dir instanceof TrackingDirectoryWrapper);
this.dir = Objects.requireNonNull(dir);
this.version = Objects.requireNonNull(version);
@@ -107,6 +110,7 @@ public final class SegmentInfo {
throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
}
this.attributes = Objects.requireNonNull(attributes);
+ this.indexSort = indexSort;
}
/**
@@ -194,13 +198,9 @@ public final class SegmentInfo {
s.append('/').append(delCount);
}
- final String sorter_key = "sorter"; // SortingMergePolicy.SORTER_ID_PROP; // TODO: use this once we can import SortingMergePolicy (currently located in 'misc' instead of 'core')
- final String sorter_val = diagnostics.get(sorter_key);
- if (sorter_val != null) {
- s.append(":[");
- s.append(sorter_key);
- s.append('=');
- s.append(sorter_val);
+ if (indexSort != null) {
+ s.append(":[indexSort=");
+ s.append(indexSort);
s.append(']');
}
@@ -311,5 +311,10 @@ public final class SegmentInfo {
public Map getAttributes() {
return attributes;
}
+
+ /** Return the sort order of this segment, or null if the index has no sort. */
+ public Sort getIndexSort() {
+ return indexSort;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
index b0d9bcff50b..d23f01024d8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -59,6 +59,11 @@ final class SegmentMerger {
this.codec = segmentInfo.getCodec();
this.context = context;
this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
+ if (mergeState.infoStream.isEnabled("SM")) {
+ if (segmentInfo.getIndexSort() != null) {
+ mergeState.infoStream.message("SM", "index sort during merge: " + segmentInfo.getIndexSort());
+ }
+ }
}
/** True if any merging should happen */
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
index 8ed93e376c3..e68f8186272 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
@@ -28,6 +28,7 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
@@ -303,4 +304,9 @@ public final class SegmentReader extends CodecReader {
ensureOpen();
core.removeCoreClosedListener(listener);
}
+
+ @Override
+ public Sort getIndexSort() {
+ return si.info.getIndexSort();
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
index 3a7370138a6..2742247381a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
@@ -26,6 +26,7 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -125,6 +126,16 @@ public final class SlowCodecReaderWrapper {
public void removeCoreClosedListener(CoreClosedListener listener) {
reader.removeCoreClosedListener(listener);
}
+
+ @Override
+ public String toString() {
+ return "SlowCodecReaderWrapper(" + reader + ")";
+ }
+
+ @Override
+ public Sort getIndexSort() {
+ return reader.getIndexSort();
+ }
};
}
}
diff --git a/lucene/misc/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java
similarity index 99%
rename from lucene/misc/src/java/org/apache/lucene/index/Sorter.java
rename to lucene/core/src/java/org/apache/lucene/index/Sorter.java
index 7e4e475a248..cf75c18f6f0 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/Sorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java
@@ -168,6 +168,7 @@ final class Sorter {
}
final PackedLongValues newToOld = newToOldBuilder.build();
+ // invert the docs mapping:
for (int i = 0; i < maxDoc; ++i) {
docs[(int) newToOld.get(i)] = i;
} // docs is now the oldToNew mapping
@@ -196,7 +197,7 @@ final class Sorter {
}
};
}
-
+
/**
* Returns a mapping from the old document ID to its new location in the
* sorted index. Implementations can use the auxiliary
diff --git a/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
similarity index 96%
rename from lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java
rename to lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
index 683c5c263bb..70d5d204439 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
@@ -1,3 +1,5 @@
+package org.apache.lucene.index;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -14,7 +16,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
@@ -35,21 +36,13 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* An {@link org.apache.lucene.index.LeafReader} which supports sorting documents by a given
- * {@link Sort}. You can use this class to sort an index as follows:
- *
- *
- * IndexWriter writer; // writer to which the sorted index will be added
- * DirectoryReader reader; // reader on the input index
- * Sort sort; // determines how the documents are sorted
- * LeafReader sortingReader = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
- * writer.addIndexes(reader);
- * writer.close();
- * reader.close();
- *
+ * {@link Sort}. This is package private and is only used by Lucene when it needs to merge
+ * a newly flushed (unsorted) segment.
*
* @lucene.experimental
*/
-public class SortingLeafReader extends FilterLeafReader {
+
+class SortingLeafReader extends FilterLeafReader {
private static class SortingFields extends FilterFields {
@@ -111,25 +104,6 @@ public class SortingLeafReader extends FilterLeafReader {
this.hasPositions = hasPositions;
}
- Bits newToOld(final Bits liveDocs) {
- if (liveDocs == null) {
- return null;
- }
- return new Bits() {
-
- @Override
- public boolean get(int index) {
- return liveDocs.get(docMap.oldToNew(index));
- }
-
- @Override
- public int length() {
- return liveDocs.length();
- }
-
- };
- }
-
@Override
public PostingsEnum postings( PostingsEnum reuse, final int flags) throws IOException {
@@ -368,6 +342,7 @@ public class SortingLeafReader extends FilterLeafReader {
@Override
public void setDocument(int docID) {
+ //System.out.println(" slr.sssdv.setDocument docID=" + docID + " this=" + this);
in.setDocument(docMap.newToOld(docID));
}
@@ -865,7 +840,6 @@ public class SortingLeafReader extends FilterLeafReader {
if (inPointValues == null) {
return null;
} else {
- // TODO: this is untested!
return new SortingPointValues(inPointValues, docMap);
}
}
diff --git a/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java b/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
similarity index 77%
rename from lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
rename to lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
index 5d82be41450..1af1b9f35e2 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
@@ -20,14 +20,14 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.SortingMergePolicy;
-import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.FilterCollector;
+import org.apache.lucene.search.FilterLeafCollector;
+import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TotalHitCountCollector;
@@ -39,8 +39,7 @@ import org.apache.lucene.search.TotalHitCountCollector;
*
*
* NOTE: the {@code Collector} detects segments sorted according to a
- * {@link SortingMergePolicy}'s {@link Sort} and so it's best used in conjunction
- * with a {@link SortingMergePolicy}. Also,it collects up to a specified
+ * sort set by {@link IndexWriterConfig#setIndexSort}. Also, it collects up to a specified
* {@code numDocsToCollect} from each segment, and therefore is mostly suitable
* for use in conjunction with collectors such as {@link TopDocsCollector}, and
* not e.g. {@link TotalHitCountCollector}.
@@ -48,24 +47,12 @@ import org.apache.lucene.search.TotalHitCountCollector;
* NOTE : If you wrap a {@code TopDocsCollector} that sorts in the same
* order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
* will be correct. However the total of {@link TopDocsCollector#getTotalHits()
- * hit count} will be underestimated since not all matching documents will have
+ * hit count} will be vastly underestimated since not all matching documents will have
* been collected.
- *
- * NOTE : This {@code Collector} uses {@link Sort#toString()} to detect
- * whether a segment was sorted with the same {@code Sort}. This has
- * two implications:
- *
- * if a custom comparator is not implemented correctly and returns
- * different identifiers for equivalent instances, this collector will not
- * detect sorted segments,
- * if you suddenly change the {@link IndexWriter}'s
- * {@code SortingMergePolicy} to sort according to another criterion and if both
- * the old and the new {@code Sort}s have the same identifier, this
- * {@code Collector} will incorrectly detect sorted segments.
- *
*
* @lucene.experimental
*/
+
public class EarlyTerminatingSortingCollector extends FilterCollector {
/** Returns whether collection can be early-terminated if it sorts with the
@@ -85,7 +72,6 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
protected final Sort sort;
/** Number of documents to collect in each segment */
protected final int numDocsToCollect;
- private final Sort mergePolicySort;
private final AtomicBoolean terminatedEarly = new AtomicBoolean(false);
/**
@@ -99,27 +85,26 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
* the number of documents to collect on each segment. When wrapping
* a {@link TopDocsCollector}, this number should be the number of
* hits.
- * @param mergePolicySort
- * the sort your {@link SortingMergePolicy} uses
* @throws IllegalArgumentException if the sort order doesn't allow for early
* termination with the given merge policy.
*/
- public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, Sort mergePolicySort) {
+ public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
super(in);
if (numDocsToCollect <= 0) {
throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
}
- if (canEarlyTerminate(sort, mergePolicySort) == false) {
- throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicySort);
- }
this.sort = sort;
this.numDocsToCollect = numDocsToCollect;
- this.mergePolicySort = mergePolicySort;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- if (SortingMergePolicy.isSorted(context.reader(), mergePolicySort)) {
+ Sort segmentSort = context.reader().getIndexSort();
+ if (segmentSort != null && canEarlyTerminate(sort, segmentSort) == false) {
+ throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + segmentSort);
+ }
+
+ if (segmentSort != null) {
// segment is sorted, can early-terminate
return new FilterLeafCollector(super.getLeafCollector(context)) {
private int numCollected;
@@ -142,5 +127,4 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
public boolean terminatedEarly() {
return terminatedEarly.get();
}
-
}
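
Not part of the patch, but for context: a minimal usage sketch of the collector after this change. It assumes the post-patch package (org.apache.lucene.search), an IndexSearcher and Query already in hand, and a made-up "timestamp" field; the index is presumed to have been written with the same Sort via IndexWriterConfig#setIndexSort.

```java
import java.io.IOException;

import org.apache.lucene.search.EarlyTerminatingSortingCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;

class EarlyTerminationSketch {
  // Collect the top 10 docs by "timestamp"; segments carrying a compatible index sort
  // are terminated after 10 collected docs, unsorted segments are collected fully.
  static TopDocs top10(IndexSearcher searcher, Query query) throws IOException {
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    TopFieldCollector topCollector = TopFieldCollector.create(sort, 10, true, false, false);
    // The mergePolicySort constructor argument is gone; the segment's sort is read
    // from the reader in getLeafCollector instead.
    searcher.search(query, new EarlyTerminatingSortingCollector(topCollector, sort, 10));
    return topCollector.topDocs(); // totalHits may be underestimated once termination kicks in
  }
}
```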
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 3ac64c84c5d..b81b8079a1c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -819,6 +819,7 @@ public class IndexSearcher {
sumTotalTermFreq = terms.getSumTotalTermFreq();
sumDocFreq = terms.getSumDocFreq();
}
+
return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/Sort.java b/lucene/core/src/java/org/apache/lucene/search/Sort.java
index 7493e9b6a89..77585a2a674 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Sort.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Sort.java
@@ -147,6 +147,9 @@ public class Sort {
* etc. Finally, if there is still a tie after all SortFields
* are checked, the internal Lucene docid is used to break it. */
public void setSort(SortField... fields) {
+ if (fields.length == 0) {
+ throw new IllegalArgumentException("There must be at least 1 sort field");
+ }
this.fields = fields;
}
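
A quick illustration (not in the patch) of the precondition added above: passing an empty varargs array now fails fast instead of silently creating a Sort with no fields.

```java
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

class EmptySortSketch {
  static void demo() {
    new Sort(new SortField("foo", SortField.Type.LONG)); // fine: one sort field
    try {
      new Sort(new SortField[0]); // the varargs constructor delegates to setSort(SortField...)
    } catch (IllegalArgumentException expected) {
      // "There must be at least 1 sort field"
    }
  }
}
```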
diff --git a/lucene/core/src/java/org/apache/lucene/search/SortField.java b/lucene/core/src/java/org/apache/lucene/search/SortField.java
index 880697bd605..412a50ab4b4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SortField.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SortField.java
@@ -77,9 +77,6 @@ public class SortField {
* uses ordinals to do the sorting. */
STRING_VAL,
- /** Sort use byte[] index values. */
- BYTES,
-
/** Force rewriting of SortField using {@link SortField#rewrite(IndexSearcher)}
* before it can be used for sorting */
REWRITEABLE
diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java
index d8873ccf65f..d5640d8a7a4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/Version.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Version.java
@@ -33,14 +33,18 @@ import java.util.Locale;
public final class Version {
/** Match settings and bugs in Lucene's 6.0 release.
- *
- * Use this to get the latest & greatest settings, bug
- * fixes, etc, for Lucene.
* @deprecated (7.0.0) Use latest
*/
@Deprecated
public static final Version LUCENE_6_0_0 = new Version(6, 0, 0);
+ /**
+ * Match settings and bugs in Lucene's 6.0.1 release.
+ * @deprecated Use latest
+ */
+ @Deprecated
+ public static final Version LUCENE_6_0_1 = new Version(6, 0, 1);
+
/**
* Match settings and bugs in Lucene's 6.1.0 release.
* @deprecated Use latest
@@ -50,6 +54,9 @@ public final class Version {
/**
* Match settings and bugs in Lucene's 7.0.0 release.
+ *
+ * Use this to get the latest & greatest settings, bug
+ * fixes, etc, for Lucene.
*/
public static final Version LUCENE_7_0_0 = new Version(7, 0, 0);
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 288ece4c51d..09eef266b6d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -299,9 +299,6 @@ public class BKDWriter implements Closeable {
final BKDReader.IntersectState state;
final MergeState.DocMap docMap;
- /** Base offset for all our docIDs */
- final int docIDBase;
-
/** Current doc ID */
public int docID;
@@ -314,7 +311,7 @@ public class BKDWriter implements Closeable {
/** Which leaf block we are up to */
private int blockID;
- public MergeReader(BKDReader bkd, MergeState.DocMap docMap, int docIDBase) throws IOException {
+ public MergeReader(BKDReader bkd, MergeState.DocMap docMap) throws IOException {
this.bkd = bkd;
state = new BKDReader.IntersectState(bkd.in.clone(),
bkd.numDims,
@@ -322,7 +319,6 @@ public class BKDWriter implements Closeable {
bkd.maxPointsInLeafNode,
null);
this.docMap = docMap;
- this.docIDBase = docIDBase;
long minFP = Long.MAX_VALUE;
//System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length);
for(long fp : bkd.leafBlockFPs) {
@@ -396,14 +392,14 @@ public class BKDWriter implements Closeable {
}
// Tie break by sorting smaller docIDs earlier:
- return a.docIDBase < b.docIDBase;
+ return a.docID < b.docID;
}
}
/** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already
* sorted values and currently only works when numDims==1. This returns -1 if all documents containing
* dimensional values were deleted. */
- public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers, List<Integer> docIDBases) throws IOException {
+ public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers) throws IOException {
if (numDims != 1) {
throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
}
@@ -411,8 +407,6 @@ public class BKDWriter implements Closeable {
throw new IllegalStateException("cannot mix add and merge");
}
- //System.out.println("BKDW.merge segs=" + readers.size());
-
// Catch user silliness:
if (heapPointWriter == null && tempInput == null) {
throw new IllegalStateException("already finished");
@@ -433,7 +427,7 @@ public class BKDWriter implements Closeable {
} else {
docMap = docMaps.get(i);
}
- MergeReader reader = new MergeReader(bkd, docMap, docIDBases.get(i));
+ MergeReader reader = new MergeReader(bkd, docMap);
if (reader.next()) {
queue.add(reader);
}
@@ -468,7 +462,7 @@ public class BKDWriter implements Closeable {
// System.out.println("iter reader=" + reader);
// NOTE: doesn't work with subclasses (e.g. SimpleText!)
- int docID = reader.docIDBase + reader.docID;
+ int docID = reader.docID;
leafBlockDocIDs[leafCount] = docID;
System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
docsSeen.set(docID);
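
The idea behind dropping docIDBase, as a standalone sketch (hypothetical names, not BKDWriter code): once a merge can reorder documents according to an index sort, a reader's documents no longer occupy a contiguous [base, base + maxDoc) range, so each merged doc ID has to come from the per-segment old-to-new map (with -1 marking deletions) rather than from base + local doc.

```java
class DocMapSketch {
  // Hypothetical per-segment old->new doc ID map, in the role a MergeState.DocMap plays.
  static int mergedDocID(int[] oldToNew, int localDoc) {
    return oldToNew[localDoc]; // -1 means the document was deleted
  }

  public static void main(String[] args) {
    int[] segment0 = {3, 0, 5};     // local docs 0..2 scatter across the merged segment
    int[] segment1 = {1, -1, 2, 4}; // local doc 1 was deleted
    System.out.println(mergedDocID(segment0, 2)); // 5
    System.out.println(mergedDocID(segment1, 1)); // -1
  }
}
```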
diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 875aba527e2..548f8d09244 100644
--- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.lucene60.Lucene60Codec
+org.apache.lucene.codecs.lucene62.Lucene62Codec
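
The SPI entry above is what makes the new codec resolvable by name; for example (illustrative only):

```java
import org.apache.lucene.codecs.Codec;

class CodecLookupSketch {
  public static void main(String[] args) {
    // Looked up through META-INF/services/org.apache.lucene.codecs.Codec
    Codec codec = Codec.forName("Lucene62");
    System.out.println(codec.getName()); // Lucene62
  }
}
```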
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
index 59e48144d44..f945c2d0dc0 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
@@ -19,7 +19,7 @@ package org.apache.lucene.codecs.lucene50;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
@@ -33,7 +33,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene60Codec(Mode.BEST_COMPRESSION);
+ return new Lucene62Codec(Mode.BEST_COMPRESSION);
}
/**
@@ -44,7 +44,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
Directory dir = newDirectory();
for (int i = 0; i < 10; i++) {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(new Lucene60Codec(RandomPicks.randomFrom(random(), Mode.values())));
+ iwc.setCodec(new Lucene62Codec(RandomPicks.randomFrom(random(), Mode.values())));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StoredField("field1", "value1"));
@@ -71,7 +71,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
public void testInvalidOptions() throws Exception {
expectThrows(NullPointerException.class, () -> {
- new Lucene60Codec(null);
+ new Lucene62Codec(null);
});
expectThrows(NullPointerException.class, () -> {
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java
index c915de0bfce..a0ad87fca87 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene53/TestLucene53NormsFormat.java
@@ -18,14 +18,14 @@ package org.apache.lucene.codecs.lucene53;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
/**
* Tests Lucene53NormsFormat
*/
public class TestLucene53NormsFormat extends BaseNormsFormatTestCase {
- private final Codec codec = new Lucene60Codec();
+ private final Codec codec = new Lucene62Codec();
@Override
protected Codec getCodec() {
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java
similarity index 89%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java
index 81143300ee5..8c758f29e5a 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50SegmentInfoFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene62/TestLucene62SegmentInfoFormat.java
@@ -1,3 +1,5 @@
+package org.apache.lucene.codecs.lucene62;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -14,8 +16,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
-
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
@@ -23,9 +23,9 @@ import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
/**
- * Tests Lucene50SegmentInfoFormat
+ * Tests Lucene62SegmentInfoFormat
*/
-public class TestLucene50SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
+public class TestLucene62SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
@Override
protected Version[] getVersions() {
diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
index 2f3a3a69890..da8dbac0f5e 100644
--- a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
+++ b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
@@ -24,8 +24,6 @@ import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.IndexSearcher;
@@ -143,6 +141,6 @@ public class Test2BPoints extends LuceneTestCase {
}
private static Codec getCodec() {
- return Codec.forName("Lucene60");
+ return Codec.forName("Lucene62");
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java b/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java
index 22b3605965b..22d12346d4e 100644
--- a/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java
+++ b/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java
@@ -53,7 +53,7 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
// disk (but, should run successfully). Best to run w/
// -Dtests.codec=, and w/ plenty of RAM, eg:
//
-// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene60 -Dtestcase=Test2BTerms
+// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene62 -Dtestcase=Test2BTerms
//
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
@Monster("very slow, use 5g minimum heap")
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
index d1148ef13a6..9d00c3f42d2 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
@@ -39,6 +39,8 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
@@ -1281,4 +1283,53 @@ public class TestAddIndexes extends LuceneTestCase {
w2.close();
IOUtils.close(src, dest);
}
+
+ public void testIllegalIndexSortChange1() throws Exception {
+ Directory dir1 = newDirectory();
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
+ RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
+ w1.addDocument(new Document());
+ w1.commit();
+ w1.addDocument(new Document());
+ w1.commit();
+ // so the index sort is in fact burned into the index:
+ w1.forceMerge(1);
+ w1.close();
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING)));
+ RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
+ String message = expectThrows(IllegalArgumentException.class, () -> {
+ w2.addIndexes(dir1);
+ }).getMessage();
+ assertEquals("cannot change index sort from to ", message);
+ IOUtils.close(dir1, w2, dir2);
+ }
+
+ public void testIllegalIndexSortChange2() throws Exception {
+ Directory dir1 = newDirectory();
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
+ RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
+ w1.addDocument(new Document());
+ w1.commit();
+ w1.addDocument(new Document());
+ w1.commit();
+ // so the index sort is in fact burned into the index:
+ w1.forceMerge(1);
+ w1.close();
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING)));
+ RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
+ IndexReader r1 = DirectoryReader.open(dir1);
+ String message = expectThrows(IllegalArgumentException.class, () -> {
+ w2.addIndexes((SegmentReader) getOnlyLeafReader(r1));
+ }).getMessage();
+ assertEquals("cannot change index sort from to ", message);
+ IOUtils.close(r1, dir1, w2, dir2);
+ }
}
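
For contrast with the two failure tests above, a sketch (not part of the patch) of the accepted case: when the incoming index carries the same sort as the destination writer, addIndexes should succeed. RAMDirectory and WhitespaceAnalyzer (from analyzers-common) are just convenient stand-ins here.

```java
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

class AddIndexesSameSortSketch {
  public static void main(String[] args) throws Exception {
    Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));

    Directory src = new RAMDirectory();
    IndexWriterConfig srcConf = new IndexWriterConfig(new WhitespaceAnalyzer());
    srcConf.setIndexSort(indexSort);
    try (IndexWriter srcWriter = new IndexWriter(src, srcConf)) {
      srcWriter.addDocument(new Document()); // a missing sort value is allowed
    }

    Directory dest = new RAMDirectory();
    IndexWriterConfig destConf = new IndexWriterConfig(new WhitespaceAnalyzer());
    destConf.setIndexSort(indexSort); // same sort as the source: no exception
    try (IndexWriter destWriter = new IndexWriter(dest, destConf)) {
      destWriter.addIndexes(src);
    }
  }
}
```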
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
index 73b4622b82c..bd1e9b6e14f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
@@ -222,7 +222,7 @@ public class TestCodecs extends LuceneTestCase {
final FieldInfos fieldInfos = builder.finish();
final Directory dir = newDirectory();
Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
this.write(si, fieldInfos, dir, fields);
final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));
@@ -279,7 +279,7 @@ public class TestCodecs extends LuceneTestCase {
}
Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
this.write(si, fieldInfos, dir, fields);
if (VERBOSE) {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
index 9f3339c8c69..0dc654cb212 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
@@ -503,7 +503,7 @@ public class TestDemoParallelLeafReader extends LuceneTestCase {
class ReindexingOneMerge extends OneMerge {
-    List<LeafReader> parallelReaders;
+    final List<ParallelLeafReader> parallelReaders = new ArrayList<>();
final long schemaGen;
ReindexingOneMerge(List segments) {
@@ -519,33 +519,23 @@ public class TestDemoParallelLeafReader extends LuceneTestCase {
}
@Override
-    public List<CodecReader> getMergeReaders() throws IOException {
- if (parallelReaders == null) {
- parallelReaders = new ArrayList<>();
- for (CodecReader reader : super.getMergeReaders()) {
- parallelReaders.add(getCurrentReader((SegmentReader)reader, schemaGen));
- }
+ public CodecReader wrapForMerge(CodecReader reader) throws IOException {
+ LeafReader wrapped = getCurrentReader((SegmentReader)reader, schemaGen);
+ if (wrapped instanceof ParallelLeafReader) {
+ parallelReaders.add((ParallelLeafReader) wrapped);
}
-
- // TODO: fix ParallelLeafReader, if this is a good use case
-      List<CodecReader> mergeReaders = new ArrayList<>();
- for (LeafReader reader : parallelReaders) {
- mergeReaders.add(SlowCodecReaderWrapper.wrap(reader));
- }
- return mergeReaders;
+ return SlowCodecReaderWrapper.wrap(wrapped);
}
@Override
public void mergeFinished() throws IOException {
Throwable th = null;
- for(LeafReader r : parallelReaders) {
- if (r instanceof ParallelLeafReader) {
- try {
- r.decRef();
- } catch (Throwable t) {
- if (th == null) {
- th = t;
- }
+ for (ParallelLeafReader r : parallelReaders) {
+ try {
+ r.decRef();
+ } catch (Throwable t) {
+ if (th == null) {
+ th = t;
}
}
}
@@ -561,10 +551,6 @@ public class TestDemoParallelLeafReader extends LuceneTestCase {
super.setMergeInfo(info);
}
- @Override
- public MergePolicy.DocMap getDocMap(final MergeState mergeState) {
- return super.getDocMap(mergeState);
- }
}
class ReindexingMergeSpecification extends MergeSpecification {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
index 803b1d9bc65..8b24b4d7bc5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
@@ -218,7 +218,7 @@ public class TestDoc extends LuceneTestCase {
final Codec codec = Codec.getDefault();
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
- final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentMerger merger = new SegmentMerger(Arrays.asList(r1, r2),
si, InfoStream.getDefault(), trackingDir,
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
new file mode 100644
index 00000000000..003db9e4529
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+public class TestDocIDMerger extends LuceneTestCase {
+
+ private static class TestSubUnsorted extends DocIDMerger.Sub {
+ private int docID = -1;
+ final int valueStart;
+ final int maxDoc;
+
+ public TestSubUnsorted(MergeState.DocMap docMap, int maxDoc, int valueStart) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.valueStart = valueStart;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+
+ public int getValue() {
+ return valueStart + docID;
+ }
+ }
+
+ public void testNoSort() throws Exception {
+
+ int subCount = TestUtil.nextInt(random(), 1, 20);
+    List<TestSubUnsorted> subs = new ArrayList<>();
+    int valueStart = 0;
+    for(int i=0;i<subCount;i++) {
+      int maxDoc = TestUtil.nextInt(random(), 1, 1000);
+      final int docBase = valueStart;
+      subs.add(new TestSubUnsorted(new MergeState.DocMap() {
+          @Override
+          public int get(int docID) {
+            return docBase + docID;
+          }
+        }, maxDoc, valueStart));
+      valueStart += maxDoc;
+    }
+
+    DocIDMerger<TestSubUnsorted> merger = new DocIDMerger<>(subs, false);
+
+ int count = 0;
+ while (true) {
+ TestSubUnsorted sub = merger.next();
+ if (sub == null) {
+ break;
+ }
+ assertEquals(count, sub.mappedDocID);
+ assertEquals(count, sub.getValue());
+ count++;
+ }
+
+ assertEquals(valueStart, count);
+ }
+
+ private static class TestSubSorted extends DocIDMerger.Sub {
+ private int docID = -1;
+ final int maxDoc;
+ final int index;
+
+ public TestSubSorted(MergeState.DocMap docMap, int maxDoc, int index) {
+ super(docMap);
+ this.maxDoc = maxDoc;
+ this.index = index;
+ }
+
+ @Override
+ public int nextDoc() {
+ docID++;
+ if (docID == maxDoc) {
+ return NO_MORE_DOCS;
+ } else {
+ return docID;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "TestSubSorted(index=" + index + ", mappedDocID=" + mappedDocID+ ")";
+ }
+ }
+
+ public void testWithSort() throws Exception {
+
+ int subCount = TestUtil.nextInt(random(), 1, 20);
+    List<int[]> oldToNew = new ArrayList<>();
+    // how many docs we've written to each sub:
+    List<Integer> uptos = new ArrayList<>();
+    int totDocCount = 0;
+    for(int i=0;i<subCount;i++) {
+      int maxDoc = TestUtil.nextInt(random(), 1, 1000);
+      uptos.add(0);
+      oldToNew.add(new int[maxDoc]);
+      totDocCount += maxDoc;
+    }
+
+    List<TestSubSorted> completedSubs = new ArrayList<>();
+
+    // randomly distribute target docIDs into the segments:
+    for(int docID=0;docID<totDocCount;docID++) {
+      int sub = random().nextInt(subCount);
+      while (uptos.get(sub) == oldToNew.get(sub).length) {
+        sub = random().nextInt(subCount);
+      }
+      oldToNew.get(sub)[uptos.get(sub)] = docID;
+      uptos.set(sub, uptos.get(sub) + 1);
+    }
+
+    // sometimes mark some target docs as deleted; their subs map them to -1:
+    final FixedBitSet liveDocs;
+    if (random().nextBoolean()) {
+      liveDocs = new FixedBitSet(totDocCount);
+      liveDocs.set(0, totDocCount);
+      int deleteCount = TestUtil.nextInt(random(), 1, totDocCount);
+      for(int i=0;i<deleteCount;i++) {
+        liveDocs.clear(random().nextInt(totDocCount));
+      }
+    } else {
+      liveDocs = null;
+    }
+
+    List<TestSubSorted> subs = new ArrayList<>();
+    for(int i=0;i<subCount;i++) {
+      final int[] docMap = oldToNew.get(i);
+      subs.add(new TestSubSorted(new MergeState.DocMap() {
+          @Override
+          public int get(int docID) {
+            int mapped = docMap[docID];
+            if (liveDocs != null && liveDocs.get(mapped) == false) {
+              return -1;
+            }
+            return mapped;
+          }
+        }, docMap.length, i));
+    }
+
+    DocIDMerger<TestSubSorted> merger = new DocIDMerger<>(subs, true);
+
+ int count = 0;
+ while (true) {
+ TestSubSorted sub = merger.next();
+ if (sub == null) {
+ break;
+ }
+ if (liveDocs != null) {
+ count = liveDocs.nextSetBit(count);
+ }
+ assertEquals(count, sub.mappedDocID);
+ count++;
+ }
+
+ if (liveDocs != null) {
+ if (count < totDocCount) {
+ assertEquals(NO_MORE_DOCS, liveDocs.nextSetBit(count));
+ } else {
+ assertEquals(totDocCount, count);
+ }
+ } else {
+ assertEquals(totDocCount, count);
+ }
+ }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
new file mode 100644
index 00000000000..4e775f3e5da
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -0,0 +1,1377 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.BinaryPoint;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FloatDocValuesField;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.EarlyTerminatingSortingCollector;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.TestUtil;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestIndexSorting extends LuceneTestCase {
+
+ public void testBasicString() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.STRING));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("aaa")));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals("aaa", values.get(0).utf8ToString());
+ assertEquals("mmm", values.get(1).utf8ToString());
+ assertEquals("zzz", values.get(2).utf8ToString());
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingStringFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.STRING);
+ sortField.setMissingValue(SortField.STRING_FIRST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals(-1, values.getOrd(0));
+ assertEquals("mmm", values.get(1).utf8ToString());
+ assertEquals("zzz", values.get(2).utf8ToString());
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingStringLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.STRING);
+ sortField.setMissingValue(SortField.STRING_LAST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals("mmm", values.get(0).utf8ToString());
+ assertEquals("zzz", values.get(1).utf8ToString());
+ assertEquals(-1, values.getOrd(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicLong() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1, values.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingLongFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0, values.get(0));
+ assertFalse(docsWithField.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingLongLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7, values.get(0));
+ assertEquals(18, values.get(1));
+ assertEquals(0, values.get(2));
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicInt() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1, values.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingIntFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0, values.get(0));
+ assertFalse(docsWithField.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingIntLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7, values.get(0));
+ assertEquals(18, values.get(1));
+ assertEquals(0, values.get(2));
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicDouble() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", -1.0));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingDoubleFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingDoubleLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.POSITIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(0.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicFloat() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", -1.0f));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingFloatFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingFloatLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.POSITIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testRandom1() throws IOException {
+ boolean withDeletes = random().nextBoolean();
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ final int numDocs = atLeast(1000);
+ final FixedBitSet deleted = new FixedBitSet(numDocs);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
+ doc.add(new StringField("id", Integer.toString(i), Store.YES));
+ doc.add(new NumericDocValuesField("id", i));
+ w.addDocument(doc);
+ if (random().nextInt(5) == 0) {
+ w.getReader().close();
+ } else if (random().nextInt(30) == 0) {
+ w.forceMerge(2);
+ } else if (random().nextInt(4) == 0) {
+ final int id = TestUtil.nextInt(random(), 0, i);
+ deleted.set(id);
+ w.deleteDocuments(new Term("id", Integer.toString(id)));
+ }
+ }
+
+ // Check that segments are sorted
+ DirectoryReader reader = w.getReader();
+ for (LeafReaderContext ctx : reader.leaves()) {
+ final SegmentReader leaf = (SegmentReader) ctx.reader();
+ SegmentInfo info = leaf.getSegmentInfo().info;
+ switch (info.getDiagnostics().get(IndexWriter.SOURCE)) {
+ case IndexWriter.SOURCE_FLUSH:
+ assertNull(info.getIndexSort());
+ break;
+ case IndexWriter.SOURCE_MERGE:
+ assertEquals(indexSort, info.getIndexSort());
+ final NumericDocValues values = leaf.getNumericDocValues("foo");
+ long previous = Long.MIN_VALUE;
+ for (int i = 0; i < leaf.maxDoc(); ++i) {
+ final long value = values.get(i);
+ assertTrue(value >= previous);
+ previous = value;
+ }
+ break;
+ default:
+ fail();
+ }
+ }
+
+ // Now check that the index is consistent
+ IndexSearcher searcher = newSearcher(reader);
+ for (int i = 0; i < numDocs; ++i) {
+ TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
+ final TopDocs topDocs = searcher.search(termQuery, 1);
+ if (deleted.get(i)) {
+ assertEquals(0, topDocs.totalHits);
+ } else {
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(i, MultiDocValues.getNumericValues(reader, "id").get(topDocs.scoreDocs[0].doc));
+ Document document = reader.document(topDocs.scoreDocs[0].doc);
+ assertEquals(Integer.toString(i), document.get("id"));
+ }
+ }
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ static class UpdateRunnable implements Runnable {
+
+ private final int numDocs;
+ private final Random random;
+ private final AtomicInteger updateCount;
+ private final IndexWriter w;
+    private final Map<Integer,Long> values;
+ private final CountDownLatch latch;
+
+    UpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer,Long> values) {
+ this.numDocs = numDocs;
+ this.random = random;
+ this.latch = latch;
+ this.updateCount = updateCount;
+ this.w = w;
+ this.values = values;
+ }
+
+ @Override
+ public void run() {
+ try {
+ latch.await();
+ while (updateCount.decrementAndGet() >= 0) {
+ final int id = random.nextInt(numDocs);
+ final long value = random.nextInt(20);
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(id), Store.NO));
+ doc.add(new NumericDocValuesField("foo", value));
+
+ synchronized (values) {
+ w.updateDocument(new Term("id", Integer.toString(id)), doc);
+ values.put(id, value);
+ }
+
+ switch (random.nextInt(10)) {
+ case 0:
+ case 1:
+ // reopen
+ DirectoryReader.open(w).close();
+ break;
+ case 2:
+ w.forceMerge(3);
+ break;
+ }
+ }
+ } catch (IOException | InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+ // There is tricky logic to resolve deletes that happened while merging
+ public void testConcurrentUpdates() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+    Map<Integer,Long> values = new HashMap<>();
+
+ final int numDocs = atLeast(100);
+ Thread[] threads = new Thread[2];
+ final AtomicInteger updateCount = new AtomicInteger(atLeast(1000));
+ final CountDownLatch latch = new CountDownLatch(1);
+ for (int i = 0; i < threads.length; ++i) {
+ Random r = new Random(random().nextLong());
+ threads[i] = new Thread(new UpdateRunnable(numDocs, r, latch, updateCount, w, values));
+ }
+ for (Thread thread : threads) {
+ thread.start();
+ }
+ latch.countDown();
+ for (Thread thread : threads) {
+ thread.join();
+ }
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ IndexSearcher searcher = newSearcher(reader);
+ for (int i = 0; i < numDocs; ++i) {
+ final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
+ if (values.containsKey(i) == false) {
+ assertEquals(0, topDocs.totalHits);
+ } else {
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc));
+ }
+ }
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ static class DVUpdateRunnable implements Runnable {
+
+ private final int numDocs;
+ private final Random random;
+ private final AtomicInteger updateCount;
+ private final IndexWriter w;
+    private final Map<Integer,Long> values;
+ private final CountDownLatch latch;
+
+    DVUpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer,Long> values) {
+ this.numDocs = numDocs;
+ this.random = random;
+ this.latch = latch;
+ this.updateCount = updateCount;
+ this.w = w;
+ this.values = values;
+ }
+
+ @Override
+ public void run() {
+ try {
+ latch.await();
+ while (updateCount.decrementAndGet() >= 0) {
+ final int id = random.nextInt(numDocs);
+ final long value = random.nextInt(20);
+
+ synchronized (values) {
+ w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("foo", value));
+ values.put(id, value);
+ }
+
+ switch (random.nextInt(10)) {
+ case 0:
+ case 1:
+ // reopen
+ DirectoryReader.open(w).close();
+ break;
+ case 2:
+ w.forceMerge(3);
+ break;
+ }
+ }
+ } catch (IOException | InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+ // There is tricky logic to resolve dv updates that happened while merging
+ public void testConcurrentDVUpdates() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+    Map<Integer,Long> values = new HashMap<>();
+
+ final int numDocs = atLeast(100);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(i), Store.NO));
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ values.put(i, -1L);
+ }
+ Thread[] threads = new Thread[2];
+ final AtomicInteger updateCount = new AtomicInteger(atLeast(1000));
+ final CountDownLatch latch = new CountDownLatch(1);
+ for (int i = 0; i < threads.length; ++i) {
+ Random r = new Random(random().nextLong());
+ threads[i] = new Thread(new DVUpdateRunnable(numDocs, r, latch, updateCount, w, values));
+ }
+ for (Thread thread : threads) {
+ thread.start();
+ }
+ latch.countDown();
+ for (Thread thread : threads) {
+ thread.join();
+ }
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ IndexSearcher searcher = newSearcher(reader);
+ for (int i = 0; i < numDocs; ++i) {
+ final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc));
+ }
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception {
+ Directory dir = newDirectory();
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ IndexWriterConfig iwc1 = newIndexWriterConfig();
+ if (random().nextBoolean()) {
+ iwc1.setIndexSort(indexSort);
+ }
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ final int numDocs = atLeast(100);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(i), Store.NO));
+ doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
+ w.addDocument(doc);
+ }
+ if (withDeletes) {
+ for (int i = random().nextInt(5); i < numDocs; i += TestUtil.nextInt(random(), 1, 5)) {
+ w.deleteDocuments(new Term("id", Integer.toString(i)));
+ }
+ }
+ if (random().nextBoolean()) {
+ w.forceMerge(1);
+ }
+ final IndexReader reader = w.getReader();
+ w.close();
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w2 = new IndexWriter(dir2, iwc);
+
+ if (useReaders) {
+ CodecReader[] codecReaders = new CodecReader[reader.leaves().size()];
+ for (int i = 0; i < codecReaders.length; ++i) {
+ codecReaders[i] = (CodecReader) reader.leaves().get(i).reader();
+ }
+ w2.addIndexes(codecReaders);
+ } else {
+ w2.addIndexes(dir);
+ }
+ final IndexReader reader2 = w2.getReader();
+ final IndexSearcher searcher = newSearcher(reader);
+ final IndexSearcher searcher2 = newSearcher(reader2);
+ for (int i = 0; i < numDocs; ++i) {
+ Query query = new TermQuery(new Term("id", Integer.toString(i)));
+ final TopDocs topDocs = searcher.search(query, 1);
+ final TopDocs topDocs2 = searcher2.search(query, 1);
+ assertEquals(topDocs.totalHits, topDocs2.totalHits);
+ if (topDocs.totalHits == 1) {
+ assertEquals(
+ MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc),
+ MultiDocValues.getNumericValues(reader2, "foo").get(topDocs2.scoreDocs[0].doc));
+ }
+ }
+
+ IOUtils.close(reader, reader2, w2, dir, dir2);
+ }
+
+ public void testAddIndexes() throws Exception {
+ testAddIndexes(false, true);
+ }
+
+ public void testAddIndexesWithDeletions() throws Exception {
+ testAddIndexes(true, true);
+ }
+
+ public void testAddIndexesWithDirectory() throws Exception {
+ testAddIndexes(false, false);
+ }
+
+ public void testAddIndexesWithDeletionsAndDirectory() throws Exception {
+ testAddIndexes(true, false);
+ }
+
+ public void testBadSort() throws Exception {
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+ iwc.setIndexSort(Sort.RELEVANCE);
+ });
+ assertEquals("invalid SortField type: must be one of [STRING, INT, FLOAT, LONG, DOUBLE] but got: ", expected.getMessage());
+ }
+
+ // you can't change the index sort on an existing index:
+ public void testIllegalChangeSort() throws Exception {
+ final Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG)));
+ IndexWriter w = new IndexWriter(dir, iwc);
+ w.addDocument(new Document());
+ DirectoryReader.open(w).close();
+ w.addDocument(new Document());
+ w.forceMerge(1);
+ w.close();
+
+ final IndexWriterConfig iwc2 = new IndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(new Sort(new SortField("bar", SortField.Type.LONG)));
+ IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
+ new IndexWriter(dir, iwc2);
+ });
+ String message = e.getMessage();
+ assertTrue(message.contains("cannot change previous indexSort="));
+ assertTrue(message.contains("to new indexSort="));
+ dir.close();
+ }
+
+ static final class NormsSimilarity extends Similarity {
+
+ private final Similarity in;
+
+ public NormsSimilarity(Similarity in) {
+ this.in = in;
+ }
+
+ @Override
+ public long computeNorm(FieldInvertState state) {
+ if (state.getName().equals("norms")) {
+ return Float.floatToIntBits(state.getBoost());
+ } else {
+ return in.computeNorm(state);
+ }
+ }
+
+ @Override
+ public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
+ return in.computeWeight(collectionStats, termStats);
+ }
+
+ @Override
+ public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
+ return in.simScorer(weight, context);
+ }
+
+ }
+
+ static final class PositionsTokenStream extends TokenStream {
+
+ private final CharTermAttribute term;
+ private final PayloadAttribute payload;
+ private final OffsetAttribute offset;
+
+ private int pos, off;
+
+ public PositionsTokenStream() {
+ term = addAttribute(CharTermAttribute.class);
+ payload = addAttribute(PayloadAttribute.class);
+ offset = addAttribute(OffsetAttribute.class);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (pos == 0) {
+ return false;
+ }
+
+ clearAttributes();
+ term.append("#all#");
+ payload.setPayload(new BytesRef(Integer.toString(pos)));
+ offset.setOffset(off, off);
+ --pos;
+ ++off;
+ return true;
+ }
+
+ void setId(int id) {
+ pos = id / 10 + 1;
+ off = 0;
+ }
+ }
+
+ public void testRandom2() throws Exception {
+ int numDocs = atLeast(100);
+
+ FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+ POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ POSITIONS_TYPE.freeze();
+
+ FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+ TERM_VECTORS_TYPE.setStoreTermVectors(true);
+ TERM_VECTORS_TYPE.freeze();
+
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer();
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+
+ List<Document> docs = new ArrayList<>();
+ for (int i=0;i docs = new ArrayList<>();
+
+ Sort sort = randomSort();
+ if (VERBOSE) {
+ System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort);
+ }
+
+ // no index sorting, all search-time sorting:
+ Directory dir1 = newFSDirectory(createTempDir());
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter w1 = new IndexWriter(dir1, iwc1);
+
+ // use index sorting:
+ Directory dir2 = newFSDirectory(createTempDir());
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setIndexSort(sort);
+ IndexWriter w2 = new IndexWriter(dir2, iwc2);
+
+ Set<Integer> toDelete = new HashSet<>();
+
+ double deleteChance = random().nextDouble();
+
+ for(int id=0;id {
+ new ParallelLeafReader(getOnlyLeafReader(r1), getOnlyLeafReader(r2));
+ }).getMessage();
+ assertEquals("cannot combine LeafReaders that have different index sorts: saw both sort= and ", message);
+ IOUtils.close(r1, dir1, r2, dir2);
+ }
+
+ // ok to have one leaf w/ index sort and the other with no sort
+ public void testWithIndexSort2() throws Exception {
+ Directory dir1 = newDirectory();
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
+ IndexWriter w1 = new IndexWriter(dir1, iwc1);
+ w1.addDocument(new Document());
+ w1.commit();
+ w1.addDocument(new Document());
+ w1.forceMerge(1);
+ w1.close();
+ IndexReader r1 = DirectoryReader.open(dir1);
+
+ Directory dir2 = newDirectory();
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter w2 = new IndexWriter(dir2, iwc2);
+ w2.addDocument(new Document());
+ w2.addDocument(new Document());
+ w2.close();
+
+ IndexReader r2 = DirectoryReader.open(dir2);
+ new ParallelLeafReader(false, getOnlyLeafReader(r1), getOnlyLeafReader(r2)).close();
+ new ParallelLeafReader(false, getOnlyLeafReader(r2), getOnlyLeafReader(r1)).close();
+ IOUtils.close(r1, dir1, r2, dir2);
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
index 9f55ec3fdcc..9693c5c32b1 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
@@ -394,11 +394,11 @@ public class TestPointValues extends LuceneTestCase {
dir.close();
}
- // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with SimpleText
+ // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with SimpleText
public void testDifferentCodecs1() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(Codec.forName("Lucene60"));
+ iwc.setCodec(Codec.forName("Lucene62"));
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 1));
@@ -417,7 +417,7 @@ public class TestPointValues extends LuceneTestCase {
dir.close();
}
- // Write point values, one segment with Lucene60, another with SimpleText, then forceMerge with Lucene60
+ // Write point values, one segment with Lucene62, another with SimpleText, then forceMerge with Lucene62
public void testDifferentCodecs2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -429,7 +429,7 @@ public class TestPointValues extends LuceneTestCase {
w.close();
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(Codec.forName("Lucene60"));
+ iwc.setCodec(Codec.forName("Lucene62"));
w = new IndexWriter(dir, iwc);
doc = new Document();
doc.add(new IntPoint("int", 1));
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java
index 58ceb445ef6..179d2663a58 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java
@@ -51,7 +51,7 @@ public class TestSegmentInfos extends LuceneTestCase {
SegmentInfos sis = new SegmentInfos();
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(),
- Collections.emptyMap(), id, Collections.emptyMap());
+ Collections.emptyMap(), id, Collections.emptyMap(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
@@ -73,14 +73,14 @@ public class TestSegmentInfos extends LuceneTestCase {
SegmentInfos sis = new SegmentInfos();
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(),
- Collections.emptyMap(), id, Collections.emptyMap());
+ Collections.emptyMap(), id, Collections.emptyMap(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
sis.add(commitInfo);
info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_1", 1, false, Codec.getDefault(),
- Collections.emptyMap(), id, Collections.emptyMap());
+ Collections.emptyMap(), id, Collections.emptyMap(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
index e1075abe735..1ef37c0892f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
@@ -35,6 +35,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.packed.PackedLongValues;
public class TestSegmentMerger extends LuceneTestCase {
//The variables for the new merged segment
@@ -83,7 +84,7 @@ public class TestSegmentMerger extends LuceneTestCase {
public void testMerge() throws IOException {
final Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentMerger merger = new SegmentMerger(Arrays.asList(reader1, reader2),
si, InfoStream.getDefault(), mergedDir,
@@ -144,22 +145,9 @@ public class TestSegmentMerger extends LuceneTestCase {
mergedReader.close();
}
- private static boolean equals(MergeState.DocMap map1, MergeState.DocMap map2) {
- if (map1.maxDoc() != map2.maxDoc()) {
- return false;
- }
- for (int i = 0; i < map1.maxDoc(); ++i) {
- if (map1.get(i) != map2.get(i)) {
- return false;
- }
- }
- return true;
- }
-
public void testBuildDocMap() {
final int maxDoc = TestUtil.nextInt(random(), 1, 128);
final int numDocs = TestUtil.nextInt(random(), 0, maxDoc);
- final int numDeletedDocs = maxDoc - numDocs;
final FixedBitSet liveDocs = new FixedBitSet(maxDoc);
for (int i = 0; i < numDocs; ++i) {
while (true) {
@@ -171,15 +159,11 @@ public class TestSegmentMerger extends LuceneTestCase {
}
}
- final MergeState.DocMap docMap = MergeState.DocMap.build(maxDoc, liveDocs);
+ final PackedLongValues docMap = MergeState.removeDeletes(maxDoc, liveDocs);
- assertEquals(maxDoc, docMap.maxDoc());
- assertEquals(numDocs, docMap.numDocs());
- assertEquals(numDeletedDocs, docMap.numDeletedDocs());
// assert the mapping is compact
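// i.e. every live doc i maps to i minus the number of deleted docs that precede it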
for (int i = 0, del = 0; i < maxDoc; ++i) {
- if (!liveDocs.get(i)) {
- assertEquals(-1, docMap.get(i));
+ if (liveDocs.get(i) == false) {
++del;
} else {
assertEquals(i - del, docMap.get(i));
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
similarity index 75%
rename from lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
rename to lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
index 14bd43714fb..84d326ff48c 100644
--- a/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
@@ -25,23 +25,23 @@ import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.ExitableDirectoryReader;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.index.QueryTimeout;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TestSortingMergePolicy;
-import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
@@ -50,8 +50,6 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.uninverting.UninvertingReader;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@@ -62,18 +60,11 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
private int numDocs;
private List<String> terms;
private Directory dir;
- private Sort sort;
+ private final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
private RandomIndexWriter iw;
private IndexReader reader;
- private SortingMergePolicy mergePolicy;
private final int forceMergeMaxSegmentCount = 5;
- @Override
- public void setUp() throws Exception {
- super.setUp();
- sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
- }
-
private Document randomDocument() {
final Document doc = new Document();
doc.add(new NumericDocValuesField("ndv1", random().nextInt(10)));
@@ -93,9 +84,12 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
terms = new ArrayList<>(randomTerms);
final long seed = random().nextLong();
final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
+ if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
+ // MockRandomMP randomly wraps the leaf readers which makes merging angry
+ iwc.setMergePolicy(newTieredMergePolicy());
+ }
iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
- mergePolicy = TestSortingMergePolicy.newSortingMergePolicy(sort);
- iwc.setMergePolicy(mergePolicy);
+ iwc.setIndexSort(sort);
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
for (int i = 0; i < numDocs; ++i) {
@@ -151,7 +145,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
query = new MatchAllDocsQuery();
}
searcher.search(query, collector1);
- searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy.getSort()));
+ searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
}
@@ -190,40 +184,16 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
}
public void testEarlyTerminationDifferentSorter() throws IOException {
- createRandomIndex(false);
- final int iters = atLeast(3);
- for (int i = 0; i < iters; ++i) {
- final IndexSearcher searcher = newSearcher(reader);
- // test that the collector works correctly when the index was sorted by a
- // different sorter than the one specified in the ctor.
- final int numHits = TestUtil.nextInt(random(), 1, numDocs);
- final Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false));
- final boolean fillFields = random().nextBoolean();
- final boolean trackDocScores = random().nextBoolean();
- final boolean trackMaxScore = random().nextBoolean();
- final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
- final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
-
- final Query query;
- if (random().nextBoolean()) {
- query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
- } else {
- query = new MatchAllDocsQuery();
- }
- searcher.search(query, collector1);
- Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
+ createRandomIndex(true);
- searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, different) {
- @Override
- public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- final LeafCollector ret = super.getLeafCollector(context);
- assertTrue("segment should not be recognized as sorted as different sorter was used", ret.getClass() == in.getLeafCollector(context).getClass());
- return ret;
- }
- });
- assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
- assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
- }
+ Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false));
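+ // the segments were written with an index sort on "ndv1", so asking the collector to
+ // early-terminate on "ndv2" must fail up front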
+ Collector c = new EarlyTerminatingSortingCollector(TopFieldCollector.create(sort, 10, true, true, true), sort, 10);
+ IndexSearcher searcher = newSearcher(reader);
+ Exception e = expectThrows(IllegalStateException.class,
+ () -> {
+ searcher.search(new MatchAllDocsQuery(), c);
+ });
+ assertEquals("Cannot early terminate with sort order if segments are sorted with ", e.getMessage());
closeIndex();
}
@@ -262,34 +232,19 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
}
}
- private IndexSearcher newSearcherForTestTerminatedEarly(IndexReader r) throws IOException {
- switch(random().nextInt(2)) {
- case 0:
- return new IndexSearcher(r);
- case 1:
- assertTrue(r+" is not a DirectoryReader", (r instanceof DirectoryReader));
- final DirectoryReader directoryReader = ExitableDirectoryReader.wrap(
- UninvertingReader.wrap((DirectoryReader) r, new HashMap()),
- new TestEarlyTerminatingSortingcollectorQueryTimeout(false));
- return new IndexSearcher(directoryReader);
- }
- fail("newSearcherForTestTerminatedEarly("+r+") fell through switch");
- return null;
- }
-
public void testTerminatedEarly() throws IOException {
final int iters = atLeast(8);
for (int i = 0; i < iters; ++i) {
createRandomIndex(true);
- final IndexSearcher searcher = newSearcherForTestTerminatedEarly(reader); // future TODO: use newSearcher(reader);
+ final IndexSearcher searcher = new IndexSearcher(reader); // future TODO: use newSearcher(reader);
final Query query = new MatchAllDocsQuery(); // search for everything/anything
final TestTerminatedEarlySimpleCollector collector1 = new TestTerminatedEarlySimpleCollector();
searcher.search(query, collector1);
final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector();
- final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1, mergePolicy.getSort());
+ final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1);
searcher.search(query, etsCollector);
assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething());
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
index 88d89d29417..078c8da3653 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@@ -1151,14 +1151,14 @@ public class TestPointQueries extends LuceneTestCase {
}
private static Codec getCodec() {
- if (Codec.getDefault().getName().equals("Lucene60")) {
+ if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 5.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
- return new FilterCodec("Lucene60", Codec.getDefault()) {
+ return new FilterCodec("Lucene62", Codec.getDefault()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index 38b3fb5c87c..b1a8f8d3f88 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -25,6 +25,7 @@ import java.util.BitSet;
import java.util.List;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.CorruptingIndexOutput;
@@ -554,7 +555,7 @@ public class TestBKD extends LuceneTestCase {
}
List<Long> toMerge = null;
- List<Integer> docIDBases = null;
+ List<MergeState.DocMap> docMaps = null;
int seg = 0;
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length, false);
@@ -601,9 +602,15 @@ public class TestBKD extends LuceneTestCase {
if (useMerge && segCount == valuesInThisSeg) {
if (toMerge == null) {
toMerge = new ArrayList<>();
- docIDBases = new ArrayList<>();
+ docMaps = new ArrayList<>();
}
- docIDBases.add(lastDocIDBase);
+ final int curDocIDBase = lastDocIDBase;
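+ // stand-in doc map: maps this segment's local docIDs to global docIDs by adding the base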
+ docMaps.add(new MergeState.DocMap() {
+ @Override
+ public int get(int docID) {
+ return curDocIDBase + docID;
+ }
+ });
toMerge.add(w.finish(out));
valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues/2);
segCount = 0;
@@ -620,8 +627,14 @@ public class TestBKD extends LuceneTestCase {
if (toMerge != null) {
if (segCount > 0) {
- docIDBases.add(lastDocIDBase);
toMerge.add(w.finish(out));
+ final int curDocIDBase = lastDocIDBase;
+ docMaps.add(new MergeState.DocMap() {
+ @Override
+ public int get(int docID) {
+ return curDocIDBase + docID;
+ }
+ });
}
out.close();
in = dir.openInput("bkd", IOContext.DEFAULT);
@@ -633,7 +646,7 @@ public class TestBKD extends LuceneTestCase {
readers.add(new BKDReader(in));
}
out = dir.createOutput("bkd2", IOContext.DEFAULT);
- indexFP = w.merge(out, null, readers, docIDBases);
+ indexFP = w.merge(out, docMaps, readers);
out.close();
in.close();
in = dir.openInput("bkd2", IOContext.DEFAULT);
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
index 4d76fa9dd39..55f360ad308 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
@@ -21,7 +21,6 @@ import java.util.Collections;
import java.util.Iterator;
import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -29,11 +28,13 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -178,4 +179,8 @@ public class TermVectorLeafReader extends LeafReader {
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
}
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
}
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 5b133013ed5..e3aa4b14a13 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -40,6 +40,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.*;
@@ -1606,6 +1607,10 @@ public class MemoryIndex {
return info.getNormDocValues();
}
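+ // a MemoryIndex is a single in-memory segment that is never sorted at index time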
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
}
/**
diff --git a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
index c672ed00bac..368c2854a22 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
@@ -140,7 +140,7 @@ public class IndexSplitter {
SegmentInfo info = infoPerCommit.info;
// Same info just changing the dir:
SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.maxDoc(),
- info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>());
+ info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>(), null);
destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.getDelCount(),
infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen(),
infoPerCommit.getDocValuesGen()));
diff --git a/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java b/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
index de79ab07f76..de711fda460 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
+++ b/lucene/misc/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
@@ -272,4 +273,9 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
ctx.reader().checkIntegrity();
}
}
+
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
}
diff --git a/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java
deleted file mode 100644
index cd8f84e056e..00000000000
--- a/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.MergeTrigger;
-import org.apache.lucene.index.MultiReader;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentInfos;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.InfoStream;
-import org.apache.lucene.util.packed.PackedInts;
-import org.apache.lucene.util.packed.PackedLongValues;
-
-/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
- * before merging them. As a consequence, all segments resulting from a merge
- * will be sorted while segments resulting from a flush will be in the order
- * in which documents have been added.
- * NOTE : Never use this policy if you rely on
- * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments}
- * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
- *
NOTE : This policy should only be used with idempotent {@code Sort}s
- * so that the order of segments is predictable. For example, using
- * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
- * the order of documents in a segment depend on the number of times the segment
- * has been merged.
- * @lucene.experimental */
-public final class SortingMergePolicy extends MergePolicyWrapper {
-
- /**
- * Put in the {@link SegmentInfo#getDiagnostics() diagnostics} to denote that
- * this segment is sorted.
- */
- public static final String SORTER_ID_PROP = "sorter";
-
- class SortingOneMerge extends OneMerge {
-
- List unsortedReaders;
- Sorter.DocMap docMap;
- LeafReader sortedView;
- final InfoStream infoStream;
-
- SortingOneMerge(List segments, InfoStream infoStream) {
- super(segments);
- this.infoStream = infoStream;
- }
-
- @Override
- public List getMergeReaders() throws IOException {
- if (unsortedReaders == null) {
- unsortedReaders = super.getMergeReaders();
- if (infoStream.isEnabled("SMP")) {
- infoStream.message("SMP", "sorting " + unsortedReaders);
- for (LeafReader leaf : unsortedReaders) {
- String sortDescription = getSortDescription(leaf);
- if (sortDescription == null) {
- sortDescription = "not sorted";
- }
- infoStream.message("SMP", "seg=" + leaf + " " + sortDescription);
- }
- }
- // wrap readers, to be optimal for merge;
- List wrapped = new ArrayList<>(unsortedReaders.size());
- for (LeafReader leaf : unsortedReaders) {
- if (leaf instanceof SegmentReader) {
- leaf = new MergeReaderWrapper((SegmentReader)leaf);
- }
- wrapped.add(leaf);
- }
- final LeafReader atomicView;
- if (wrapped.size() == 1) {
- atomicView = wrapped.get(0);
- } else {
- final CompositeReader multiReader = new MultiReader(wrapped.toArray(new LeafReader[wrapped.size()]));
- atomicView = new SlowCompositeReaderWrapper(multiReader, true);
- }
- docMap = sorter.sort(atomicView);
- sortedView = SortingLeafReader.wrap(atomicView, docMap);
- }
- // a null doc map means that the readers are already sorted
- if (docMap == null) {
- if (infoStream.isEnabled("SMP")) {
- infoStream.message("SMP", "readers already sorted, omitting sort");
- }
- return unsortedReaders;
- } else {
- if (infoStream.isEnabled("SMP")) {
- infoStream.message("SMP", "sorting readers by " + sort);
- }
- return Collections.singletonList(SlowCodecReaderWrapper.wrap(sortedView));
- }
- }
-
- @Override
- public void setMergeInfo(SegmentCommitInfo info) {
- Map diagnostics = info.info.getDiagnostics();
- diagnostics.put(SORTER_ID_PROP, sorter.getID());
- super.setMergeInfo(info);
- }
-
- private PackedLongValues getDeletes(List readers) {
- PackedLongValues.Builder deletes = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
- int deleteCount = 0;
- for (LeafReader reader : readers) {
- final int maxDoc = reader.maxDoc();
- final Bits liveDocs = reader.getLiveDocs();
- for (int i = 0; i < maxDoc; ++i) {
- if (liveDocs != null && !liveDocs.get(i)) {
- ++deleteCount;
- } else {
- deletes.add(deleteCount);
- }
- }
- }
- return deletes.build();
- }
-
- @Override
- public MergePolicy.DocMap getDocMap(final MergeState mergeState) {
- if (unsortedReaders == null) {
- throw new IllegalStateException();
- }
- if (docMap == null) {
- return super.getDocMap(mergeState);
- }
- assert mergeState.docMaps.length == 1; // we returned a singleton reader
- final PackedLongValues deletes = getDeletes(unsortedReaders);
- return new MergePolicy.DocMap() {
- @Override
- public int map(int old) {
- final int oldWithDeletes = old + (int) deletes.get(old);
- final int newWithDeletes = docMap.oldToNew(oldWithDeletes);
- return mergeState.docMaps[0].get(newWithDeletes);
- }
- };
- }
-
- @Override
- public String toString() {
- return "SortingMergePolicy.SortingOneMerge(segments=" + segString() + " sort=" + sort + ")";
- }
- }
-
- class SortingMergeSpecification extends MergeSpecification {
- final InfoStream infoStream;
-
- SortingMergeSpecification(InfoStream infoStream) {
- this.infoStream = infoStream;
- }
-
- @Override
- public void add(OneMerge merge) {
- super.add(new SortingOneMerge(merge.segments, infoStream));
- }
-
- @Override
- public String segString(Directory dir) {
- return "SortingMergeSpec(" + super.segString(dir) + ", sorter=" + sorter + ")";
- }
-
- }
-
- /** Returns {@code true} if the given {@code reader} is sorted by the
- * {@code sort} given. Typically the given {@code sort} would be the
- * {@link SortingMergePolicy#getSort()} order of a {@link SortingMergePolicy}. */
- public static boolean isSorted(LeafReader reader, Sort sort) {
- String description = getSortDescription(reader);
- if (description != null && description.equals(sort.toString())) {
- return true;
- }
- return false;
- }
-
- private static String getSortDescription(LeafReader reader) {
- if (reader instanceof SegmentReader) {
- final SegmentReader segReader = (SegmentReader) reader;
- final Map diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
- if (diagnostics != null) {
- return diagnostics.get(SORTER_ID_PROP);
- }
- } else if (reader instanceof FilterLeafReader) {
- return getSortDescription(FilterLeafReader.unwrap(reader));
- }
- return null;
- }
-
- private MergeSpecification sortedMergeSpecification(MergeSpecification specification, InfoStream infoStream) {
- if (specification == null) {
- return null;
- }
- MergeSpecification sortingSpec = new SortingMergeSpecification(infoStream);
- for (OneMerge merge : specification.merges) {
- sortingSpec.add(merge);
- }
- return sortingSpec;
- }
-
- final Sorter sorter;
- final Sort sort;
-
- /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
- public SortingMergePolicy(MergePolicy in, Sort sort) {
- super(in);
- this.sorter = new Sorter(sort);
- this.sort = sort;
- }
-
- /** Return the {@link Sort} order that is used to sort segments when merging. */
- public Sort getSort() {
- return sort;
- }
-
- @Override
- public MergeSpecification findMerges(MergeTrigger mergeTrigger,
- SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
- return sortedMergeSpecification(in.findMerges(mergeTrigger, segmentInfos, writer), writer.infoStream);
- }
-
- @Override
- public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
- int maxSegmentCount, Map segmentsToMerge, IndexWriter writer)
- throws IOException {
- return sortedMergeSpecification(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer), writer.infoStream);
- }
-
- @Override
- public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
- throws IOException {
- return sortedMergeSpecification(in.findForcedDeletesMerges(segmentInfos, writer), writer.infoStream);
- }
-
- @Override
- public String toString() {
- return "SortingMergePolicy(" + in + ", sorter=" + sorter + ")";
- }
-}
diff --git a/lucene/misc/src/java/org/apache/lucene/index/package.html b/lucene/misc/src/java/org/apache/lucene/index/package.html
index dc9cbb7b676..33ce964eaf2 100644
--- a/lucene/misc/src/java/org/apache/lucene/index/package.html
+++ b/lucene/misc/src/java/org/apache/lucene/index/package.html
@@ -18,23 +18,5 @@
Misc index tools and index support.
-
-SortingMergePolicy:
-Provides index sorting capablities. The application can use any
-Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
-reverse the order of the documents (by using SortField.Type.DOC in reverse).
-Multi-level sorts can be specified the same way you would when searching, by
-building Sort from multiple SortFields.
-
-
-{@link org.apache.lucene.index.SortingMergePolicy} can be used to
-make Lucene sort segments before merging them. This will ensure that every
-segment resulting from a merge will be sorted according to the provided
-{@link org.apache.lucene.search.Sort}. This however makes merging and
-thus indexing slower.
-
-
-Sorted segments allow for early query termination when the sort order
-matches index order. This makes query execution faster since not all documents
-need to be visited. Please note that this is an expert feature and should not
-be used without a deep understanding of Lucene merging and document collection.
diff --git a/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java b/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java
deleted file mode 100644
index 03a2cb8ee78..00000000000
--- a/lucene/misc/src/java/org/apache/lucene/search/BlockJoinComparatorSource.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.SortingMergePolicy;
-import org.apache.lucene.util.BitSet;
-
-/**
- * Helper class to sort readers that contain blocks of documents.
- *
- * Note that this class is intended to used with {@link SortingMergePolicy},
- * and for other purposes has some limitations:
- *
- * Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
- * Filling sort field values is not yet supported.
- *
- * @lucene.experimental
- */
-// TODO: can/should we clean this thing up (e.g. return a proper sort value)
-// and move to the join/ module?
-public class BlockJoinComparatorSource extends FieldComparatorSource {
- final Query parentsFilter;
- final Sort parentSort;
- final Sort childSort;
-
- /**
- * Create a new BlockJoinComparatorSource, sorting only blocks of documents
- * with {@code parentSort} and not reordering children with a block.
- *
- * @param parentsFilter Filter identifying parent documents
- * @param parentSort Sort for parent documents
- */
- public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort) {
- this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC));
- }
-
- /**
- * Create a new BlockJoinComparatorSource, specifying the sort order for both
- * blocks of documents and children within a block.
- *
- * @param parentsFilter Filter identifying parent documents
- * @param parentSort Sort for parent documents
- * @param childSort Sort for child documents in the same block
- */
- public BlockJoinComparatorSource(Query parentsFilter, Sort parentSort, Sort childSort) {
- this.parentsFilter = parentsFilter;
- this.parentSort = parentSort;
- this.childSort = childSort;
- }
-
- @Override
- @SuppressWarnings({"unchecked", "rawtypes"})
- public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
- // we keep parallel slots: the parent ids and the child ids
- final int parentSlots[] = new int[numHits];
- final int childSlots[] = new int[numHits];
-
- SortField parentFields[] = parentSort.getSort();
- final int parentReverseMul[] = new int[parentFields.length];
- final FieldComparator> parentComparators[] = new FieldComparator[parentFields.length];
- for (int i = 0; i < parentFields.length; i++) {
- parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1;
- parentComparators[i] = parentFields[i].getComparator(1, i);
- }
-
- SortField childFields[] = childSort.getSort();
- final int childReverseMul[] = new int[childFields.length];
- final FieldComparator> childComparators[] = new FieldComparator[childFields.length];
- for (int i = 0; i < childFields.length; i++) {
- childReverseMul[i] = childFields[i].getReverse() ? -1 : 1;
- childComparators[i] = childFields[i].getComparator(1, i);
- }
-
- // NOTE: we could return parent ID as value but really our sort "value" is more complex...
- // So we throw UOE for now. At the moment you really should only use this at indexing time.
- return new FieldComparator() {
- int bottomParent;
- int bottomChild;
- BitSet parentBits;
- LeafFieldComparator[] parentLeafComparators;
- LeafFieldComparator[] childLeafComparators;
-
- @Override
- public int compare(int slot1, int slot2) {
- try {
- return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public void setTopValue(Integer value) {
- // we dont have enough information (the docid is needed)
- throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
- }
-
- @Override
- public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
- if (parentBits != null) {
- throw new IllegalStateException("This comparator can only be used on a single segment");
- }
- IndexSearcher searcher = new IndexSearcher(ReaderUtil.getTopLevelContext(context));
- searcher.setQueryCache(null);
- final Weight weight = searcher.createNormalizedWeight(parentsFilter, false);
- final Scorer parents = weight.scorer(context);
- if (parents == null) {
- throw new IllegalStateException("LeafReader " + context.reader() + " contains no parents!");
- }
- parentBits = BitSet.of(parents.iterator(), context.reader().maxDoc());
- parentLeafComparators = new LeafFieldComparator[parentComparators.length];
- for (int i = 0; i < parentComparators.length; i++) {
- parentLeafComparators[i] = parentComparators[i].getLeafComparator(context);
- }
- childLeafComparators = new LeafFieldComparator[childComparators.length];
- for (int i = 0; i < childComparators.length; i++) {
- childLeafComparators[i] = childComparators[i].getLeafComparator(context);
- }
-
- return new LeafFieldComparator() {
-
- @Override
- public int compareBottom(int doc) throws IOException {
- return compare(bottomChild, bottomParent, doc, parent(doc));
- }
-
- @Override
- public int compareTop(int doc) throws IOException {
- // we dont have enough information (the docid is needed)
- throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
- }
-
- @Override
- public void copy(int slot, int doc) throws IOException {
- childSlots[slot] = doc;
- parentSlots[slot] = parent(doc);
- }
-
- @Override
- public void setBottom(int slot) {
- bottomParent = parentSlots[slot];
- bottomChild = childSlots[slot];
- }
-
- @Override
- public void setScorer(Scorer scorer) {
- for (LeafFieldComparator comp : parentLeafComparators) {
- comp.setScorer(scorer);
- }
- for (LeafFieldComparator comp : childLeafComparators) {
- comp.setScorer(scorer);
- }
- }
-
- };
- }
-
- @Override
- public Integer value(int slot) {
- // really our sort "value" is more complex...
- throw new UnsupportedOperationException("filling sort field values is not yet supported");
- }
-
- int parent(int doc) {
- return parentBits.nextSetBit(doc);
- }
-
- int compare(int docID1, int parent1, int docID2, int parent2) throws IOException {
- if (parent1 == parent2) { // both are in the same block
- if (docID1 == parent1 || docID2 == parent2) {
- // keep parents at the end of blocks
- return docID1 - docID2;
- } else {
- return compare(docID1, docID2, childLeafComparators, childReverseMul);
- }
- } else {
- int cmp = compare(parent1, parent2, parentLeafComparators, parentReverseMul);
- if (cmp == 0) {
- return parent1 - parent2;
- } else {
- return cmp;
- }
- }
- }
-
- int compare(int docID1, int docID2, LeafFieldComparator comparators[], int reverseMul[]) throws IOException {
- for (int i = 0; i < comparators.length; i++) {
- // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
- // the segments are always the same here...
- comparators[i].copy(0, docID1);
- comparators[i].setBottom(0);
- int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
- if (comp != 0) {
- return comp;
- }
- }
- return 0; // no need to docid tiebreak
- }
- };
- }
-
- @Override
- public String toString() {
- return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")";
- }
-}
diff --git a/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java b/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java
deleted file mode 100644
index 8b384f41db1..00000000000
--- a/lucene/misc/src/test/org/apache/lucene/index/IndexSortingTest.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.TestUtil;
-import org.junit.BeforeClass;
-
-public class IndexSortingTest extends SorterTestBase {
-
- private static final Sort[] SORT = new Sort[] {
- new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)),
- new Sort(new SortField(null, SortField.Type.DOC, true))
- };
-
- @BeforeClass
- public static void beforeClassSorterUtilTest() throws Exception {
- // NOTE: index was created by by super's @BeforeClass
-
- // only read the values of the undeleted documents, since after addIndexes,
- // the deleted ones will be dropped from the index.
- Bits liveDocs = unsortedReader.getLiveDocs();
- List values = new ArrayList<>();
- for (int i = 0; i < unsortedReader.maxDoc(); i++) {
- if (liveDocs == null || liveDocs.get(i)) {
- values.add(Integer.valueOf(unsortedReader.document(i).get(ID_FIELD)));
- }
- }
- int idx = random().nextInt(SORT.length);
- Sort sorter = SORT[idx];
- if (idx == 1) { // reverse doc sort
- Collections.reverse(values);
- } else {
- Collections.sort(values);
- if (random().nextBoolean()) {
- sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending
- Collections.reverse(values);
- }
- }
- sortedValues = values.toArray(new Integer[values.size()]);
- if (VERBOSE) {
- System.out.println("sortedValues: " + sortedValues);
- System.out.println("Sorter: " + sorter);
- }
-
- Directory target = newDirectory();
- IndexWriter writer = new IndexWriter(target, newIndexWriterConfig(null));
- LeafReader reader = SortingLeafReader.wrap(unsortedReader, sorter);
- writer.addIndexes(SlowCodecReaderWrapper.wrap(reader));
- writer.close();
- // NOTE: also closes unsortedReader
- reader.close();
- dir.close();
-
- // CheckIndex the target directory
- dir = target;
- TestUtil.checkIndex(dir);
-
- // set reader for tests
- sortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
- assertFalse("index should not have deletions", sortedReader.hasDeletions());
- }
-
-}
diff --git a/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java b/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java
deleted file mode 100644
index df1c80f881a..00000000000
--- a/lucene/misc/src/test/org/apache/lucene/index/SorterTestBase.java
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.document.BinaryDocValuesField;
-import org.apache.lucene.document.BinaryPoint;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.document.SortedNumericDocValuesField;
-import org.apache.lucene.document.SortedSetDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.PointValues.IntersectVisitor;
-import org.apache.lucene.index.PointValues.Relation;
-import org.apache.lucene.index.SortingLeafReader.SortingDocsEnum;
-import org.apache.lucene.index.TermsEnum.SeekStatus;
-import org.apache.lucene.search.CollectionStatistics;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TermStatistics;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.TestUtil;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-public abstract class SorterTestBase extends LuceneTestCase {
-
- static final class NormsSimilarity extends Similarity {
-
- private final Similarity in;
-
- public NormsSimilarity(Similarity in) {
- this.in = in;
- }
-
- @Override
- public long computeNorm(FieldInvertState state) {
- if (state.getName().equals(NORMS_FIELD)) {
- return Float.floatToIntBits(state.getBoost());
- } else {
- return in.computeNorm(state);
- }
- }
-
- @Override
- public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
- return in.computeWeight(collectionStats, termStats);
- }
-
- @Override
- public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
- return in.simScorer(weight, context);
- }
-
- }
-
- static final class PositionsTokenStream extends TokenStream {
-
- private final CharTermAttribute term;
- private final PayloadAttribute payload;
- private final OffsetAttribute offset;
-
- private int pos, off;
-
- public PositionsTokenStream() {
- term = addAttribute(CharTermAttribute.class);
- payload = addAttribute(PayloadAttribute.class);
- offset = addAttribute(OffsetAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (pos == 0) {
- return false;
- }
-
- clearAttributes();
- term.append(DOC_POSITIONS_TERM);
- payload.setPayload(new BytesRef(Integer.toString(pos)));
- offset.setOffset(off, off);
- --pos;
- ++off;
- return true;
- }
-
- void setId(int id) {
- pos = id / 10 + 1;
- off = 0;
- }
- }
-
- protected static final String ID_FIELD = "id";
- protected static final String DOCS_ENUM_FIELD = "docs";
- protected static final String DOCS_ENUM_TERM = "$all$";
- protected static final String DOC_POSITIONS_FIELD = "positions";
- protected static final String DOC_POSITIONS_TERM = "$all$";
- protected static final String NUMERIC_DV_FIELD = "numeric";
- protected static final String SORTED_NUMERIC_DV_FIELD = "sorted_numeric";
- protected static final String NORMS_FIELD = "norm";
- protected static final String BINARY_DV_FIELD = "binary";
- protected static final String SORTED_DV_FIELD = "sorted";
- protected static final String SORTED_SET_DV_FIELD = "sorted_set";
- protected static final String TERM_VECTORS_FIELD = "term_vectors";
- protected static final String DIMENSIONAL_FIELD = "numeric1d";
-
- private static final FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
- static {
- TERM_VECTORS_TYPE.setStoreTermVectors(true);
- TERM_VECTORS_TYPE.freeze();
- }
-
- private static final FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
- static {
- POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- POSITIONS_TYPE.freeze();
- }
-
- protected static Directory dir;
- protected static LeafReader unsortedReader;
- protected static LeafReader sortedReader;
- protected static Integer[] sortedValues;
-
- private static Document doc(final int id, PositionsTokenStream positions) {
- final Document doc = new Document();
- doc.add(new StringField(ID_FIELD, Integer.toString(id), Store.YES));
- doc.add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Store.NO));
- positions.setId(id);
- doc.add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE));
- doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id));
- TextField norms = new TextField(NORMS_FIELD, Integer.toString(id), Store.NO);
- norms.setBoost(Float.intBitsToFloat(id));
- doc.add(norms);
- doc.add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1))));
- doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id));
- doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1));
- doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE));
- byte[] bytes = new byte[4];
- NumericUtils.intToSortableBytes(id, bytes, 0);
- // TODO: index time sorting doesn't yet support points
- //doc.add(new BinaryPoint(DIMENSIONAL_FIELD, bytes));
- return doc;
- }
-
- /** Creates an unsorted index; subclasses then sort this index and open sortedReader. */
- private static void createIndex(Directory dir, int numDocs, Random random) throws IOException {
- List<Integer> ids = new ArrayList<>();
- for (int i = 0; i < numDocs; i++) {
- ids.add(Integer.valueOf(i * 10));
- }
- // shuffle them for indexing
- Collections.shuffle(ids, random);
- if (VERBOSE) {
- System.out.println("Shuffled IDs for indexing: " + Arrays.toString(ids.toArray()));
- }
-
- PositionsTokenStream positions = new PositionsTokenStream();
- IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
- conf.setMaxBufferedDocs(4); // create some segments
- conf.setSimilarity(new NormsSimilarity(conf.getSimilarity())); // for testing norms field
- RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf);
- writer.setDoRandomForceMerge(false);
- for (int id : ids) {
- writer.addDocument(doc(id, positions));
- }
- // delete some documents
- writer.commit();
- for (Integer id : ids) {
- if (random.nextDouble() < 0.2) {
- if (VERBOSE) {
- System.out.println("delete doc_id " + id);
- }
- writer.deleteDocuments(new Term(ID_FIELD, id.toString()));
- }
- }
- writer.close();
- }
-
- @BeforeClass
- public static void beforeClassSorterTestBase() throws Exception {
- dir = newDirectory();
- int numDocs = atLeast(20);
- createIndex(dir, numDocs, random());
-
- unsortedReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
- }
-
- @AfterClass
- public static void afterClassSorterTestBase() throws Exception {
- unsortedReader.close();
- sortedReader.close();
- dir.close();
- unsortedReader = sortedReader = null;
- dir = null;
- }
-
- public void testBinaryDocValuesField() throws Exception {
- BinaryDocValues dv = sortedReader.getBinaryDocValues(BINARY_DV_FIELD);
- for (int i = 0; i < sortedReader.maxDoc(); i++) {
- final BytesRef bytes = dv.get(i);
- assertEquals("incorrect binary DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString());
- }
- }
-
- public void testDocsAndPositionsEnum() throws Exception {
- TermsEnum termsEnum = sortedReader.terms(DOC_POSITIONS_FIELD).iterator();
- assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM)));
- PostingsEnum sortedPositions = termsEnum.postings(null, PostingsEnum.ALL);
- int doc;
-
- // test nextDoc()
- while ((doc = sortedPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- int freq = sortedPositions.freq();
- assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq);
- for (int i = 0; i < freq; i++) {
- assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition());
- assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset());
- assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset());
- assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString()));
- }
- }
-
- // test advance()
- final PostingsEnum reuse = sortedPositions;
- sortedPositions = termsEnum.postings(reuse, PostingsEnum.ALL);
- if (sortedPositions instanceof SortingDocsEnum) {
- assertTrue(((SortingDocsEnum) sortedPositions).reused(reuse)); // make sure reuse worked
- }
- doc = 0;
- while ((doc = sortedPositions.advance(doc + TestUtil.nextInt(random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS) {
- int freq = sortedPositions.freq();
- assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq);
- for (int i = 0; i < freq; i++) {
- assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition());
- assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset());
- assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset());
- assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString()));
- }
- }
- }
-
- Bits randomLiveDocs(int maxDoc) {
- if (rarely()) {
- if (random().nextBoolean()) {
- return null;
- } else {
- return new Bits.MatchNoBits(maxDoc);
- }
- }
- final FixedBitSet bits = new FixedBitSet(maxDoc);
- final int bitsSet = TestUtil.nextInt(random(), 1, maxDoc - 1);
- for (int i = 0; i < bitsSet; ++i) {
- while (true) {
- final int index = random().nextInt(maxDoc);
- if (!bits.get(index)) {
- bits.set(index);
- break;
- }
- }
- }
- return bits;
- }
-
- public void testDocsEnum() throws Exception {
- TermsEnum termsEnum = sortedReader.terms(DOCS_ENUM_FIELD).iterator();
- assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOCS_ENUM_TERM)));
- PostingsEnum docs = termsEnum.postings(null);
-
- int doc;
- while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD)));
- }
-
- PostingsEnum reuse = docs;
- docs = termsEnum.postings(reuse);
- if (docs instanceof SortingDocsEnum) {
- assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
- }
- doc = -1;
- while ((doc = docs.advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS) {
- assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(sortedReader.document(doc).get(ID_FIELD)));
- }
- }
-
- public void testNormValues() throws Exception {
- NumericDocValues dv = sortedReader.getNormValues(NORMS_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- assertEquals("incorrect norm value for doc " + i, sortedValues[i].intValue(), dv.get(i));
- }
- }
-
- public void testNumericDocValuesField() throws Exception {
- NumericDocValues dv = sortedReader.getNumericDocValues(NUMERIC_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- assertEquals("incorrect numeric DocValues for doc " + i, sortedValues[i].intValue(), dv.get(i));
- }
- }
-
- public void testSortedDocValuesField() throws Exception {
- SortedDocValues dv = sortedReader.getSortedDocValues(SORTED_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- final BytesRef bytes = dv.get(i);
- assertEquals("incorrect sorted DocValues for doc " + i, sortedValues[i].toString(), bytes.utf8ToString());
- }
- }
-
- public void testSortedSetDocValuesField() throws Exception {
- SortedSetDocValues dv = sortedReader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- dv.setDocument(i);
- BytesRef bytes = dv.lookupOrd(dv.nextOrd());
- int value = sortedValues[i].intValue();
- assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value).toString(), bytes.utf8ToString());
- bytes = dv.lookupOrd(dv.nextOrd());
- assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value + 1).toString(), bytes.utf8ToString());
- assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
- }
- }
-
- public void testSortedNumericDocValuesField() throws Exception {
- SortedNumericDocValues dv = sortedReader.getSortedNumericDocValues(SORTED_NUMERIC_DV_FIELD);
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- dv.setDocument(i);
- assertEquals(2, dv.count());
- int value = sortedValues[i].intValue();
- assertEquals("incorrect sorted-numeric DocValues for doc " + i, value, dv.valueAt(0));
- assertEquals("incorrect sorted-numeric DocValues for doc " + i, value + 1, dv.valueAt(1));
- }
- }
-
- public void testTermVectors() throws Exception {
- int maxDoc = sortedReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- Terms terms = sortedReader.getTermVector(i, TERM_VECTORS_FIELD);
- assertNotNull("term vectors not found for doc " + i + " field [" + TERM_VECTORS_FIELD + "]", terms);
- assertEquals("incorrect term vector for doc " + i, sortedValues[i].toString(), terms.iterator().next().utf8ToString());
- }
- }
-
- // TODO: index sorting doesn't yet support points
- /*
- public void testPoints() throws Exception {
- PointValues values = sortedReader.getPointValues();
- values.intersect(DIMENSIONAL_FIELD,
- new IntersectVisitor() {
- @Override
- public void visit(int docID) {
- throw new IllegalStateException();
- }
-
- @Override
- public void visit(int docID, byte[] packedValues) {
- assertEquals(sortedValues[docID].intValue(), NumericUtils.bytesToInt(packedValues, 0));
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
- return Relation.CELL_CROSSES_QUERY;
- }
- });
- }
- */
-}
diff --git a/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java b/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java
deleted file mode 100644
index 3e8cb99ae07..00000000000
--- a/lucene/misc/src/test/org/apache/lucene/index/SortingLeafReaderTest.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.util.Arrays;
-
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.TestUtil;
-import org.junit.BeforeClass;
-
-public class SortingLeafReaderTest extends SorterTestBase {
-
- @BeforeClass
- public static void beforeClassSortingLeafReaderTest() throws Exception {
- // NOTE: index was created by super's @BeforeClass
-
- // sort the index by id (as integer, in NUMERIC_DV_FIELD)
- Sort sort = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT));
- final Sorter.DocMap docMap = new Sorter(sort).sort(unsortedReader);
-
- // Sorter.compute also sorts the values
- NumericDocValues dv = unsortedReader.getNumericDocValues(NUMERIC_DV_FIELD);
- sortedValues = new Integer[unsortedReader.maxDoc()];
- for (int i = 0; i < unsortedReader.maxDoc(); ++i) {
- sortedValues[docMap.oldToNew(i)] = (int)dv.get(i);
- }
- if (VERBOSE) {
- System.out.println("docMap: " + docMap);
- System.out.println("sortedValues: " + Arrays.toString(sortedValues));
- }
-
- // sort the index by id (as integer, in NUMERIC_DV_FIELD)
- sortedReader = SortingLeafReader.wrap(unsortedReader, sort);
-
- if (VERBOSE) {
- System.out.print("mapped-deleted-docs: ");
- Bits mappedLiveDocs = sortedReader.getLiveDocs();
- for (int i = 0; i < mappedLiveDocs.length(); i++) {
- if (!mappedLiveDocs.get(i)) {
- System.out.print(i + " ");
- }
- }
- System.out.println();
- }
-
- TestUtil.checkReader(sortedReader);
- }
-
- public void testBadSort() throws Exception {
- IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
- SortingLeafReader.wrap(sortedReader, Sort.RELEVANCE);
- });
- assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage());
- }
-
-}
diff --git a/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java b/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java
deleted file mode 100644
index 4a0d2b5a594..00000000000
--- a/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.search.BlockJoinComparatorSource;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestBlockJoinSorter extends LuceneTestCase {
-
- public void test() throws IOException {
- final int numParents = atLeast(200);
- IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
- cfg.setMergePolicy(newLogMergePolicy());
- final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), cfg);
- final Document parentDoc = new Document();
- final NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
- parentDoc.add(parentVal);
- final StringField parent = new StringField("parent", "true", Store.YES);
- parentDoc.add(parent);
- for (int i = 0; i < numParents; ++i) {
- List<Document> documents = new ArrayList<>();
- final int numChildren = random().nextInt(10);
- for (int j = 0; j < numChildren; ++j) {
- final Document childDoc = new Document();
- childDoc.add(new NumericDocValuesField("child_val", random().nextInt(5)));
- documents.add(childDoc);
- }
- parentVal.setLongValue(random().nextInt(50));
- documents.add(parentDoc);
- writer.addDocuments(documents);
- }
- writer.forceMerge(1);
- IndexReader indexReader = writer.getReader();
- writer.close();
-
- IndexSearcher searcher = newSearcher(indexReader);
- indexReader = searcher.getIndexReader(); // newSearcher may have wrapped it
- assertEquals(1, indexReader.leaves().size());
- final LeafReader reader = indexReader.leaves().get(0).reader();
- final Query parentsFilter = new TermQuery(new Term("parent", "true"));
-
- final Weight weight = searcher.createNormalizedWeight(parentsFilter, false);
- final Scorer parents = weight.scorer(indexReader.leaves().get(0));
- final BitSet parentBits = BitSet.of(parents.iterator(), reader.maxDoc());
- final NumericDocValues parentValues = reader.getNumericDocValues("parent_val");
- final NumericDocValues childValues = reader.getNumericDocValues("child_val");
-
- final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
- final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
-
- final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
- final Sorter sorter = new Sorter(sort);
- final Sorter.DocMap docMap = sorter.sort(reader);
- assertEquals(reader.maxDoc(), docMap.size());
-
- int[] children = new int[1];
- int numChildren = 0;
- int previousParent = -1;
- for (int i = 0; i < docMap.size(); ++i) {
- final int oldID = docMap.newToOld(i);
- if (parentBits.get(oldID)) {
- // check that we have the right children
- for (int j = 0; j < numChildren; ++j) {
- assertEquals(oldID, parentBits.nextSetBit(children[j]));
- }
- // check that children are sorted
- for (int j = 1; j < numChildren; ++j) {
- final int doc1 = children[j-1];
- final int doc2 = children[j];
- if (childValues.get(doc1) == childValues.get(doc2)) {
- assertTrue(doc1 < doc2); // sort is stable
- } else {
- assertTrue(childValues.get(doc1) < childValues.get(doc2));
- }
- }
- // check that parents are sorted
- if (previousParent != -1) {
- if (parentValues.get(previousParent) == parentValues.get(oldID)) {
- assertTrue(previousParent < oldID);
- } else {
- assertTrue(parentValues.get(previousParent) < parentValues.get(oldID));
- }
- }
- // reset
- previousParent = oldID;
- numChildren = 0;
- } else {
- children = ArrayUtil.grow(children, numChildren+1);
- children[numChildren++] = oldID;
- }
- }
- indexReader.close();
- writer.w.getDirectory().close();
- }
-
-}
diff --git a/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java b/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java
deleted file mode 100644
index a5486f4ce7e..00000000000
--- a/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TieredMergePolicy;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
-public class TestSortingMergePolicy extends BaseMergePolicyTestCase {
-
- private List<String> terms;
- private Directory dir1, dir2;
- private Sort sort;
- private boolean reversedSort;
- private IndexReader reader;
- private IndexReader sortedReader;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- final Boolean reverse = (random().nextBoolean() ? null : new Boolean(random().nextBoolean()));
- final SortField sort_field = (reverse == null
- ? new SortField("ndv", SortField.Type.LONG)
- : new SortField("ndv", SortField.Type.LONG, reverse.booleanValue()));
- sort = new Sort(sort_field);
- reversedSort = (null != reverse && reverse.booleanValue());
- createRandomIndexes();
- }
-
- private Document randomDocument() {
- final Document doc = new Document();
- doc.add(new NumericDocValuesField("ndv", random().nextLong()));
- doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES));
- return doc;
- }
-
- public MergePolicy mergePolicy() {
- return newSortingMergePolicy(sort);
- }
-
- public static SortingMergePolicy newSortingMergePolicy(Sort sort) {
- // usually create a MP with a low merge factor so that many merges happen
- MergePolicy mp;
- int thingToDo = random().nextInt(3);
- if (thingToDo == 0) {
- TieredMergePolicy tmp = newTieredMergePolicy(random());
- final int numSegs = TestUtil.nextInt(random(), 3, 5);
- tmp.setSegmentsPerTier(numSegs);
- tmp.setMaxMergeAtOnce(TestUtil.nextInt(random(), 2, numSegs));
- mp = tmp;
- } else if (thingToDo == 1) {
- LogMergePolicy lmp = newLogMergePolicy(random());
- lmp.setMergeFactor(TestUtil.nextInt(random(), 3, 5));
- mp = lmp;
- } else {
- // just a regular random one from LTC (could be alcoholic etc)
- mp = newMergePolicy();
- }
- // wrap it with a sorting mp
- if (VERBOSE) {
- System.out.println("TEST: return SortingMergePolicy(mp=" + mp + " sort=" + sort + ")");
- }
- return new SortingMergePolicy(mp, sort);
- }
-
- private void createRandomIndexes() throws IOException {
- dir1 = newDirectory();
- dir2 = newDirectory();
- final int numDocs = atLeast(150);
- final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
- Set<String> randomTerms = new HashSet<>();
- while (randomTerms.size() < numTerms) {
- randomTerms.add(TestUtil.randomSimpleString(random()));
- }
- terms = new ArrayList<>(randomTerms);
- final long seed = random().nextLong();
- final IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
- final IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
- iwc2.setMergePolicy(mergePolicy());
- final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
- final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
- for (int i = 0; i < numDocs; ++i) {
- if (random().nextInt(5) == 0 && i != numDocs - 1) {
- final String term = RandomPicks.randomFrom(random(), terms);
- iw1.deleteDocuments(new Term("s", term));
- iw2.deleteDocuments(new Term("s", term));
- }
- final Document doc = randomDocument();
- iw1.addDocument(doc);
- iw2.addDocument(doc);
- if (random().nextInt(8) == 0) {
- iw1.commit();
- iw2.commit();
- }
- }
- // Make sure we have something to merge
- iw1.commit();
- iw2.commit();
- final Document doc = randomDocument();
- // NOTE: don't use RIW.addDocument directly, since it sometimes commits
- // which may trigger a merge, in which case forceMerge may not do anything.
- // With field updates this is a problem, since the updates can go into the
- // single segment in the index, and therefore the index won't be sorted.
- // This hurts the assumption of the test later on, that the index is sorted
- // by SortingMP.
- iw1.w.addDocument(doc);
- iw2.w.addDocument(doc);
-
- // update NDV of docs belonging to one term (covers many documents)
- final long value = random().nextLong();
- final String term = RandomPicks.randomFrom(random(), terms);
- iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value);
- iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value);
-
- iw1.forceMerge(1);
- iw2.forceMerge(1);
- iw1.close();
- iw2.close();
- reader = DirectoryReader.open(dir1);
- sortedReader = DirectoryReader.open(dir2);
- }
-
- @Override
- public void tearDown() throws Exception {
- reader.close();
- sortedReader.close();
- dir1.close();
- dir2.close();
- super.tearDown();
- }
-
- private static void assertSorted(LeafReader reader, boolean reverse) throws IOException {
- final NumericDocValues ndv = reader.getNumericDocValues("ndv");
- for (int i = 1; i < reader.maxDoc(); ++i) {
- final int lhs = (!reverse ? i-1 : i);
- final int rhs = (!reverse ? i : i-1);
- assertTrue("ndv(" + (i-1) + ")=" + ndv.get(i-1) + ",ndv(" + i + ")=" + ndv.get(i)+",reverse="+reverse, ndv.get(lhs) <= ndv.get(rhs));
- }
- }
-
- public void testSortingMP() throws IOException {
- final LeafReader sortedReader1 = SortingLeafReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
- final LeafReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);
-
- assertSorted(sortedReader1, reversedSort);
- assertSorted(sortedReader2, reversedSort);
-
- assertReaderEquals("", sortedReader1, sortedReader2);
- }
-
- public void testBadSort() throws Exception {
- IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
- new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE);
- });
- assertEquals("Cannot sort an index with a Sort that refers to the relevance score", expected.getMessage());
- }
-
-}
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java
index 3fbe14a4217..54ad7445c79 100644
--- a/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java
+++ b/lucene/misc/src/test/org/apache/lucene/search/TestDiversifiedTopDocsCollector.java
@@ -32,9 +32,9 @@ import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -367,8 +367,7 @@ public class TestDiversifiedTopDocsCollector extends LuceneTestCase {
reader = writer.getReader();
writer.close();
searcher = newSearcher(reader);
- LeafReader ar = SlowCompositeReaderWrapper.wrap(reader);
- artistDocValues = ar.getSortedDocValues("artist");
+ artistDocValues = MultiDocValues.getSortedValues(reader, "artist");
// All searches sort by song popularity
final Similarity base = searcher.getSimilarity(true);
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java
index 66630df2bca..0b19254d985 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java
@@ -247,7 +247,7 @@ public class TestNearest extends LuceneTestCase {
private IndexWriterConfig getIndexWriterConfig() {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(Codec.forName("Lucene60"));
+ iwc.setCodec(Codec.forName("Lucene62"));
return iwc;
}
}
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java
index afdde71c0a7..4d3ef3b3c1d 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java
@@ -17,7 +17,7 @@
package org.apache.lucene.spatial.prefix.tree;
import java.text.ParseException;
-import java.text.SimpleDateFormat;
+import java.time.ZonedDateTime;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
@@ -58,60 +58,97 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
*/
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
- private static Calendar CAL_TMP;//template
+
+ /**
+ * The Java platform default {@link Calendar} with UTC & ROOT Locale. Generally a {@link GregorianCalendar}.
+ * Do not modify this!
+ */
+ public static final Calendar DEFAULT_CAL;//template
static {
- CAL_TMP = Calendar.getInstance(UTC, Locale.ROOT);
- CAL_TMP.clear();
+ DEFAULT_CAL = Calendar.getInstance(UTC, Locale.ROOT);
+ DEFAULT_CAL.clear();
}
- private static final Calendar MINCAL = (Calendar) CAL_TMP.clone();
- private static final Calendar MAXCAL = (Calendar) CAL_TMP.clone();
+ /**
+ * A Calendar instance compatible with {@link java.time.ZonedDateTime} as seen from
+ * {@link GregorianCalendar#from(ZonedDateTime)}.
+ * Do not modify this!
+ */
+ public static final Calendar JAVA_UTIL_TIME_COMPAT_CAL;
static {
- MINCAL.setTimeInMillis(Long.MIN_VALUE);
- MAXCAL.setTimeInMillis(Long.MAX_VALUE);
- }
- //BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1.
- private static final int BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR);
- private static final int BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR);//1
- private static final int BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1;
- private static final int AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR);//1
- private static final int AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR);
- private static final int AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000;
- static { assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; }
-
- //how many million years are there?
- private static final int NUM_MYEARS = (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000;
-
- private static int calFieldLen(int field) {
- return CAL_TMP.getMaximum(field) - CAL_TMP.getMinimum(field) + 1;
+ // see source of GregorianCalendar.from(ZonedDateTime)
+ GregorianCalendar cal = new GregorianCalendar(UTC, Locale.ROOT);
+ cal.setGregorianChange(new Date(Long.MIN_VALUE));
+ cal.setFirstDayOfWeek(Calendar.MONDAY);// might not matter?
+ cal.setMinimalDaysInFirstWeek(4);// might not matter
+ cal.clear();
+ JAVA_UTIL_TIME_COMPAT_CAL = cal;
}
private static final int[] FIELD_BY_LEVEL = {
-1/*unused*/, -1, -1, Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND};
- private static final int yearLevel = 3;
- public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree();
+ private static final int YEAR_LEVEL = 3;
+
+ //how many million years are there?
+ private static final int NUM_MYEARS = 585;// we assert how this was computed in the constructor
+
+ /** An instance based on {@link Calendar#getInstance(TimeZone, Locale)} with UTC and Locale.ROOT. This
+ * will (always?) be a {@link GregorianCalendar} with a so-called "Gregorian Change Date" of 1582.
+ */
+ @Deprecated
+ public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree(DEFAULT_CAL);
+
+ // Instance fields: (all are final)
+
+ private final Calendar CAL_TMP;//template
+
+ private final Calendar MINCAL;
+ private final Calendar MAXCAL;
+
+ private final int BC_FIRSTYEAR;
+ private final int BC_LASTYEAR;
+ private final int BC_YEARS;
+ private final int AD_FIRSTYEAR;
+ private final int AD_LASTYEAR;
+ private final int AD_YEAR_BASE;
private final UnitNRShape minLV, maxLV;
private final UnitNRShape gregorianChangeDateLV;
- protected DateRangePrefixTree() {
+ /** Constructs with the specified calendar used as a template to be cloned whenever a new
+ * Calendar needs to be created. See {@link #DEFAULT_CAL} and {@link #JAVA_UTIL_TIME_COMPAT_CAL}. */
+ public DateRangePrefixTree(Calendar templateCal) {
super(new int[]{//sublevels by level
NUM_MYEARS,
1000,//1 thousand thousand-years in a million years
1000,//1 thousand years in a thousand-year
- calFieldLen(Calendar.MONTH),
- calFieldLen(Calendar.DAY_OF_MONTH),
- calFieldLen(Calendar.HOUR_OF_DAY),
- calFieldLen(Calendar.MINUTE),
- calFieldLen(Calendar.SECOND),
- calFieldLen(Calendar.MILLISECOND),
+ calFieldLen(templateCal, Calendar.MONTH),
+ calFieldLen(templateCal, Calendar.DAY_OF_MONTH),
+ calFieldLen(templateCal, Calendar.HOUR_OF_DAY),
+ calFieldLen(templateCal, Calendar.MINUTE),
+ calFieldLen(templateCal, Calendar.SECOND),
+ calFieldLen(templateCal, Calendar.MILLISECOND),
});
+ CAL_TMP = (Calendar) templateCal.clone();// defensive copy
+ MINCAL = (Calendar) CAL_TMP.clone();
+ MINCAL.setTimeInMillis(Long.MIN_VALUE);
+ MAXCAL = (Calendar) CAL_TMP.clone();
+ MAXCAL.setTimeInMillis(Long.MAX_VALUE);
+ //BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1.
+ BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR);
+ BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR); // 1
+ BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1;
+ AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR); // 1
+ AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR);
+ AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000;
+ assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1;
+ assert NUM_MYEARS == (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000;
+
maxLV = toShape((Calendar)MAXCAL.clone());
minLV = toShape((Calendar)MINCAL.clone());
if (MAXCAL instanceof GregorianCalendar) {
- //TODO this should be a configurable param by passing a Calendar serving as a template.
GregorianCalendar gCal = (GregorianCalendar)MAXCAL;
gregorianChangeDateLV = toUnitShape(gCal.getGregorianChange());
} else {
@@ -119,6 +156,10 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
}
}
+ private static int calFieldLen(Calendar cal, int field) {
+ return cal.getMaximum(field) - cal.getMinimum(field) + 1;
+ }
+
@Override
public int getNumSubCells(UnitNRShape lv) {
int cmp = comparePrefix(lv, maxLV);
@@ -140,7 +181,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
}
private int fastSubCells(UnitNRShape lv) {
- if (lv.getLevel() == yearLevel+1) {//month
+ if (lv.getLevel() == YEAR_LEVEL + 1) {//month
switch (lv.getValAtLevel(lv.getLevel())) {
case Calendar.SEPTEMBER:
case Calendar.APRIL:
@@ -175,7 +216,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
}
/** Calendar utility method:
- * Returns a new {@link Calendar} in UTC TimeZone, ROOT Locale, with all fields cleared. */
+ * Returns a clone of the {@link Calendar} passed to the constructor with all fields cleared. */
public Calendar newCal() {
return (Calendar) CAL_TMP.clone();
}
@@ -185,7 +226,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
* {@link java.util.Calendar#YEAR}. If there's no match, the next greatest level is returned as a negative value.
*/
public int getTreeLevelForCalendarField(int calField) {
- for (int i = yearLevel; i < FIELD_BY_LEVEL.length; i++) {
+ for (int i = YEAR_LEVEL; i < FIELD_BY_LEVEL.length; i++) {
if (FIELD_BY_LEVEL[i] == calField) {
return i;
} else if (FIELD_BY_LEVEL[i] > calField) {
@@ -200,7 +241,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
* examines fields relevant to the prefix tree. If no fields are set, it returns -1. */
public int getCalPrecisionField(Calendar cal) {
int lastField = -1;
- for (int level = yearLevel; level < FIELD_BY_LEVEL.length; level++) {
+ for (int level = YEAR_LEVEL; level < FIELD_BY_LEVEL.length; level++) {
int field = FIELD_BY_LEVEL[level];
if (!cal.isSet(field))
break;
@@ -212,20 +253,18 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
/** Calendar utility method:
* Calls {@link Calendar#clear(int)} for every field after {@code field}. Beware of Calendar underflow. */
public void clearFieldsAfter(Calendar cal, int field) {
- if (field == -1) {
- cal.clear();
- return;
- }
int assertEra = -1;
assert (assertEra = (((Calendar)cal.clone()).get(Calendar.ERA))) >= 0;//a trick to only get this if assert enabled
- for (int f = field+1; f < Calendar.FIELD_COUNT; f++) {
+ //note: Calendar.ERA == 0;
+ for (int f = field + 1; f <= Calendar.MILLISECOND; f++) {
cal.clear(f);
}
- assert ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow";
+ assert field + 1 == Calendar.ERA || ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow";
}
/** Converts {@code value} from a {@link Calendar} or {@link Date} to a {@link Shape}. Other arguments
* result in a {@link java.lang.IllegalArgumentException}.
+ * If a Calendar is passed in, there might be problems if it is not created via {@link #newCal()}.
*/
@Override
public UnitNRShape toUnitShape(Object value) {
@@ -240,7 +279,9 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
}
/** Converts the Calendar into a Shape.
- * The isSet() state of the Calendar is re-instated when done. */
+ * The isSet() state of the Calendar is re-instated when done.
+ * If a Calendar is passed in, there might be problems if it is not created via {@link #newCal()}.
+ */
public UnitNRShape toShape(Calendar cal) {
// Convert a Calendar into a stack of cell numbers
final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
@@ -256,7 +297,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
valStack[len++] = yearAdj / 1000;
yearAdj -= valStack[len-1] * 1000;
valStack[len++] = yearAdj;
- for (int level = yearLevel+1; level < FIELD_BY_LEVEL.length; level++) {
+ for (int level = YEAR_LEVEL +1; level < FIELD_BY_LEVEL.length; level++) {
int field = FIELD_BY_LEVEL[level];
if (field > calPrecField)
break;
@@ -301,7 +342,7 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
cal.set(Calendar.ERA, 0);//we assert this "sticks" at the end
cal.set(Calendar.YEAR, (AD_YEAR_BASE - yearAdj) + 1);
}
- for (int level = yearLevel+1; level <= lv.getLevel(); level++) {
+ for (int level = YEAR_LEVEL + 1; level <= lv.getLevel(); level++) {
int field = FIELD_BY_LEVEL[level];
cal.set(field, lv.getValAtLevel(level) + cal.getActualMinimum(field));
}
@@ -314,59 +355,77 @@ public class DateRangePrefixTree extends NumberRangePrefixTree {
return toString(toCalendar(lv));
}
- /** Calendar utility method:
- * Formats the calendar to ISO-8601 format, to include proper BC handling (1BC is "0000", 2BC is "-0001", etc.);
- * and WITHOUT a trailing 'Z'.
+ /** Calendar utility method consistent with {@link java.time.format.DateTimeFormatter#ISO_INSTANT} except
+ * has no trailing 'Z', and will be truncated to the units given according to
+ * {@link Calendar#isSet(int)}.
* A fully cleared calendar will yield the string "*".
* The isSet() state of the Calendar is re-instated when done. */
- @SuppressWarnings("fallthrough")
public String toString(Calendar cal) {
final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
if (calPrecField == -1)
return "*";
try {
- //TODO not fully optimized; but it's at least not used in 'search'.
- //TODO maybe borrow code from Solr DateUtil (put in Lucene util somewhere), and have it reference this back?
- String pattern = "yyyy-MM-dd'T'HH:mm:ss.SSS";
- int ptnLen = 0;
- switch (calPrecField) {//switch fall-through is deliberate
- case Calendar.MILLISECOND: ptnLen += 4;
- case Calendar.SECOND: ptnLen += 3;
- case Calendar.MINUTE: ptnLen += 3;
- case Calendar.HOUR_OF_DAY: ptnLen += 5;
- case Calendar.DAY_OF_MONTH: ptnLen += 3;
- case Calendar.MONTH: ptnLen += 3;
- case Calendar.YEAR: ptnLen += 4;
- break;
- default: throw new IllegalStateException(""+calPrecField);
- }
- pattern = pattern.substring(0, ptnLen);
- SimpleDateFormat format = new SimpleDateFormat(pattern, Locale.ROOT);
- format.setTimeZone(cal.getTimeZone());
- if (cal.get(Calendar.ERA) == 0) {//BC
- //SDF doesn't do this properly according to ISO-8601
- // Example: 1BC == "0000" (actually 0 AD), 2BC == "-0001", 3BC == "-0002", ...
- final int yearOrig = cal.get(Calendar.YEAR);
- cal.set(Calendar.YEAR, yearOrig-1);
- String str;
- try {
- str = format.format(cal.getTime());
- } finally {
- //reset to what it was
- cal.set(Calendar.ERA, 0);//necessary!
- cal.set(Calendar.YEAR, yearOrig);
+ StringBuilder builder = new StringBuilder("yyyy-MM-dd'T'HH:mm:ss.SSS".length());//typical
+ int year = cal.get(Calendar.YEAR); // within the era (thus always positive). >= 1.
+ if (cal.get(Calendar.ERA) == 0) { // BC
+ year -= 1; // 1BC should be "0000", so shift by one
+ if (year > 0) {
+ builder.append('-');
}
- if (yearOrig > 1)
- return "-" + str;
- else
- return "0000" + str.substring(4);
+ } else if (year > 9999) {
+ builder.append('+');
}
- return format.format(cal.getTime());
+ appendPadded(builder, year, (short) 4);
+ if (calPrecField >= Calendar.MONTH) {
+ builder.append('-');
+ appendPadded(builder, cal.get(Calendar.MONTH) + 1, (short) 2); // +1 since first is 0
+ }
+ if (calPrecField >= Calendar.DAY_OF_MONTH) {
+ builder.append('-');
+ appendPadded(builder, cal.get(Calendar.DAY_OF_MONTH), (short) 2);
+ }
+ if (calPrecField >= Calendar.HOUR_OF_DAY) {
+ builder.append('T');
+ appendPadded(builder, cal.get(Calendar.HOUR_OF_DAY), (short) 2);
+ }
+ if (calPrecField >= Calendar.MINUTE) {
+ builder.append(':');
+ appendPadded(builder, cal.get(Calendar.MINUTE), (short) 2);
+ }
+ if (calPrecField >= Calendar.SECOND) {
+ builder.append(':');
+ appendPadded(builder, cal.get(Calendar.SECOND), (short) 2);
+ }
+ if (calPrecField >= Calendar.MILLISECOND && cal.get(Calendar.MILLISECOND) > 0) { // only if non-zero
+ builder.append('.');
+ appendPadded(builder, cal.get(Calendar.MILLISECOND), (short) 3);
+ }
+
+ return builder.toString();
} finally {
clearFieldsAfter(cal, calPrecField);//restore precision state modified by get()
}
}
+ private void appendPadded(StringBuilder builder, int integer, short positions) {
+ assert integer >= 0 && positions >= 1 && positions <= 4;
+ int preBuilderLen = builder.length();
+ int intStrLen;
+ if (integer > 999) {
+ intStrLen = 4;
+ } else if (integer > 99) {
+ intStrLen = 3;
+ } else if (integer > 9) {
+ intStrLen = 2;
+ } else {
+ intStrLen = 1;
+ }
+ for (int i = 0; i < positions - intStrLen; i++) {
+ builder.append('0');
+ }
+ builder.append(integer);
+ }
+
@Override
protected UnitNRShape parseUnitShape(String str) throws ParseException {
return toShape(parseCalendar(str));
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
index 33c8a330af9..9b93aac04e0 100644
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
+++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
@@ -20,12 +20,12 @@ import java.io.IOException;
import java.util.Calendar;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
-import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.junit.Before;
import org.junit.Test;
+import org.locationtech.spatial4j.shape.Shape;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
@@ -54,7 +54,7 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
};
}
Calendar tmpCal = tree.newCal();
- int randomCalWindowField = randomIntBetween(1, Calendar.ZONE_OFFSET - 1);//we're not allowed to add zone offset
+ int randomCalWindowField = randomIntBetween(Calendar.YEAR, Calendar.MILLISECOND);
tmpCal.add(randomCalWindowField, 2_000);
randomCalWindowMs = Math.max(2000L, tmpCal.getTimeInMillis());
}
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java
index 12e9744064b..e8c63518ca3 100644
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java
+++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java
@@ -17,19 +17,32 @@
package org.apache.lucene.spatial.prefix.tree;
import java.text.ParseException;
+import java.time.Instant;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
-import org.locationtech.spatial4j.shape.Shape;
-import org.locationtech.spatial4j.shape.SpatialRelation;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
+import org.locationtech.spatial4j.shape.Shape;
+import org.locationtech.spatial4j.shape.SpatialRelation;
public class DateRangePrefixTreeTest extends LuceneTestCase {
- private DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
+ @ParametersFactory
+ public static Iterable<Object[]> parameters() {
+ return Arrays.asList(new Object[][]{
+ {DateRangePrefixTree.DEFAULT_CAL}, {DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL}
+ });
+ }
+
+ private final DateRangePrefixTree tree;
+
+ public DateRangePrefixTreeTest(Calendar templateCal) {
+ tree = new DateRangePrefixTree(templateCal);
+ }
public void testRoundTrip() throws Exception {
Calendar cal = tree.newCal();
@@ -77,6 +90,10 @@ public class DateRangePrefixTreeTest extends LuceneTestCase {
//test random
cal.setTimeInMillis(random().nextLong());
roundTrip(cal);
+ //assert same toString as java.time, provided it's after the GCD
+ if (cal.getTimeInMillis() > ((GregorianCalendar)tree.newCal()).getGregorianChange().getTime()) {
+ assertEquals(Instant.ofEpochMilli(cal.getTimeInMillis()).toString(), tree.toString(cal) + 'Z');
+ }
}
//copies from DateRangePrefixTree
@@ -88,8 +105,14 @@ public class DateRangePrefixTreeTest extends LuceneTestCase {
Calendar cal = (Calendar) calOrig.clone();
String lastString = null;
while (true) {
- String calString = tree.toString(cal);
- assert lastString == null || calString.length() < lastString.length();
+ String calString;
+ {
+ Calendar preToStringCalClone = (Calendar) cal.clone();
+ calString = tree.toString(cal);
+ assert lastString == null || calString.length() < lastString.length();
+ assertEquals(preToStringCalClone, cal);//ensure toString doesn't modify cal state
+ }
+
//test parseCalendar
assertEquals(cal, tree.parseCalendar(calString));
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java
index 0f8f2026fe4..c2cb93b13f0 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/TestGeo3DPoint.java
@@ -85,14 +85,14 @@ import com.carrotsearch.randomizedtesting.generators.RandomInts;
public class TestGeo3DPoint extends LuceneTestCase {
private static Codec getCodec() {
- if (Codec.getDefault().getName().equals("Lucene60")) {
+ if (Codec.getDefault().getName().equals("Lucene62")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
- return new FilterCodec("Lucene60", Codec.getDefault()) {
+ return new FilterCodec("Lucene62", Codec.getDefault()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
index 2b14d6e8016..16e9406310f 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@@ -56,7 +56,6 @@ import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
@@ -232,7 +231,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
// This way all merged segments will be sorted at
// merge time, allow for per-segment early termination
// when those segments are searched:
- iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));
+ iwc.setIndexSort(SORT);
return iwc;
}
@@ -586,10 +585,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
// We sorted postings by weight during indexing, so we
// only retrieve the first num hits now:
- final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) writer.getConfig().getMergePolicy();
- Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, sortingMergePolicy.getSort());
- IndexSearcher searcher = searcherMgr.acquire();
+ Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
List<LookupResult> results = null;
+ IndexSearcher searcher = searcherMgr.acquire();
try {
//System.out.println("got searcher=" + searcher);
searcher.search(finalQuery, c2);
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
index 97e0ef1c917..63454635a2a 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
@@ -66,7 +66,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny ear ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
- assertEquals(new BytesRef("foobaz"), results.get(0).payload);
+ assertEquals("foobaz", results.get(0).payload.utf8ToString());
assertEquals("lend me your ear", results.get(1).key);
assertEquals("lend me your ear ", results.get(1).highlightKey);
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
index 62ed08b8d0e..6b1c2d1b21d 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@@ -32,7 +32,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -646,7 +646,7 @@ public class TestSuggestField extends LuceneTestCase {
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
iwc.setMergePolicy(newLogMergePolicy());
- Codec filterCodec = new Lucene60Codec() {
+ Codec filterCodec = new Lucene62Codec() {
PostingsFormat postingsFormat = new Completion50PostingsFormat();
@Override
diff --git a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java
index bda4cdebaad..275c1864857 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java
@@ -1242,7 +1242,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
int pointsInLeaf = 2 + random().nextInt(4);
- iwc.setCodec(new FilterCodec("Lucene60", TestUtil.getDefaultCodec()) {
+ iwc.setCodec(new FilterCodec("Lucene62", TestUtil.getDefaultCodec()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
index f09be9d05a6..7c19596aa81 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
@@ -627,7 +627,7 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
/** Returns a new fake segment */
protected static SegmentInfo newSegmentInfo(Directory dir, String name) {
- return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
}
/** Creates a file of the specified size with random data. */
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java
index d8e2296e336..528e92afc17 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java
@@ -347,7 +347,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
/** Returns a new fake segment */
protected static SegmentInfo newSegmentInfo(Directory dir, String name) {
- return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
}
@Override
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
index 2c6f379f4b6..d7dc44bbeed 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
@@ -303,7 +303,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
Codec codec = getCodec();
- SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(),
proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<>(),
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
index 1136afa64fb..49d19ae4322 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
@@ -26,7 +26,8 @@ import java.util.Set;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MockDirectoryWrapper;
@@ -52,7 +53,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -66,7 +67,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
Set<String> originalFiles = Collections.singleton("_123.a");
info.setFiles(originalFiles);
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
@@ -95,7 +96,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
diagnostics.put("key1", "value1");
diagnostics.put("key2", "value2");
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- diagnostics, id, new HashMap<>());
+ diagnostics, id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -118,7 +119,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
attributes.put("key1", "value1");
attributes.put("key2", "value2");
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, attributes);
+ Collections.emptyMap(), id, attributes, null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -138,7 +139,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Directory dir = newDirectory();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -153,7 +154,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Directory dir = newDirectory();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, v, "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
@@ -161,7 +162,57 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
dir.close();
}
}
-
+
+ protected boolean supportsIndexSort() {
+ return true;
+ }
+
+ /** Test sort */
+ public void testSort() throws IOException {
+ assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort());
+
+ final int iters = atLeast(5);
+ for (int i = 0; i < iters; ++i) {
+ Sort sort;
+ if (i == 0) {
+ sort = null;
+ } else {
+ final int numSortFields = TestUtil.nextInt(random(), 1, 3);
+ SortField[] sortFields = new SortField[numSortFields];
+ for (int j = 0; j < numSortFields; ++j) {
+ sortFields[j] = new SortField(
+ TestUtil.randomSimpleString(random()),
+ random().nextBoolean() ? SortField.Type.LONG : SortField.Type.STRING,
+ random().nextBoolean());
+ if (random().nextBoolean()) {
+ switch (sortFields[j].getType()) {
+ case LONG:
+ sortFields[j].setMissingValue(random().nextLong());
+ break;
+ case STRING:
+ sortFields[j].setMissingValue(random().nextBoolean() ? SortField.STRING_FIRST : SortField.STRING_LAST);
+ break;
+ default:
+ fail();
+ }
+ }
+ }
+ sort = new Sort(sortFields);
+ }
+
+ Directory dir = newDirectory();
+ Codec codec = getCodec();
+ byte id[] = StringHelper.randomId();
+ SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
+ Collections.emptyMap(), id, new HashMap<>(), sort);
+ info.setFiles(Collections.emptySet());
+ codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
+ SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
+ assertEquals(sort, info2.getIndexSort());
+ dir.close();
+ }
+ }
+
/**
* Test segment infos write that hits exception immediately on open.
* make sure we get our exception back, no file handle leaks, etc.
@@ -183,7 +234,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
fail.setDoFail();
@@ -216,7 +267,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
fail.setDoFail();
@@ -249,7 +300,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
@@ -283,7 +334,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
Codec codec = getCodec();
byte id[] = StringHelper.randomId();
SegmentInfo info = new SegmentInfo(dir, getVersions()[0], "_123", 1, false, codec,
- Collections.emptyMap(), id, new HashMap<>());
+ Collections.emptyMap(), id, new HashMap<>(), null);
info.setFiles(Collections.emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
@@ -332,7 +383,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
TestUtil.randomUnicodeString(random()));
}
- SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes);
+ SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, diagnostics, id, attributes, null);
info.setFiles(files);
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, name, id, IOContext.DEFAULT);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java b/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java
index b40ac2685d5..f32e4d3c118 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java
@@ -138,7 +138,6 @@ public class MockRandomMergePolicy extends MergePolicy {
static class MockRandomOneMerge extends OneMerge {
final Random r;
- ArrayList<CodecReader> readers;
MockRandomOneMerge(List<SegmentCommitInfo> segments, long seed) {
super(segments);
@@ -146,34 +145,31 @@ public class MockRandomMergePolicy extends MergePolicy {
}
@Override
- public List<CodecReader> getMergeReaders() throws IOException {
- if (readers == null) {
- readers = new ArrayList(super.getMergeReaders());
- for (int i = 0; i < readers.size(); i++) {
- // wrap it (e.g. prevent bulk merge etc)
- // TODO: cut this over to FilterCodecReader api, we can explicitly
- // enable/disable bulk merge for portions of the index we want.
- int thingToDo = r.nextInt(7);
- if (thingToDo == 0) {
- // simple no-op FilterReader
- if (LuceneTestCase.VERBOSE) {
- System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + readers.get(i));
- }
- readers.set(i, SlowCodecReaderWrapper.wrap(new FilterLeafReader(readers.get(i)) {}));
- } else if (thingToDo == 1) {
- // renumber fields
- // NOTE: currently this only "blocks" bulk merges just by
- // being a FilterReader. But it might find bugs elsewhere,
- // and maybe the situation can be improved in the future.
- if (LuceneTestCase.VERBOSE) {
- System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + readers.get(i));
- }
- readers.set(i, SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(readers.get(i), r)));
- }
- // otherwise, reader is unchanged
+ public CodecReader wrapForMerge(CodecReader reader) throws IOException {
+
+ // wrap it (e.g. prevent bulk merge etc)
+ // TODO: cut this over to FilterCodecReader api, we can explicitly
+ // enable/disable bulk merge for portions of the index we want.
+ int thingToDo = r.nextInt(7);
+ if (thingToDo == 0) {
+ // simple no-op FilterReader
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + reader);
}
+ return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {});
+ } else if (thingToDo == 1) {
+ // renumber fields
+ // NOTE: currently this only "blocks" bulk merges just by
+ // being a FilterReader. But it might find bugs elsewhere,
+ // and maybe the situation can be improved in the future.
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + reader);
+ }
+ return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(reader, r));
+ } else {
+ // otherwise, reader is unchanged
+ return reader;
}
- return readers;
}
}
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
index 90064c4d7ff..d4159279311 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
@@ -611,7 +611,7 @@ public class RandomPostingsTester {
// maxAllowed = the "highest" we can index, but we will still
// randomly index at lower IndexOption
public FieldsProducer buildIndex(Codec codec, Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
- SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+ SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
if (LuceneTestCase.VERBOSE) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
index b517af00cf1..74a46d4f5e7 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@@ -283,6 +283,11 @@ public class QueryUtils {
@Override
protected void doClose() throws IOException {}
+
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
};
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index 52aca7e8f52..98cd2a790ea 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -2008,9 +2008,9 @@ public abstract class LuceneTestCase extends Assert {
return;
}
assertTermsStatisticsEquals(info, leftTerms, rightTerms);
- assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
- assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
- assertEquals(leftTerms.hasPayloads(), rightTerms.hasPayloads());
+ assertEquals("hasOffsets", leftTerms.hasOffsets(), rightTerms.hasOffsets());
+ assertEquals("hasPositions", leftTerms.hasPositions(), rightTerms.hasPositions());
+ assertEquals("hasPayloads", leftTerms.hasPayloads(), rightTerms.hasPayloads());
TermsEnum leftTermsEnum = leftTerms.iterator();
TermsEnum rightTermsEnum = rightTerms.iterator();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
index 7ac40375f15..5c88dc7ec92 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
@@ -32,7 +32,7 @@ import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.codecs.compressing.CompressingCodec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.RandomCodec;
@@ -181,8 +181,8 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
codec = new AssertingCodec();
} else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
codec = CompressingCodec.randomInstance(random);
- } else if ("Lucene60".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene60"))) {
- codec = new Lucene60Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values()));
+ } else if ("Lucene62".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene62"))) {
+ codec = new Lucene62Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values()));
} else if (!"random".equals(TEST_CODEC)) {
codec = Codec.forName(TEST_CODEC);
} else if ("random".equals(TEST_POSTINGSFORMAT)) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
index d772ae321d3..b63216085b3 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
@@ -54,7 +54,7 @@ import org.apache.lucene.codecs.blockterms.LuceneFixedGap;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
@@ -911,7 +911,7 @@ public final class TestUtil {
* This may be different than {@link Codec#getDefault()} because that is randomized.
*/
public static Codec getDefaultCodec() {
- return new Lucene60Codec();
+ return new Lucene62Codec();
}
/**
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5f52cf89b29..22a8211fe4e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -47,6 +47,11 @@ Optimizations
================== 6.1.0 ==================
+Upgrading from any prior Solr release
+----------------------
+
+* If you use historical dates, specifically on or before the year 1582, you should re-index.
+
Detailed Change List
----------------------
@@ -135,6 +140,8 @@ New Features
* SOLR-8208: [subquery] document transformer executes separate requests per result document. (Cao Manh Dat via Mikhail Khludnev)
+* SOLR-8323: Add CollectionStateWatcher API (Alan Woodward, Scott Blum)
+
Bug Fixes
----------------------
@@ -206,6 +213,10 @@ Bug Fixes
* SOLR-8970: Change SSLTestConfig to use a keystore file that is included as a resource in the
test-framework jar so users subclassing SolrTestCaseJ4 don't need to preserve magic paths (hossman)
+* SOLR-9080, SOLR-9085: (6.0 bug) For years <= 1582, date math (round, add, sub) introduced error. Range faceting
+  on such dates was also affected. With this fixed, this is the first release in which range faceting works on BC years.
+ (David Smiley)
+
Optimizations
----------------------
* SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.
@@ -228,6 +239,8 @@ Optimizations
* SOLR-9014: Deprecate and reduce usage of ClusterState methods which may make calls to ZK via
the lazy collection reference. (Scott Blum, shalin)
+* SOLR-9106: Cluster properties are now cached on ZkStateReader. (Alan Woodward)
+
Other Changes
----------------------
* SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.
@@ -286,6 +299,11 @@ Other Changes
* SOLR-9105: Fix a bunch of typos across 103 files (Bartosz Krasiński via janhoy)
+* SOLR-9072: Migrate morphline-core tests to SolrCloudTestCase. (Alan Woodward)
+
+================== 6.0.1 ==================
+(No Changes)
+
================== 6.0.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java
index 7892c057292..e69b3fd9063 100644
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java
@@ -54,7 +54,7 @@ public class DataImportHandlerException extends RuntimeException {
return errCode;
}
- public static void wrapAndThrow(int err, Exception e) {
+ public static DataImportHandlerException wrapAndThrow(int err, Exception e) {
if (e instanceof DataImportHandlerException) {
throw (DataImportHandlerException) e;
} else {
@@ -62,7 +62,7 @@ public class DataImportHandlerException extends RuntimeException {
}
}
- public static void wrapAndThrow(int err, Exception e, String msg) {
+ public static DataImportHandlerException wrapAndThrow(int err, Exception e, String msg) {
if (e instanceof DataImportHandlerException) {
throw (DataImportHandlerException) e;
} else {
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java
index 30e16cef323..f4df82080aa 100644
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java
@@ -16,9 +16,6 @@
*/
package org.apache.solr.handler.dataimport;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
@@ -35,6 +32,9 @@ import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.handler.dataimport.config.EntityField;
import org.apache.solr.util.DateMathParser;
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
+
/**
* Formats values using a given date format.
* Pass three parameters:
@@ -99,7 +99,7 @@ public class DateFormatEvaluator extends Evaluator {
throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex);
}
}
- TimeZone tz = TimeZone.getDefault();
+ TimeZone tz = TimeZone.getDefault(); // DWS TODO: is this the right default for us? Deserves explanation if so.
if(l.size()==4) {
Object tzObj = l.get(3);
String tzStr = null;
@@ -153,24 +153,19 @@ public class DateFormatEvaluator extends Evaluator {
* @return the result of evaluating a string
*/
protected Date evaluateString(String datemathfmt, Locale locale, TimeZone tz) {
- Date date = null;
- datemathfmt = datemathfmt.replaceAll("NOW", "");
- try {
- DateMathParser parser = getDateMathParser(locale, tz);
- date = parseMathString(parser,datemathfmt);
- } catch (ParseException e) {
- wrapAndThrow(SEVERE, e, "Invalid expression for date");
+ // note: DMP does not use the locale but perhaps a subclass might use it, for e.g. parsing a date in a custom
+ // string that doesn't necessarily have date math?
+ //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
+ if (datemathfmt.startsWith("NOW")) {
+ datemathfmt = datemathfmt.substring("NOW".length());
+ }
+ try {
+ DateMathParser parser = new DateMathParser(tz);
+ parser.setNow(new Date());// thus do *not* use SolrRequestInfo
+ return parser.parseMath(datemathfmt);
+ } catch (ParseException e) {
+ throw wrapAndThrow(SEVERE, e, "Invalid expression for date");
}
- return date;
- }
-
- /**
- * NOTE: declared as a method to allow for extensibility
- * @lucene.experimental
- * @return the result of resolving the variable wrapper
- */
- protected Date parseMathString(DateMathParser parser, String datemathfmt) throws ParseException {
- return parser.parseMath(datemathfmt);
}
/**
@@ -182,16 +177,4 @@ public class DateFormatEvaluator extends Evaluator {
return variableWrapper.resolve();
}
- /**
- * @lucene.experimental
- * @return a DateMathParser
- */
- protected DateMathParser getDateMathParser(Locale l, TimeZone tz) {
- return new DateMathParser(tz, l) {
- @Override
- public Date getNow() {
- return new Date();
- }
- };
- }
}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
index e86d09df5e1..a03354f2d2f 100644
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
@@ -20,7 +20,13 @@ import java.io.File;
import java.io.FilenameFilter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -153,10 +159,14 @@ public class FileListEntityProcessor extends EntityProcessorBase {
}
m = Evaluator.IN_SINGLE_QUOTES.matcher(dateStr);
if (m.find()) {
- String expr = null;
- expr = m.group(1).replaceAll("NOW", "");
+ String expr = m.group(1);
+ //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
+ if (expr.startsWith("NOW")) {
+ expr = expr.substring("NOW".length());
+ }
try {
- return new DateMathParser(TimeZone.getDefault(), Locale.ROOT).parseMath(expr);
+ // DWS TODO: is this TimeZone the right default for us? Deserves explanation if so.
+ return new DateMathParser(TimeZone.getDefault()).parseMath(expr);
} catch (ParseException exp) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid expression for date", exp);
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java
index e7ff2e698c0..00285649fe6 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestVariableResolver.java
@@ -16,12 +16,19 @@
*/
package org.apache.solr.handler.dataimport;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.apache.solr.util.DateMathParser;
-
import java.text.SimpleDateFormat;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
+import java.util.TimeZone;
+
+import org.apache.solr.util.DateMathParser;
+import org.junit.Test;
/**
*
@@ -103,7 +110,7 @@ public class TestVariableResolver extends AbstractDataImportHandlerTestCase {
.> emptyList()));
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
format.setTimeZone(TimeZone.getTimeZone("UTC"));
- DateMathParser dmp = new DateMathParser(TimeZone.getDefault(), Locale.ROOT);
+ DateMathParser dmp = new DateMathParser(TimeZone.getDefault());
String s = vri
.replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}");
@@ -144,7 +151,7 @@ public class TestVariableResolver extends AbstractDataImportHandlerTestCase {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
format.setTimeZone(TimeZone.getTimeZone("UTC"));
- DateMathParser dmp = new DateMathParser(TimeZone.getDefault(), Locale.ROOT);
+ DateMathParser dmp = new DateMathParser(TimeZone.getDefault());
String s = resolver
.replaceTokens("${dataimporter.functions.formatDate('NOW/DAY','yyyy-MM-dd HH:mm')}");
diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java
index 4d95a4f9198..535fe9db3af 100644
--- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java
@@ -24,14 +24,13 @@ import java.util.Locale;
import com.codahale.metrics.MetricRegistry;
import com.google.common.collect.ListMultimap;
import com.typesafe.config.Config;
-import org.apache.commons.io.FileUtils;
import org.apache.lucene.util.Constants;
import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
-import org.apache.solr.cloud.AbstractZkTestCase;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.AbstractDistribZkTestBase;
+import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.cloud.SolrZkClient;
-import org.junit.AfterClass;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.kitesdk.morphline.api.Collector;
import org.kitesdk.morphline.api.Command;
@@ -42,72 +41,58 @@ import org.kitesdk.morphline.base.FaultTolerance;
import org.kitesdk.morphline.base.Notifications;
import org.kitesdk.morphline.stdlib.PipeBuilder;
-public abstract class AbstractSolrMorphlineZkTestBase extends AbstractFullDistribZkTestBase {
- private static File solrHomeDirectory;
-
- protected static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();
- private static final File SOLR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr");
- private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1");
+public abstract class AbstractSolrMorphlineZkTestBase extends SolrCloudTestCase {
+
+ protected static final String COLLECTION = "collection1";
+
+ protected static final int TIMEOUT = 30;
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ configureCluster(2)
+ .addConfig("conf", SOLR_CONF_DIR.toPath())
+ .configure();
+
+ CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1)
+ .processAndWait(cluster.getSolrClient(), TIMEOUT);
+ AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(),
+ false, true, TIMEOUT);
+ }
+
+ protected static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();
+ private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1/conf");
protected Collector collector;
protected Command morphline;
-
- @Override
- public String getSolrHome() {
- return solrHomeDirectory.getPath();
- }
-
- public AbstractSolrMorphlineZkTestBase() {
- sliceCount = 3;
- fixShardCount(3);
- }
@BeforeClass
public static void setupClass() throws Exception {
- assumeFalse("This test fails on Java 9 (https://issues.apache.org/jira/browse/SOLR-8876)", Constants.JRE_IS_MINIMUM_JAVA9);
+
+ assumeFalse("This test fails on Java 9 (https://issues.apache.org/jira/browse/SOLR-8876)",
+ Constants.JRE_IS_MINIMUM_JAVA9);
assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
- solrHomeDirectory = createTempDir().toFile();
- AbstractZkTestCase.SOLRHOME = solrHomeDirectory;
- FileUtils.copyDirectory(SOLR_INSTANCE_DIR, solrHomeDirectory);
+
}
-
- @AfterClass
- public static void tearDownClass() throws Exception {
- solrHomeDirectory = null;
- }
-
- @Override
- public void distribSetUp() throws Exception {
- super.distribSetUp();
- System.setProperty("host", "127.0.0.1");
- System.setProperty("numShards", Integer.toString(sliceCount));
- uploadConfFiles();
+
+ @Before
+ public void setup() throws Exception {
collector = new Collector();
}
-
- @Override
- public void distribTearDown() throws Exception {
- super.distribTearDown();
- System.clearProperty("host");
- System.clearProperty("numShards");
- }
-
- @Override
+
protected void commit() throws Exception {
- Notifications.notifyCommitTransaction(morphline);
- super.commit();
+ Notifications.notifyCommitTransaction(morphline);
}
protected Command parse(String file) throws IOException {
- return parse(file, "collection1");
+ return parse(file, COLLECTION);
}
protected Command parse(String file, String collection) throws IOException {
SolrLocator locator = new SolrLocator(createMorphlineContext());
locator.setCollectionName(collection);
- locator.setZkHost(zkServer.getZkAddress());
+ locator.setZkHost(cluster.getZkServer().getZkAddress());
//locator.setServerUrl(cloudJettys.get(0).url); // TODO: download IndexSchema from solrUrl not yet implemented
//locator.setSolrHomeDir(SOLR_HOME_DIR.getPath());
Config config = new Compiler().parse(new File(RESOURCES_DIR + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR"));
@@ -145,33 +130,4 @@ public abstract class AbstractSolrMorphlineZkTestBase extends AbstractFullDistri
return record;
}
- private void putConfig(SolrZkClient zkClient, String name) throws Exception {
- File file = new File(new File(SOLR_CONF_DIR, "conf"), name);
- String destPath = "/configs/conf1/" + name;
- System.out.println("put " + file.getAbsolutePath() + " to " + destPath);
- zkClient.makePath(destPath, file, false, true);
- }
-
- private void uploadConfFiles(SolrZkClient zkClient, File dir, String prefix) throws Exception {
- boolean found = false;
- for (File f : dir.listFiles()) {
- String name = f.getName();
- if (name.startsWith(".")) continue;
- if (f.isFile()) {
- putConfig(zkClient, prefix + name);
- found = true;
- } else if (f.isDirectory()) {
- uploadConfFiles(zkClient, new File(dir, name), prefix + name + "/");
- }
- }
- assertTrue("Config folder '" + dir + "' with files to upload to zookeeper was empty.", found);
- }
-
- private void uploadConfFiles() throws Exception {
- // upload our own config files
- SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), 10000);
- uploadConfFiles(zkClient, new File(SOLR_CONF_DIR, "conf"), "");
- zkClient.close();
- }
-
}
diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java
index a654e94de27..ddaf2f69e6e 100644
--- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java
@@ -17,39 +17,32 @@
package org.apache.solr.morphlines.solr;
import java.io.File;
-import java.io.IOException;
import java.util.Iterator;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.params.CollectionParams.CollectionAction;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.BadHdfsThreadsFilter;
import org.junit.Test;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-
@ThreadLeakFilters(defaultFilters = true, filters = {
BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
})
@Slow
public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
-
@Test
public void test() throws Exception {
-
- waitForRecoveriesToFinish(false);
-
- createAlias("aliascollection", "collection1");
+
+ CollectionAdminRequest.createAlias("aliascollection", "collection1")
+ .process(cluster.getSolrClient());
morphline = parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection");
Record record = new Record();
@@ -84,9 +77,11 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
assertFalse(citer.hasNext());
- commit();
+ Notifications.notifyCommitTransaction(morphline);
+ new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION);
- QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
+ QueryResponse rsp = cluster.getSolrClient()
+ .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
//System.out.println(rsp);
Iterator<SolrDocument> iter = rsp.getResults().iterator();
assertEquals(expected.getFields(), next(iter));
@@ -95,26 +90,14 @@ public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
Notifications.notifyRollbackTransaction(morphline);
Notifications.notifyShutdown(morphline);
-
-
- createAlias("aliascollection", "collection1,collection2");
-
- try {
+
+ CollectionAdminRequest.createAlias("aliascollection", "collection1,collection2")
+ .processAndWait(cluster.getSolrClient(), TIMEOUT);
+
+ expectThrows(IllegalArgumentException.class, () -> {
parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection");
- fail("Expected IAE because update alias maps to multiple collections");
- } catch (IllegalArgumentException e) {
-
- }
- }
-
- private NamedList createAlias(String alias, String collections) throws SolrServerException, IOException {
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.set("collections", collections);
- params.set("name", alias);
- params.set("action", CollectionAction.CREATEALIAS.toString());
- QueryRequest request = new QueryRequest(params);
- request.setPath("/admin/collections");
- return cloudClient.request(request);
+ });
+
}
}
diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java
index 1c30a84e107..4f3b27f0063 100644
--- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java
@@ -17,19 +17,23 @@
package org.apache.solr.morphlines.solr;
import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
-import org.apache.avro.Schema.Field;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
+import com.google.common.base.Preconditions;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.util.BadHdfsThreadsFilter;
@@ -38,16 +42,6 @@ import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.io.Files;
-
@ThreadLeakFilters(defaultFilters = true, filters = {
BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
})
@@ -57,33 +51,33 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {
@Test
public void test() throws Exception {
- Joiner joiner = Joiner.on(File.separator);
- File file = new File(joiner.join(RESOURCES_DIR, "test-documents", "sample-statuses-20120906-141433-medium.avro"));
-
- waitForRecoveriesToFinish(false);
-
+
+ Path avro = Paths.get(RESOURCES_DIR).resolve("test-documents").resolve("sample-statuses-20120906-141433-medium.avro");
+
// load avro records via morphline and zk into solr
morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer");
Record record = new Record();
- byte[] body = Files.toByteArray(file);
+ byte[] body = Files.readAllBytes(avro);
record.put(Fields.ATTACHMENT_BODY, body);
startSession();
Notifications.notifyBeginTransaction(morphline);
assertTrue(morphline.process(record));
assertEquals(1, collector.getNumStartEvents());
- commit();
+ Notifications.notifyCommitTransaction(morphline);
+ new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION);
// fetch sorted result set from solr
- QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));
+ QueryResponse rsp = cluster.getSolrClient()
+ .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));
assertEquals(2104, collector.getRecords().size());
assertEquals(collector.getRecords().size(), rsp.getResults().size());
Collections.sort(collector.getRecords(), (r1, r2) -> r1.get("id").toString().compareTo(r2.get("id").toString()));
// fetch test input data and sort like solr result set
- List<GenericData.Record> records = new ArrayList();
- FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
+ List<GenericData.Record> records = new ArrayList<>();
+ FileReader<GenericData.Record> reader = new DataFileReader(avro.toFile(), new GenericDatumReader());
while (reader.hasNext()) {
GenericData.Record expected = reader.next();
records.add(expected);
@@ -121,7 +115,7 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {
Notifications.notifyRollbackTransaction(morphline);
Notifications.notifyShutdown(morphline);
- cloudClient.close();
+
}
private void assertTweetEquals(GenericData.Record expected, Record actual, int i) {
@@ -144,12 +138,4 @@ public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {
}
}
- private String toString(GenericData.Record avroRecord) {
- Record record = new Record();
- for (Field field : avroRecord.getSchema().getFields()) {
- record.put(field.name(), avroRecord.get(field.pos()));
- }
- return record.toString(); // prints sorted by key for human readability
- }
-
}
diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java
index 97c6dfb77d4..24d8682003b 100644
--- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java
+++ b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java
@@ -19,8 +19,10 @@ package org.apache.solr.morphlines.solr;
import java.io.File;
import java.util.Iterator;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.util.BadHdfsThreadsFilter;
@@ -29,21 +31,16 @@ import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-
@ThreadLeakFilters(defaultFilters = true, filters = {
BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
})
@Slow
public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase {
-
@Test
public void test() throws Exception {
- waitForRecoveriesToFinish(false);
-
- morphline = parse("test-morphlines" + File.separator + "loadSolrBasic");
+ morphline = parse("test-morphlines" + File.separator + "loadSolrBasic");
Record record = new Record();
record.put(Fields.ID, "id0-innsbruck");
record.put("text", "mytext");
@@ -76,9 +73,11 @@ public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase {
assertFalse(citer.hasNext());
- commit();
+ Notifications.notifyCommitTransaction(morphline);
+ new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION);
- QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
+ QueryResponse rsp = cluster.getSolrClient()
+ .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
//System.out.println(rsp);
Iterator<SolrDocument> iter = rsp.getResults().iterator();
assertEquals(expected.getFields(), next(iter));
@@ -87,7 +86,7 @@ public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase {
Notifications.notifyRollbackTransaction(morphline);
Notifications.notifyShutdown(morphline);
- cloudClient.close();
+
}
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index c5e51f918da..cf73b62cb08 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -1062,8 +1062,9 @@ public class Overseer implements Closeable {
throw new RuntimeException(e);
}
}
- public static boolean isLegacy(Map clusterProps) {
- return !"false".equals(clusterProps.get(ZkStateReader.LEGACY_CLOUD));
+ public static boolean isLegacy(ZkStateReader stateReader) {
+ String legacyProperty = stateReader.getClusterProperty(ZkStateReader.LEGACY_CLOUD, "true");
+ return !"false".equals(legacyProperty);
}
public ZkStateReader getZkStateReader() {
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
index c3571e366ed..93f88cdddb0 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
@@ -24,7 +24,6 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
@@ -148,7 +147,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
// TODO: extract to configurable strategy class ??
ClusterState clusterState = zkStateReader.getClusterState();
//check if we have disabled autoAddReplicas cluster wide
- String autoAddReplicas = (String) zkStateReader.getClusterProps().get(ZkStateReader.AUTO_ADD_REPLICAS);
+ String autoAddReplicas = zkStateReader.getClusterProperty(ZkStateReader.AUTO_ADD_REPLICAS, (String) null);
if (autoAddReplicas != null && autoAddReplicas.equals("false")) {
return;
}
@@ -229,7 +228,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
private boolean addReplica(final String collection, DownReplica badReplica) {
// first find best home - first strategy, sort by number of cores
// hosted where maxCoresPerNode is not violated
- final Integer maxCoreCount = (Integer) zkStateReader.getClusterProps().get(ZkStateReader.MAX_CORES_PER_NODE);
+ final Integer maxCoreCount = zkStateReader.getClusterProperty(ZkStateReader.MAX_CORES_PER_NODE, (Integer) null);
final String createUrl = getBestCreateUrl(zkStateReader, badReplica, maxCoreCount);
if (createUrl == null) {
log.warn("Could not find a node to create new replica on.");
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index b4dc93511e7..ed23e7754e1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -1894,7 +1894,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
positionVsNodes = identifyNodes(clusterState, nodeList, message, shardNames, repFactor);
}
- boolean isLegacyCloud = Overseer.isLegacy(zkStateReader.getClusterProps());
+ boolean isLegacyCloud = Overseer.isLegacy(zkStateReader);
createConfNode(configName, collectionName, isLegacyCloud);
@@ -2126,7 +2126,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
}
ModifiableSolrParams params = new ModifiableSolrParams();
- if (!Overseer.isLegacy(zkStateReader.getClusterProps())) {
+ if (!Overseer.isLegacy(zkStateReader)) {
if (!skipCreateReplicaInClusterState) {
ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(), ZkStateReader.COLLECTION_PROP,
collection, ZkStateReader.SHARD_ID_PROP, shard, ZkStateReader.CORE_NAME_PROP, coreName,
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
index 14f6c7185ad..0e2f1892191 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
@@ -16,7 +16,16 @@
*/
package org.apache.solr.cloud;
-import static org.apache.solr.common.params.CommonParams.*;
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.concurrent.TimeoutException;
+import java.util.regex.Pattern;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
@@ -28,26 +37,16 @@ import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
-import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.ClusterProperties;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkConfigManager;
-import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CoreContainer;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.xml.sax.SAXException;
-import javax.xml.parsers.ParserConfigurationException;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Paths;
-import java.util.List;
-import java.util.concurrent.TimeoutException;
-import java.util.regex.Pattern;
+import static org.apache.solr.common.params.CommonParams.NAME;
+import static org.apache.solr.common.params.CommonParams.VALUE_LONG;
public class ZkCLI {
@@ -324,28 +323,12 @@ public class ZkCLI {
//If -val option is missing, we will use the null value. This is required to maintain
//compatibility with Collections API.
String propertyValue = line.getOptionValue(VALUE_LONG);
- ZkStateReader reader = new ZkStateReader(zkClient);
+ ClusterProperties props = new ClusterProperties(zkClient);
try {
- reader.setClusterProperty(propertyName, propertyValue);
- } catch (SolrException ex) {
- //This can happen if two concurrent invocations of this command collide
- //with each other. Here we are just adding a defensive check to see if
- //the value is already set to expected value. If yes, then we don't
- //fail the command.
- Throwable cause = ex.getCause();
- if(cause instanceof KeeperException.NodeExistsException
- || cause instanceof KeeperException.BadVersionException) {
- String currentValue = (String)reader.getClusterProps().get(propertyName);
- if((currentValue == propertyValue) || (currentValue != null && currentValue.equals(propertyValue))) {
- return;
- }
- }
- System.out.println("Unable to set the cluster property due to following error : " +
- ex.getLocalizedMessage() +
- ((cause instanceof KeeperException.BadVersionException)?". Try again":""));
+ props.setClusterProperty(propertyName, propertyValue);
+ } catch (IOException ex) {
+ System.out.println("Unable to set the cluster property due to following error : " + ex.getLocalizedMessage());
System.exit(1);
- } finally {
- reader.close();
}
} else {
// If not cmd matches
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index ae73633cf33..444887b08a9 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -151,7 +151,7 @@ public final class ZkController {
private final int localHostPort; // example: 54065
private final String hostName; // example: 127.0.0.1
private final String nodeName; // example: 127.0.0.1:54065_solr
- private final String baseURL; // example: http://127.0.0.1:54065/solr
+ private String baseURL; // example: http://127.0.0.1:54065/solr
private final CloudConfig cloudConfig;
@@ -386,8 +386,6 @@ public final class ZkController {
if (cc != null) cc.securityNodeChanged();
});
- this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName);
-
init(registerOnReconnect);
}
@@ -642,6 +640,7 @@ public final class ZkController {
try {
createClusterZkNodes(zkClient);
zkStateReader.createClusterStateWatchersAndUpdate();
+ this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName);
// start the overseer first as following code may need its processing
if (!zkRunOnly) {
@@ -1215,23 +1214,10 @@ public final class ZkController {
if (context != null) {
context.cancelElection();
}
-
- final Collection<SolrCore> cores = cc.getCores();
-
- // if there is no SolrCore which is a member of this collection, remove the watch
+
CloudDescriptor cloudDescriptor = cd.getCloudDescriptor();
- boolean removeWatch = true;
- for (SolrCore solrCore : cores) {
- final CloudDescriptor cloudDesc = solrCore.getCoreDescriptor().getCloudDescriptor();
- if (cloudDesc != null && cloudDescriptor.getCollectionName().equals(cloudDesc.getCollectionName())) {
- removeWatch = false;
- break;
- }
- }
-
- if (removeWatch) {
- zkStateReader.removeZKWatch(collection);
- }
+ zkStateReader.unregisterCore(cloudDescriptor.getCollectionName());
+
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
OverseerAction.DELETECORE.toLower(), ZkStateReader.CORE_NAME_PROP, coreName,
ZkStateReader.NODE_NAME_PROP, getNodeName(),
@@ -1481,7 +1467,7 @@ public final class ZkController {
"Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" :
"Registering watch for collection {}",
collectionName);
- zkStateReader.addCollectionWatch(collectionName);
+ zkStateReader.registerCore(collectionName);
} catch (KeeperException e) {
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
@@ -1501,7 +1487,7 @@ public final class ZkController {
}
private void checkStateInZk(CoreDescriptor cd) throws InterruptedException {
- if (!Overseer.isLegacy(zkStateReader.getClusterProps())) {
+ if (!Overseer.isLegacy(zkStateReader)) {
CloudDescriptor cloudDesc = cd.getCloudDescriptor();
String coreNodeName = cloudDesc.getCoreNodeName();
assert coreNodeName != null : "SolrCore: " + cd.getName() + " has no coreNodeName";
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java
index 5147f43797b..495d1d3b932 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java
@@ -197,7 +197,7 @@ public class ReplicaMutator {
}
public ZkWriteCommand setState(ClusterState clusterState, ZkNodeProps message) {
- if (Overseer.isLegacy(zkStateReader.getClusterProps())) {
+ if (Overseer.isLegacy(zkStateReader)) {
return updateState(clusterState, message);
} else {
return updateStateNew(clusterState, message);
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index f933faba822..db9887cf1f1 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -761,7 +761,7 @@ public class CoreContainer {
boolean preExisitingZkEntry = false;
try {
if (getZkController() != null) {
- if (!Overseer.isLegacy(getZkController().getZkStateReader().getClusterProps())) {
+ if (!Overseer.isLegacy(getZkController().getZkStateReader())) {
if (cd.getCloudDescriptor().getCoreNodeName() == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "non legacy mode coreNodeName missing " + parameters.toString());
diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
index dc423d99212..c575ecbca10 100644
--- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
@@ -24,9 +24,9 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
-import org.apache.lucene.codecs.lucene60.Lucene60Codec;
-import org.apache.solr.common.SolrException;
+import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.plugin.SolrCoreAware;
@@ -91,7 +91,7 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
log.info("Using default compressionMode: " + compressionMode);
}
- codec = new Lucene60Codec(compressionMode) {
+ codec = new Lucene62Codec(compressionMode) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);
diff --git a/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java b/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java
index c7a668908ca..f9da1cfa7c1 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java
@@ -20,7 +20,6 @@ import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.List;
-import java.util.Locale;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
@@ -174,7 +173,7 @@ public class SolrDeletionPolicy extends IndexDeletionPolicy implements NamedListInitializedPlugin {
try {
if (maxCommitAge != null) {
if (maxCommitAgeTimeStamp==-1) {
- DateMathParser dmp = new DateMathParser(DateMathParser.UTC, Locale.ROOT);
+ DateMathParser dmp = new DateMathParser(DateMathParser.UTC);
maxCommitAgeTimeStamp = dmp.parseMath(maxCommitAge).getTime();
}
if (IndexDeletionPolicyWrapper.getCommitTimestamp(commit) < maxCommitAgeTimeStamp) {
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java b/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java
index 48302cd818d..dbe2e790f9d 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java
@@ -152,7 +152,7 @@ public class ClusterStatus {
clusterStatus.add("collections", collectionProps);
// read cluster properties
- Map<String, Object> clusterProps = zkStateReader.getClusterProps();
+ Map<String, Object> clusterProps = zkStateReader.getClusterProperties();
if (clusterProps != null && !clusterProps.isEmpty()) {
clusterStatus.add("properties", clusterProps);
}
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index 4deddedbf5d..6d501a1cb5a 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -51,17 +51,8 @@ import org.apache.solr.cloud.rule.ReplicaAssigner;
import org.apache.solr.cloud.rule.Rule;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.cloud.ClusterState;
-import org.apache.solr.common.cloud.DocCollection;
-import org.apache.solr.common.cloud.ImplicitDocRouter;
-import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.*;
import org.apache.solr.common.cloud.Replica.State;
-import org.apache.solr.common.cloud.Slice;
-import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.common.cloud.ZkCmdExecutor;
-import org.apache.solr.common.cloud.ZkCoreNodeProps;
-import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
@@ -570,7 +561,8 @@ public class CollectionsHandler extends RequestHandlerBase implements PermissionNameProvider {
Map<String, Object> call(SolrQueryRequest req, SolrQueryResponse rsp, CollectionsHandler h) throws Exception {
String name = req.getParams().required().get(NAME);
String val = req.getParams().get(VALUE_LONG);
- h.coreContainer.getZkController().getZkStateReader().setClusterProperty(name, val);
+ ClusterProperties cp = new ClusterProperties(h.coreContainer.getZkController().getZkClient());
+ cp.setClusterProperty(name, val);
return null;
}
},
@@ -808,7 +800,7 @@ public class CollectionsHandler extends RequestHandlerBase implements PermissionNameProvider {
String location = req.getParams().get("location");
if (location == null) {
- location = (String) h.coreContainer.getZkController().getZkStateReader().getClusterProps().get("location");
+ location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null);
}
if (location == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property");
@@ -832,7 +824,7 @@ public class CollectionsHandler extends RequestHandlerBase implements PermissionNameProvider {
String location = req.getParams().get("location");
if (location == null) {
- location = (String) h.coreContainer.getZkController().getZkStateReader().getClusterProps().get("location");
+ location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null);
}
if (location == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property");
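The two hunks above, together with the CLUSTERPROP change earlier in this file, split cluster-property access: writes now go through ClusterProperties (backed directly by ZooKeeper), while reads use ZkStateReader.getClusterProperty(name, default) instead of pulling the whole getClusterProps() map and casting. A hedged usage sketch using only calls that appear in this patch (the zkClient/reader variables and the "/backups" value are illustrative):

    import org.apache.solr.common.cloud.ClusterProperties;
    import org.apache.solr.common.cloud.SolrZkClient;
    import org.apache.solr.common.cloud.ZkStateReader;

    class ClusterPropertyExample {
      // Write via ClusterProperties, read back via ZkStateReader with a default value.
      static String readLocation(SolrZkClient zkClient, ZkStateReader reader) throws Exception {
        new ClusterProperties(zkClient).setClusterProperty("location", "/backups");
        return reader.getClusterProperty("location", (String) null); // null when unset
      }
    }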
diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java
new file mode 100644
index 00000000000..b58d0a4ba16
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.index;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MergePolicyWrapper;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.MergeTrigger;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
+
+// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specification?
+
+public final class SortingMergePolicy extends MergePolicyWrapper {
+
+ private final Sort sort;
+
+ /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
+ public SortingMergePolicy(MergePolicy in, Sort sort) {
+ super(in);
+ this.sort = sort;
+ }
+
+ /** Return the {@link Sort} order that is used to sort segments when merging. */
+ public Sort getSort() {
+ return sort;
+ }
+
+ @Override
+ public String toString() {
+ return "SortingMergePolicy(" + in + ", sort=" + sort + ")";
+ }
+}
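For orientation, this is roughly how the new Solr-side SortingMergePolicy would be instantiated, e.g. by SortingMergePolicyFactory: wrap any stock merge policy with the Sort to apply at merge time. Illustrative only; the "timestamp" field and the TieredMergePolicy choice are assumptions, not part of this patch:

    import org.apache.lucene.index.TieredMergePolicy;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.solr.index.SortingMergePolicy;

    class SortingMergePolicyExample {
      static SortingMergePolicy newPolicy() {
        // sort merged segments by a long "timestamp" field, newest first
        Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
        return new SortingMergePolicy(new TieredMergePolicy(), sort);
      }
    }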
diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java
index 53190b5f4f9..b22df3b3f97 100644
--- a/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java
+++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicyFactory.java
@@ -17,7 +17,6 @@
package org.apache.solr.index;
import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.solr.core.SolrResourceLoader;
diff --git a/solr/core/src/java/org/apache/solr/schema/DateRangeField.java b/solr/core/src/java/org/apache/solr/schema/DateRangeField.java
index f4070d38d30..d51c1f17022 100644
--- a/solr/core/src/java/org/apache/solr/schema/DateRangeField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DateRangeField.java
@@ -39,16 +39,18 @@ import org.apache.solr.util.DateMathParser;
import org.locationtech.spatial4j.shape.Shape;
/**
- * A field for indexed dates and date ranges. It's mostly compatible with TrieDateField.
+ * A field for indexed dates and date ranges. It's mostly compatible with TrieDateField. It has the potential to allow
+ * efficient faceting, similar to facet.enum.
*
* @see NumberRangePrefixTreeStrategy
* @see DateRangePrefixTree
*/
-public class DateRangeField extends AbstractSpatialPrefixTreeFieldType {
+public class DateRangeField extends AbstractSpatialPrefixTreeFieldType
+ implements DateValueFieldType { // used by ParseDateFieldUpdateProcessorFactory
private static final String OP_PARAM = "op";//local-param to resolve SpatialOperation
- private static final DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
+ private static final DateRangePrefixTree tree = new DateRangePrefixTree(DateRangePrefixTree.JAVA_UTIL_TIME_COMPAT_CAL);
@Override
protected void init(IndexSchema schema, Map<String, String> args) {
@@ -69,17 +71,24 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldType
- if (str.startsWith("NOW") || str.lastIndexOf('Z') >= 0) {
- //use Solr standard date format parsing rules.
- //TODO parse a Calendar instead of a Date, rounded according to DateMath syntax.
+ if (str.startsWith("NOW") || str.lastIndexOf('Z') >= 0) { // ? but not if Z is last char ? Ehh, whatever.
+ //use Solr standard date format parsing rules:
+ //TODO add DMP utility to return ZonedDateTime alternative, then set cal fields manually, which is faster?
Date date = DateMathParser.parseMath(null, str);
Calendar cal = tree.newCal();
cal.setTime(date);
@@ -119,19 +128,6 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldType
diff --git a/solr/core/src/java/org/apache/solr/util/DateMathParser.java b/solr/core/src/java/org/apache/solr/util/DateMathParser.java
--- a/solr/core/src/java/org/apache/solr/util/DateMathParser.java
+++ b/solr/core/src/java/org/apache/solr/util/DateMathParser.java
*
+ *
+ * Historical dates: The calendar computation is completely done with the
+ * Gregorian system/algorithm. It does not switch to Julian or
+ * anything else, unlike the default {@link java.util.GregorianCalendar}.
+ *
* @see SolrRequestInfo#getClientTimeZone
* @see SolrRequestInfo#getNOW
*/
@@ -103,9 +114,6 @@ public class DateMathParser {
/** Default TimeZone for DateMath rounding (UTC) */
public static final TimeZone DEFAULT_MATH_TZ = UTC;
- /** Default Locale for DateMath rounding (Locale.ROOT) */
- public static final Locale DEFAULT_MATH_LOCALE = Locale.ROOT;
-
/**
* Differs by {@link DateTimeFormatter#ISO_INSTANT} in that it's lenient.
* @see #parseNoMath(String)
@@ -115,22 +123,22 @@ public class DateMathParser {
/**
* A mapping from (uppercased) String labels identifying time units,
- * to the corresponding Calendar constant used to set/add/roll that unit
- * of measurement.
+ * to the corresponding {@link ChronoUnit} enum (e.g. "YEARS") used to
+ * set/add/roll that unit of measurement.
*
*
* A single logical unit of time might be represented by multiple labels
- * for convenience (ie: DATE==DAY,
- * MILLI==MILLISECOND)
+ * for convenience (ie: DATE==DAYS,
+ * MILLI==MILLIS)
*
*
* @see Calendar
*/
- public static final Map<String,Integer> CALENDAR_UNITS = makeUnitsMap();
+ public static final Map<String,ChronoUnit> CALENDAR_UNITS = makeUnitsMap();
/** @see #CALENDAR_UNITS */
- private static Map<String,Integer> makeUnitsMap() {
+ private static Map<String,ChronoUnit> makeUnitsMap() {
// NOTE: consciously choosing not to support WEEK at this time,
// because of complexity in rounding down to the nearest week
@@ -141,90 +149,69 @@ public class DateMathParser {
// we probably need to change "Locale loc" to default to something
// from a param via SolrRequestInfo as well.
- Map<String,Integer> units = new HashMap<>(13);
- units.put("YEAR", Calendar.YEAR);
- units.put("YEARS", Calendar.YEAR);
- units.put("MONTH", Calendar.MONTH);
- units.put("MONTHS", Calendar.MONTH);
- units.put("DAY", Calendar.DATE);
- units.put("DAYS", Calendar.DATE);
- units.put("DATE", Calendar.DATE);
- units.put("HOUR", Calendar.HOUR_OF_DAY);
- units.put("HOURS", Calendar.HOUR_OF_DAY);
- units.put("MINUTE", Calendar.MINUTE);
- units.put("MINUTES", Calendar.MINUTE);
- units.put("SECOND", Calendar.SECOND);
- units.put("SECONDS", Calendar.SECOND);
- units.put("MILLI", Calendar.MILLISECOND);
- units.put("MILLIS", Calendar.MILLISECOND);
- units.put("MILLISECOND", Calendar.MILLISECOND);
- units.put("MILLISECONDS",Calendar.MILLISECOND);
+ Map<String,ChronoUnit> units = new HashMap<>(13);
+ units.put("YEAR", ChronoUnit.YEARS);
+ units.put("YEARS", ChronoUnit.YEARS);
+ units.put("MONTH", ChronoUnit.MONTHS);
+ units.put("MONTHS", ChronoUnit.MONTHS);
+ units.put("DAY", ChronoUnit.DAYS);
+ units.put("DAYS", ChronoUnit.DAYS);
+ units.put("DATE", ChronoUnit.DAYS);
+ units.put("HOUR", ChronoUnit.HOURS);
+ units.put("HOURS", ChronoUnit.HOURS);
+ units.put("MINUTE", ChronoUnit.MINUTES);
+ units.put("MINUTES", ChronoUnit.MINUTES);
+ units.put("SECOND", ChronoUnit.SECONDS);
+ units.put("SECONDS", ChronoUnit.SECONDS);
+ units.put("MILLI", ChronoUnit.MILLIS);
+ units.put("MILLIS", ChronoUnit.MILLIS);
+ units.put("MILLISECOND", ChronoUnit.MILLIS);
+ units.put("MILLISECONDS",ChronoUnit.MILLIS);
+
+ // NOTE: Maybe eventually support NANOS
return units;
}
/**
- * Modifies the specified Calendar by "adding" the specified value of units
+ * Returns a modified time by "adding" the specified value of units
*
* @exception IllegalArgumentException if unit isn't recognized.
* @see #CALENDAR_UNITS
*/
- public static void add(Calendar c, int val, String unit) {
- Integer uu = CALENDAR_UNITS.get(unit);
+ private static LocalDateTime add(LocalDateTime t, int val, String unit) {
+ ChronoUnit uu = CALENDAR_UNITS.get(unit);
if (null == uu) {
throw new IllegalArgumentException("Adding Unit not recognized: "
+ unit);
}
- c.add(uu.intValue(), val);
+ return t.plus(val, uu);
}
/**
- * Modifies the specified Calendar by "rounding" down to the specified unit
+ * Returns a modified time by "rounding" down to the specified unit
*
* @exception IllegalArgumentException if unit isn't recognized.
* @see #CALENDAR_UNITS
*/
- public static void round(Calendar c, String unit) {
- Integer uu = CALENDAR_UNITS.get(unit);
+ private static LocalDateTime round(LocalDateTime t, String unit) {
+ ChronoUnit uu = CALENDAR_UNITS.get(unit);
if (null == uu) {
throw new IllegalArgumentException("Rounding Unit not recognized: "
+ unit);
}
- int u = uu.intValue();
-
- switch (u) {
-
- case Calendar.YEAR:
- c.clear(Calendar.MONTH);
- /* fall through */
- case Calendar.MONTH:
- c.clear(Calendar.DAY_OF_MONTH);
- c.clear(Calendar.DAY_OF_WEEK);
- c.clear(Calendar.DAY_OF_WEEK_IN_MONTH);
- c.clear(Calendar.DAY_OF_YEAR);
- c.clear(Calendar.WEEK_OF_MONTH);
- c.clear(Calendar.WEEK_OF_YEAR);
- /* fall through */
- case Calendar.DATE:
- c.clear(Calendar.HOUR_OF_DAY);
- c.clear(Calendar.HOUR);
- c.clear(Calendar.AM_PM);
- /* fall through */
- case Calendar.HOUR_OF_DAY:
- c.clear(Calendar.MINUTE);
- /* fall through */
- case Calendar.MINUTE:
- c.clear(Calendar.SECOND);
- /* fall through */
- case Calendar.SECOND:
- c.clear(Calendar.MILLISECOND);
- break;
- default:
- throw new IllegalStateException(
- "No logic for rounding value ("+u+") " + unit
- );
+ // note: OffsetDateTime.truncatedTo does not support >= DAYS units so we handle those
+ switch (uu) {
+ case YEARS:
+ return LocalDateTime.of(LocalDate.of(t.getYear(), 1, 1), LocalTime.MIDNIGHT); // midnight is 00:00:00
+ case MONTHS:
+ return LocalDateTime.of(LocalDate.of(t.getYear(), t.getMonth(), 1), LocalTime.MIDNIGHT);
+ case DAYS:
+ return LocalDateTime.of(t.toLocalDate(), LocalTime.MIDNIGHT);
+ default:
+ assert !uu.isDateBased();// >= DAY
+ return t.truncatedTo(uu);
}
-
}
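The special cases for YEARS, MONTHS and DAYS above exist because LocalDateTime.truncatedTo rejects units larger than a day with UnsupportedTemporalTypeException, so month and year rounding is rebuilt from the date fields (DAYS is handled the same way for uniformity). A small standalone demonstration, not part of the patch:

    import java.time.LocalDateTime;
    import java.time.temporal.ChronoUnit;
    import java.time.temporal.UnsupportedTemporalTypeException;

    class TruncationExample {
      public static void main(String[] args) {
        LocalDateTime t = LocalDateTime.of(1234, 7, 4, 12, 8, 56);
        System.out.println(t.truncatedTo(ChronoUnit.HOURS)); // 1234-07-04T12:00
        try {
          t.truncatedTo(ChronoUnit.MONTHS); // units above a day are not supported here
        } catch (UnsupportedTemporalTypeException e) {
          System.out.println("MONTHS is rejected: " + e.getMessage());
        }
      }
    }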
/**
@@ -290,23 +277,19 @@ public class DateMathParser {
* otherwise specified in the SolrRequestInfo
*
* @see SolrRequestInfo#getClientTimeZone
- * @see #DEFAULT_MATH_LOCALE
*/
public DateMathParser() {
- this(null, DEFAULT_MATH_LOCALE);
+ this(null);
}
/**
- * @param tz The TimeZone used for rounding (to determine when hours/days begin). If null, then this method defaults to the value dicated by the SolrRequestInfo if it
- * exists -- otherwise it uses UTC.
- * @param l The Locale used for rounding (to determine when weeks begin). If null, then this method defaults to en_US.
+ * @param tz The TimeZone used for rounding (to determine when hours/days begin). If null, then this method defaults
+ * to the value dictated by the SolrRequestInfo if it exists -- otherwise it uses UTC.
* @see #DEFAULT_MATH_TZ
- * @see #DEFAULT_MATH_LOCALE
* @see Calendar#getInstance(TimeZone,Locale)
* @see SolrRequestInfo#getClientTimeZone
*/
- public DateMathParser(TimeZone tz, Locale l) {
- loc = (null != l) ? l : DEFAULT_MATH_LOCALE;
+ public DateMathParser(TimeZone tz) {
if (null == tz) {
SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo();
tz = (null != reqInfo) ? reqInfo.getClientTimeZone() : DEFAULT_MATH_TZ;
@@ -321,13 +304,6 @@ public class DateMathParser {
return this.zone;
}
- /**
- * @return the locale
- */
- public Locale getLocale() {
- return this.loc;
- }
-
/**
* Defines this instance's concept of "now".
* @see #getNow
@@ -337,7 +313,7 @@ public class DateMathParser {
}
/**
- * Returns a cloned of this instance's concept of "now".
+ * Returns a clone of this instance's concept of "now" (never null).
*
* If setNow was never called (or if null was specified) then this method
* first defines 'now' as the value dictated by the SolrRequestInfo if it
@@ -353,7 +329,7 @@ public class DateMathParser {
// fall back to current time if no request info set
now = new Date();
} else {
- now = reqInfo.getNOW();
+ now = reqInfo.getNOW(); // never null
}
}
return (Date) now.clone();
@@ -365,15 +341,15 @@ public class DateMathParser {
* @exception ParseException positions in ParseExceptions are token positions, not character positions.
*/
public Date parseMath(String math) throws ParseException {
-
- Calendar cal = Calendar.getInstance(zone, loc);
- cal.setTime(getNow());
-
/* check for No-Op */
if (0==math.length()) {
- return cal.getTime();
+ return getNow();
}
-
+
+ ZoneId zoneId = zone.toZoneId();
+ // localDateTime is a date and time local to the timezone specified
+ LocalDateTime localDateTime = ZonedDateTime.ofInstant(getNow().toInstant(), zoneId).toLocalDateTime();
+
String[] ops = splitter.split(math);
int pos = 0;
while ( pos < ops.length ) {
@@ -391,7 +367,7 @@ public class DateMathParser {
("Need a unit after command: \"" + command + "\"", pos);
}
try {
- round(cal, ops[pos++]);
+ localDateTime = round(localDateTime, ops[pos++]);
} catch (IllegalArgumentException e) {
throw new ParseException
("Unit not recognized: \"" + ops[pos-1] + "\"", pos-1);
@@ -415,7 +391,7 @@ public class DateMathParser {
}
try {
String unit = ops[pos++];
- add(cal, val, unit);
+ localDateTime = add(localDateTime, val, unit);
} catch (IllegalArgumentException e) {
throw new ParseException
("Unit not recognized: \"" + ops[pos-1] + "\"", pos-1);
@@ -427,7 +403,7 @@ public class DateMathParser {
}
}
- return cal.getTime();
+ return Date.from(ZonedDateTime.of(localDateTime, zoneId).toInstant());
}
private static Pattern splitter = Pattern.compile("\\b|(?<=\\d)(?=\\D)");
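Putting the reworked parser together: the Locale parameter is gone, rounding and addition happen on java.time types in the configured time zone, and the result is converted back to a java.util.Date. A minimal usage sketch (UTC and the "/DAY" expression are illustrative choices):

    import java.text.ParseException;
    import java.util.Date;
    import java.util.TimeZone;
    import org.apache.solr.util.DateMathParser;

    class DateMathExample {
      static Date startOfToday() throws ParseException {
        DateMathParser p = new DateMathParser(TimeZone.getTimeZone("UTC"));
        p.setNow(new Date());       // optional; otherwise NOW comes from the request or the clock
        return p.parseMath("/DAY"); // round down to midnight in the parser's time zone
      }
    }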
diff --git a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
index 9d45a0dabc9..579ccf082f4 100644
--- a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
+++ b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
@@ -867,7 +867,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
assertQ("check counts using fixed NOW and TZ rounding",
req("q", "bday:[NOW/DAY TO NOW/DAY+1DAY]",
- "TZ", "GMT-23",
+ "TZ", "GMT+01",
"NOW", "205369736000" // 1976-07-04T23:08:56.235Z
),
"*[count(//doc)=0]");
diff --git a/solr/core/src/test/org/apache/solr/TestTrie.java b/solr/core/src/test/org/apache/solr/TestTrie.java
index 07935b209bf..47c50b2e099 100644
--- a/solr/core/src/test/org/apache/solr/TestTrie.java
+++ b/solr/core/src/test/org/apache/solr/TestTrie.java
@@ -172,7 +172,7 @@ public class TestTrie extends SolrTestCaseJ4 {
format.setTimeZone(TimeZone.getTimeZone("UTC"));
assertU(delQ("*:*"));
- DateMathParser dmp = new DateMathParser(DateMathParser.UTC, Locale.ROOT);
+ DateMathParser dmp = new DateMathParser(DateMathParser.UTC);
String largestDate = "";
for (int i = 0; i < 10; i++) {
// index 10 days starting with today
@@ -221,7 +221,7 @@ public class TestTrie extends SolrTestCaseJ4 {
// For tdate tests
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
format.setTimeZone(TimeZone.getTimeZone("UTC"));
- DateMathParser dmp = new DateMathParser(DateMathParser.UTC, Locale.ROOT);
+ DateMathParser dmp = new DateMathParser(DateMathParser.UTC);
for (int i = 0; i < 10; i++) {
long l = Integer.MAX_VALUE + i*1L;
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java
index 6ada6bb5d98..f77829d135f 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java
@@ -1322,7 +1322,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBase {
boolean changed = false;
while(! timeout.hasTimedOut()){
Thread.sleep(10);
- changed = Objects.equals(val,client.getZkStateReader().getClusterProps().get(name));
+ changed = Objects.equals(val,client.getZkStateReader().getClusterProperty(name, (String) null));
if(changed) break;
}
return changed;
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
index 0975b9aae2e..b04bfbc3ffa 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
@@ -45,7 +45,6 @@ import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
-import org.junit.Ignore;
import org.junit.Test;
import static org.apache.solr.cloud.ReplicaPropertiesBase.verifyUniqueAcrossCollection;
@@ -336,7 +335,7 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase {
while(! timeout.hasTimedOut()){
Thread.sleep(10);
changed = Objects.equals("false",
- cloudClient.getZkStateReader().getClusterProps().get(ZkStateReader.LEGACY_CLOUD));
+ cloudClient.getZkStateReader().getClusterProperty(ZkStateReader.LEGACY_CLOUD, "none"));
if(changed) break;
}
assertTrue("The Cluster property wasn't set", changed);
@@ -351,7 +350,7 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase {
changed = false;
while(! timeout.hasTimedOut()) {
Thread.sleep(10);
- changed = (cloudClient.getZkStateReader().getClusterProps().get(ZkStateReader.LEGACY_CLOUD) == null);
+ changed = (cloudClient.getZkStateReader().getClusterProperty(ZkStateReader.LEGACY_CLOUD, (String) null) == null);
if(changed)
break;
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
index f5a09b0da13..8efb60525a0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
@@ -16,11 +16,17 @@
*/
package org.apache.solr.cloud;
+import java.lang.invoke.MethodHandles;
+import java.util.*;
+import java.util.Map.Entry;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.TimeUnit;
+
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent;
import org.apache.solr.cloud.Overseer.LeaderStatus;
+import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
@@ -48,32 +54,7 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Queue;
-import java.util.Set;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.TimeUnit;
-
-import static org.easymock.EasyMock.anyBoolean;
-import static org.easymock.EasyMock.anyObject;
-import static org.easymock.EasyMock.capture;
-import static org.easymock.EasyMock.createMock;
-import static org.easymock.EasyMock.expect;
-import static org.easymock.EasyMock.expectLastCall;
-import static org.easymock.EasyMock.getCurrentArguments;
-import static org.easymock.EasyMock.replay;
-import static org.easymock.EasyMock.reset;
-import static org.easymock.EasyMock.verify;
+import static org.easymock.EasyMock.*;
public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
@@ -284,11 +265,12 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
}).anyTimes();
}
- zkStateReaderMock.getClusterProps();
- expectLastCall().andAnswer(new IAnswer<Map>() {
+
+ zkStateReaderMock.getClusterProperty("legacyCloud", "true");
+ expectLastCall().andAnswer(new IAnswer<String>() {
@Override
- public Map answer() throws Throwable {
- return new HashMap();
+ public String answer() throws Throwable {
+ return "true";
}
});
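The same expectation could also be stubbed with EasyMock's shorter expect(...).andReturn(...) form; the test keeps the IAnswer style used elsewhere in the class. Sketch for comparison only, assuming an already-created ZkStateReader mock in record mode:

    import static org.easymock.EasyMock.expect;

    import org.apache.solr.common.cloud.ZkStateReader;

    class LegacyCloudStubExample {
      static void stubLegacyCloud(ZkStateReader zkStateReaderMock) {
        // answer "true" however often the code under test asks for legacyCloud
        expect(zkStateReaderMock.getClusterProperty("legacyCloud", "true")).andReturn("true").anyTimes();
      }
    }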
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
index 1109f9ed0a8..d1521a0d399 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
@@ -16,6 +16,15 @@
*/
package org.apache.solr.cloud;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.nio.charset.StandardCharsets;
+import java.util.Collection;
+import java.util.List;
+
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.RegexFileFilter;
@@ -23,6 +32,7 @@ import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.solr.SolrJettyTestBase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.ClusterProperties;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.VMParamsAllAndReadonlyDigestZkACLProvider;
import org.apache.solr.common.cloud.ZkConfigManager;
@@ -37,15 +47,6 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.InputStream;
-import java.lang.invoke.MethodHandles;
-import java.nio.charset.StandardCharsets;
-import java.util.Collection;
-import java.util.List;
-
// TODO: This test would be a lot faster if it used a solrhome with fewer config
// files - there are a lot of them to upload
public class ZkCLITest extends SolrTestCaseJ4 {
@@ -321,22 +322,19 @@ public class ZkCLITest extends SolrTestCaseJ4 {
@Test
public void testSetClusterProperty() throws Exception {
- ZkStateReader reader = new ZkStateReader(zkClient);
- try {
- // add property urlScheme=http
- String[] args = new String[] {"-zkhost", zkServer.getZkAddress(),
- "-cmd", "CLUSTERPROP", "-name", "urlScheme", "-val", "http"};
- ZkCLI.main(args);
- assertEquals("http", reader.getClusterProps().get("urlScheme"));
-
- // remove it again
- args = new String[] {"-zkhost", zkServer.getZkAddress(),
- "-cmd", "CLUSTERPROP", "-name", "urlScheme"};
- ZkCLI.main(args);
- assertNull(reader.getClusterProps().get("urlScheme"));
- } finally {
- reader.close();
- }
+ ClusterProperties properties = new ClusterProperties(zkClient);
+ // add property urlScheme=http
+ String[] args = new String[] {"-zkhost", zkServer.getZkAddress(),
+ "-cmd", "CLUSTERPROP", "-name", "urlScheme", "-val", "http"};
+ ZkCLI.main(args);
+ assertEquals("http", properties.getClusterProperty("urlScheme", "none"));
+
+ // remove it again
+ args = new String[] {"-zkhost", zkServer.getZkAddress(),
+ "-cmd", "CLUSTERPROP", "-name", "urlScheme"};
+ ZkCLI.main(args);
+ assertNull(properties.getClusterProperty("urlScheme", (String) null));
+
}
@Test
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
index 912369787a0..b8654391977 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
@@ -16,17 +16,15 @@
*/
package org.apache.solr.cloud;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.common.cloud.ClusterState;
-import org.apache.solr.common.cloud.DocCollection;
-import org.apache.solr.common.cloud.DocRouter;
-import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.common.cloud.Slice;
-import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.common.cloud.ZkConfigManager;
-import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.cloud.*;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.CloudConfig;
import org.apache.solr.core.CoreContainer;
@@ -40,12 +38,6 @@ import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
-
@Slow
@SolrTestCaseJ4.SuppressSSL
public class ZkControllerTest extends SolrTestCaseJ4 {
@@ -98,59 +90,71 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
- ZkStateReader zkStateReader = new ZkStateReader(server.getZkAddress(), TIMEOUT, TIMEOUT);
- try {
- // getBaseUrlForNodeName
- assertEquals("http://zzz.xxx:1234/solr",
- zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr"));
- assertEquals("http://xxx:99",
- zkStateReader.getBaseUrlForNodeName("xxx:99_"));
- assertEquals("http://foo-bar.baz.org:9999/some_dir",
- zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_some_dir"));
- assertEquals("http://foo-bar.baz.org:9999/solr/sub_dir",
- zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_solr%2Fsub_dir"));
-
- // generateNodeName + getBaseUrlForNodeName
- assertEquals("http://foo:9876/solr",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo","9876","solr")));
- assertEquals("http://foo:9876/solr",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo","9876","/solr")));
- assertEquals("http://foo:9876/solr",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo","9876","/solr/")));
- assertEquals("http://foo.bar.com:9876/solr/sub_dir",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo.bar.com","9876","solr/sub_dir")));
- assertEquals("http://foo.bar.com:9876/solr/sub_dir",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo.bar.com","9876","/solr/sub_dir/")));
- assertEquals("http://foo-bar:9876",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo-bar","9876","")));
- assertEquals("http://foo-bar:9876",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo-bar","9876","/")));
- assertEquals("http://foo-bar.com:80/some_dir",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo-bar.com","80","some_dir")));
- assertEquals("http://foo-bar.com:80/some_dir",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo-bar.com","80","/some_dir")));
+ try (SolrZkClient client = new SolrZkClient(server.getZkAddress(), TIMEOUT)) {
+
+ ZkController.createClusterZkNodes(client);
+
+ try (ZkStateReader zkStateReader = new ZkStateReader(client)) {
+ zkStateReader.createClusterStateWatchersAndUpdate();
+
+ // getBaseUrlForNodeName
+ assertEquals("http://zzz.xxx:1234/solr",
+ zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr"));
+ assertEquals("http://xxx:99",
+ zkStateReader.getBaseUrlForNodeName("xxx:99_"));
+ assertEquals("http://foo-bar.baz.org:9999/some_dir",
+ zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_some_dir"));
+ assertEquals("http://foo-bar.baz.org:9999/solr/sub_dir",
+ zkStateReader.getBaseUrlForNodeName("foo-bar.baz.org:9999_solr%2Fsub_dir"));
+
+ // generateNodeName + getBaseUrlForNodeName
+ assertEquals("http://foo:9876/solr",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo", "9876", "solr")));
+ assertEquals("http://foo:9876/solr",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo", "9876", "/solr")));
+ assertEquals("http://foo:9876/solr",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo", "9876", "/solr/")));
+ assertEquals("http://foo.bar.com:9876/solr/sub_dir",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo.bar.com", "9876", "solr/sub_dir")));
+ assertEquals("http://foo.bar.com:9876/solr/sub_dir",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo.bar.com", "9876", "/solr/sub_dir/")));
+ assertEquals("http://foo-bar:9876",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo-bar", "9876", "")));
+ assertEquals("http://foo-bar:9876",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo-bar", "9876", "/")));
+ assertEquals("http://foo-bar.com:80/some_dir",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo-bar.com", "80", "some_dir")));
+ assertEquals("http://foo-bar.com:80/some_dir",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo-bar.com", "80", "/some_dir")));
+
+ }
+
+ ClusterProperties cp = new ClusterProperties(client);
+ cp.setClusterProperty("urlScheme", "https");
//Verify the URL Scheme is taken into account
- zkStateReader.getZkClient().create(ZkStateReader.CLUSTER_PROPS,
- Utils.toJSON(Collections.singletonMap("urlScheme", "https")), CreateMode.PERSISTENT, true);
-
- assertEquals("https://zzz.xxx:1234/solr",
- zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr"));
-
- assertEquals("https://foo-bar.com:80/some_dir",
- zkStateReader.getBaseUrlForNodeName
- (ZkController.generateNodeName("foo-bar.com","80","/some_dir")));
- } finally {
- zkStateReader.close();
+
+ try (ZkStateReader zkStateReader = new ZkStateReader(client)) {
+
+ zkStateReader.createClusterStateWatchersAndUpdate();
+
+ assertEquals("https://zzz.xxx:1234/solr",
+ zkStateReader.getBaseUrlForNodeName("zzz.xxx:1234_solr"));
+
+ assertEquals("https://foo-bar.com:80/some_dir",
+ zkStateReader.getBaseUrlForNodeName
+ (ZkController.generateNodeName("foo-bar.com", "80", "/some_dir")));
+
+ }
}
} finally {
server.shutdown();
diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java
index fd8d4939176..ad51614b19c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java
@@ -62,6 +62,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
ZkTestServer server = new ZkTestServer(zkDir);
SolrZkClient zkClient = null;
+ ZkStateReader reader = null;
try {
server.run();
@@ -71,10 +72,10 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
ZkController.createClusterZkNodes(zkClient);
- ZkStateReader reader = new ZkStateReader(zkClient);
+ reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
if (isInteresting) {
- reader.addCollectionWatch("c1");
+ reader.registerCore("c1");
}
ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());
@@ -136,7 +137,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
assertEquals(2, collection.getStateFormat());
}
} finally {
- IOUtils.close(zkClient);
+ IOUtils.close(reader, zkClient);
server.shutdown();
}
@@ -146,6 +147,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
String zkDir = createTempDir("testExternalCollectionWatchedNotWatched").toFile().getAbsolutePath();
ZkTestServer server = new ZkTestServer(zkDir);
SolrZkClient zkClient = null;
+ ZkStateReader reader = null;
try {
server.run();
@@ -155,7 +157,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
ZkController.createClusterZkNodes(zkClient);
- ZkStateReader reader = new ZkStateReader(zkClient);
+ reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());
@@ -170,13 +172,13 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
reader.forceUpdateCollection("c1");
assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded());
- reader.addCollectionWatch("c1");
+ reader.registerCore("c1");
assertFalse(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded());
- reader.removeZKWatch("c1");
+ reader.unregisterCore("c1");
assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded());
} finally {
- IOUtils.close(zkClient);
+ IOUtils.close(reader, zkClient);
server.shutdown();
}
}
@@ -187,6 +189,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
ZkTestServer server = new ZkTestServer(zkDir);
SolrZkClient zkClient = null;
+ ZkStateReader reader = null;
try {
server.run();
@@ -196,9 +199,9 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
ZkController.createClusterZkNodes(zkClient);
- ZkStateReader reader = new ZkStateReader(zkClient);
+ reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
- reader.addCollectionWatch("c1");
+ reader.registerCore("c1");
// Initially there should be no c1 collection.
assertNull(reader.getClusterState().getCollectionRef("c1"));
@@ -234,7 +237,7 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
assertFalse(ref.isLazilyLoaded());
assertEquals(2, ref.get().getStateFormat());
} finally {
- IOUtils.close(zkClient);
+ IOUtils.close(reader, zkClient);
server.shutdown();
}
diff --git a/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java
index 7e8ab470350..70744e24af0 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/SearchHandlerTest.java
@@ -21,21 +21,16 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
-import org.apache.solr.client.solrj.request.SolrPing;
import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.response.SolrQueryResponse;
import org.junit.BeforeClass;
import org.junit.Test;
-
public class SearchHandlerTest extends SolrTestCaseJ4
{
@BeforeClass
diff --git a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
index 1bb860e20a0..05149185668 100644
--- a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
+++ b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
@@ -939,7 +939,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"*[count("+pre+"/int)=2]"
,pre+"/int[@name='1976-07-05T00:00:00Z'][.='2' ]"
,pre+"/int[@name='1976-07-06T00:00:00Z'][.='0']"
-
+
,meta+"/int[@name='before' ][.='5']"
);
assertQ("check after is not inclusive of lower bound by default (for dates)",
@@ -955,10 +955,10 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"*[count("+pre+"/int)=2]"
,pre+"/int[@name='1976-07-03T00:00:00Z'][.='2' ]"
,pre+"/int[@name='1976-07-04T00:00:00Z']" + jul4
-
+
,meta+"/int[@name='after' ][.='9']"
);
-
+
assertQ("check hardend=false",
req( "q", "*:*"
@@ -975,7 +975,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,pre+"/int[@name='1976-07-01T00:00:00Z'][.='5' ]"
,pre+"/int[@name='1976-07-06T00:00:00Z'][.='0' ]"
,pre+"/int[@name='1976-07-11T00:00:00Z'][.='4' ]"
-
+
,meta+"/int[@name='before' ][.='2']"
,meta+"/int[@name='after' ][.='3']"
,meta+"/int[@name='between'][.='9']"
@@ -996,12 +996,33 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,pre+"/int[@name='1976-07-01T00:00:00Z'][.='5' ]"
,pre+"/int[@name='1976-07-06T00:00:00Z'][.='0' ]"
,pre+"/int[@name='1976-07-11T00:00:00Z'][.='1' ]"
-
+
,meta+"/int[@name='before' ][.='2']"
,meta+"/int[@name='after' ][.='6']"
,meta+"/int[@name='between'][.='6']"
);
-
+
+ //Fixed by SOLR-9080 related to the Gregorian Change Date
+ assertQ("check BC era",
+ req( "q", "*:*"
+ ,"rows", "0"
+ ,"facet", "true"
+ ,p, f
+ ,p+".start", "-0200-01-01T00:00:00Z" // BC
+ ,p+".end", "+0200-01-01T00:00:00Z" // AD
+ ,p+".gap", "+100YEARS"
+ ,p+".other", "all"
+ )
+ ,pre+"/int[@name='-0200-01-01T00:00:00Z'][.='0']"
+ ,pre+"/int[@name='-0100-01-01T00:00:00Z'][.='0']"
+ ,pre+"/int[@name='0000-01-01T00:00:00Z'][.='0']"
+ ,pre+"/int[@name='0100-01-01T00:00:00Z'][.='0']"
+ ,meta+"/int[@name='before' ][.='0']"
+ ,meta+"/int[@name='after' ][.='14']"
+ ,meta+"/int[@name='between'][.='0']"
+
+ );
+
}
@Test
diff --git a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java
index c0882d3499a..87cfeb36b3b 100644
--- a/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java
+++ b/solr/core/src/test/org/apache/solr/response/transform/TestSubQueryTransformerDistrib.java
@@ -54,14 +54,12 @@ public class TestSubQueryTransformerDistrib extends AbstractFullDistribZkTestBase {
int peopleMultiplier = atLeast(1);
int deptMultiplier = atLeast(1);
- String people = "people";
- int numPeopleShards;
- createCollection(people, atLeast(1), numPeopleShards = atLeast(2), numPeopleShards);
+ final String people = "people";
+ createCollection(people, 2, 1, 10);
- String depts = "departments";
- int numDeptsShards;
- createCollection(depts, atLeast(1), numDeptsShards = atLeast(2), numDeptsShards);
+ final String depts = "departments";
+ createCollection(depts, 2, 1, 10);
createIndex(people, peopleMultiplier, depts, deptMultiplier);
diff --git a/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java
index 372c9e97eec..e76f8217cfd 100644
--- a/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/DateRangeFieldTest.java
@@ -55,6 +55,35 @@ public class DateRangeFieldTest extends SolrTestCaseJ4 {
assertQ(req("q", "dateRange:[1999 TO 2001]"), xpathMatches(0, 2));
}
+ public void testBeforeGregorianChangeDate() { // GCD is the year 1582
+ assertU(delQ("*:*"));
+ assertU(adoc("id", "0", "dateRange", "1500-01-01T00:00:00Z"));
+ assertU(adoc("id", "1", "dateRange", "-1500-01-01T00:00:00Z")); // BC
+ assertU(adoc("id", "2", "dateRange", "1400-01-01T00:00:00Z/YEAR")); // date math of month or year can cause issues
+ assertU(adoc("id", "3", "dateRange", "1300")); // the whole year of 1300
+ assertU(commit());
+
+ //ensure round-trip toString
+ assertQ(req("q", "id:0", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='1500-01-01T00:00:00Z']");
+ assertQ(req("q", "id:1", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='-1500-01-01T00:00:00Z']");
+ // note: fixed by SOLR-9080, would instead find "1399-01-09T00:00:00Z"
+ assertQ(req("q", "id:2", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='1400-01-01T00:00:00Z']");
+ assertQ(req("q", "id:3", "fl", "dateRange"), "//result/doc/arr[@name='dateRange']/str[.='1300']");
+
+ //ensure range syntax works
+ assertQ(req("q", "dateRange:[1450-01-01T00:00:00Z TO 1499-12-31T23:59:59Z]"), xpathMatches());// before
+ assertQ(req("q", "dateRange:[1500-01-01T00:00:00Z TO 1500-01-01T00:00:00Z]"), xpathMatches(0));// spot on
+ assertQ(req("q", "dateRange:[1500-01-01T00:00:01Z TO 1550-01-01T00:00:00Z]"), xpathMatches());// after
+
+ assertQ(req("q", "dateRange:[-1500-01-01T00:00:00Z TO -1500-01-01T00:00:00Z]"), xpathMatches(1));
+
+ // do range queries in the vicinity of docId=3 val:"1300"
+ assertQ(req("q", "dateRange:[1299 TO 1299-12-31T23:59:59Z]"), xpathMatches());//adjacent
+ assertQ(req("q", "dateRange:[1299 TO 1300-01-01T00:00:00Z]"), xpathMatches(3));// expand + 1 sec
+ assertQ(req("q", "dateRange:1301"), xpathMatches()); // adjacent
+ assertQ(req("q", "dateRange:[1300-12-31T23:59:59Z TO 1301]"), xpathMatches(3)); // expand + 1 sec
+ }
+
@Test
public void testMultiValuedDateRanges() {
assertU(delQ("*:*"));
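Background for the new testBeforeGregorianChangeDate: java.time works on the proleptic Gregorian calendar, whereas java.util.GregorianCalendar silently switches to Julian rules before 1582-10-15, shifting historical dates by several days. A standalone illustration of that shift (not part of the patch; the 1400-01-01 instant is just an example):

    import java.time.Instant;
    import java.util.Date;
    import java.util.GregorianCalendar;
    import java.util.TimeZone;

    class GregorianChangeDateExample {
      public static void main(String[] args) {
        Instant instant = Instant.parse("1400-01-01T00:00:00Z"); // proleptic Gregorian date
        GregorianCalendar julianStyle = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
        julianStyle.setTime(Date.from(instant));
        // Prints a day in late December 1399, because the default GregorianCalendar
        // applies Julian rules before the 1582 cutover; this is the kind of shift
        // SOLR-9080 removes from Solr's date handling.
        System.out.println(instant + " -> year " + julianStyle.get(GregorianCalendar.YEAR)
            + ", day-of-year " + julianStyle.get(GregorianCalendar.DAY_OF_YEAR));
      }
    }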
diff --git a/solr/core/src/test/org/apache/solr/search/TestDocSet.java b/solr/core/src/test/org/apache/solr/search/TestDocSet.java
index 9c46d5baa57..cdddd86e7ec 100644
--- a/solr/core/src/test/org/apache/solr/search/TestDocSet.java
+++ b/solr/core/src/test/org/apache/solr/search/TestDocSet.java
@@ -22,7 +22,6 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
@@ -32,12 +31,14 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
@@ -467,6 +468,11 @@ public class TestDocSet extends LuceneTestCase {
@Override
public void checkIntegrity() throws IOException {
}
+
+ @Override
+ public Sort getIndexSort() {
+ return null;
+ }
};
}
diff --git a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java
index ffb495e9f4b..08a9037608a 100644
--- a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java
+++ b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java
@@ -23,7 +23,6 @@ import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SimpleMergedSegmentWarmer;
-import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
@@ -31,6 +30,7 @@ import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.TestMergePolicyConfig;
+import org.apache.solr.index.SortingMergePolicy;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IndexSchemaFactory;
import org.junit.BeforeClass;
diff --git a/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java b/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java
index 8840d34b354..8cc417b33f6 100644
--- a/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java
+++ b/solr/core/src/test/org/apache/solr/util/DateMathParserTest.java
@@ -16,11 +16,10 @@
*/
package org.apache.solr.util;
-import java.text.DateFormat;
import java.text.ParseException;
-import java.text.SimpleDateFormat;
import java.time.Instant;
-import java.util.Calendar;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
@@ -40,44 +39,37 @@ public class DateMathParserTest extends LuceneTestCase {
* A formatter for specifying every last nuance of a Date for easy
* reference in assertion statements
*/
- private DateFormat fmt;
+ private DateTimeFormatter fmt;
+
/**
* A parser for reading in explicit dates that are convenient to type
* in a test
*/
- private DateFormat parser;
+ private DateTimeFormatter parser;
public DateMathParserTest() {
- super();
- fmt = new SimpleDateFormat
- ("G yyyyy MM ww WW DD dd F E aa HH hh mm ss SSS z Z",Locale.ROOT);
- fmt.setTimeZone(UTC);
+ fmt = DateTimeFormatter.ofPattern("G yyyyy MM ww W D dd F E a HH hh mm ss SSS z Z", Locale.ROOT)
+ .withZone(ZoneOffset.UTC);
- parser = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS",Locale.ROOT);
- parser.setTimeZone(UTC);
+ parser = DateTimeFormatter.ISO_LOCAL_DATE_TIME.withZone(ZoneOffset.UTC); // basically without the 'Z'
}
/** MACRO: Round: parses s, rounds with u, fmts */
protected String r(String s, String u) throws Exception {
- Date d = parser.parse(s);
- Calendar c = Calendar.getInstance(UTC, Locale.ROOT);
- c.setTime(d);
- DateMathParser.round(c, u);
- return fmt.format(c.getTime());
+ Date dt = DateMathParser.parseMath(null, s + "Z/" + u);
+ return fmt.format(dt.toInstant());
}
/** MACRO: Add: parses s, adds v u, fmts */
protected String a(String s, int v, String u) throws Exception {
- Date d = parser.parse(s);
- Calendar c = Calendar.getInstance(UTC, Locale.ROOT);
- c.setTime(d);
- DateMathParser.add(c, v, u);
- return fmt.format(c.getTime());
+ char sign = v >= 0 ? '+' : '-';
+ Date dt = DateMathParser.parseMath(null, s + 'Z' + sign + Math.abs(v) + u);
+ return fmt.format(dt.toInstant());
}
/** MACRO: Expected: parses s, fmts */
protected String e(String s) throws Exception {
- return fmt.format(parser.parse(s));
+ return fmt.format(parser.parse(s, Instant::from));
}
protected void assertRound(String e, String i, String u) throws Exception {
@@ -85,6 +77,7 @@ public class DateMathParserTest extends LuceneTestCase {
String rr = r(i,u);
assertEquals(ee + " != " + rr + " round:" + i + ":" + u, ee, rr);
}
+
protected void assertAdd(String e, String i, int v, String u)
throws Exception {
@@ -97,13 +90,17 @@ public class DateMathParserTest extends LuceneTestCase {
throws Exception {
String ee = e(e);
- String aa = fmt.format(p.parseMath(i));
+ String aa = fmt.format(p.parseMath(i).toInstant());
assertEquals(ee + " != " + aa + " math:" +
- parser.format(p.getNow()) + ":" + i, ee, aa);
+ parser.format(p.getNow().toInstant()) + ":" + i, ee, aa);
+ }
+
+ private void setNow(DateMathParser p, String text) {
+ p.setNow(Date.from(parser.parse(text, Instant::from)));
}
public void testCalendarUnitsConsistency() throws Exception {
- String input = "2001-07-04T12:08:56.235";
+ String input = "1234-07-04T12:08:56.235";
for (String u : DateMathParser.CALENDAR_UNITS.keySet()) {
try {
r(input, u);
@@ -120,20 +117,20 @@ public class DateMathParserTest extends LuceneTestCase {
public void testRound() throws Exception {
- String input = "2001-07-04T12:08:56.235";
+ String input = "1234-07-04T12:08:56.235";
- assertRound("2001-07-04T12:08:56.000", input, "SECOND");
- assertRound("2001-07-04T12:08:00.000", input, "MINUTE");
- assertRound("2001-07-04T12:00:00.000", input, "HOUR");
- assertRound("2001-07-04T00:00:00.000", input, "DAY");
- assertRound("2001-07-01T00:00:00.000", input, "MONTH");
- assertRound("2001-01-01T00:00:00.000", input, "YEAR");
+ assertRound("1234-07-04T12:08:56.000", input, "SECOND");
+ assertRound("1234-07-04T12:08:00.000", input, "MINUTE");
+ assertRound("1234-07-04T12:00:00.000", input, "HOUR");
+ assertRound("1234-07-04T00:00:00.000", input, "DAY");
+ assertRound("1234-07-01T00:00:00.000", input, "MONTH");
+ assertRound("1234-01-01T00:00:00.000", input, "YEAR");
}
public void testAddZero() throws Exception {
- String input = "2001-07-04T12:08:56.235";
+ String input = "1234-07-04T12:08:56.235";
for (String u : DateMathParser.CALENDAR_UNITS.keySet()) {
assertAdd(input, input, 0, u);
@@ -143,24 +140,24 @@ public class DateMathParserTest extends LuceneTestCase {
public void testAdd() throws Exception {
- String input = "2001-07-04T12:08:56.235";
+ String input = "1234-07-04T12:08:56.235";
- assertAdd("2001-07-04T12:08:56.236", input, 1, "MILLISECOND");
- assertAdd("2001-07-04T12:08:57.235", input, 1, "SECOND");
- assertAdd("2001-07-04T12:09:56.235", input, 1, "MINUTE");
- assertAdd("2001-07-04T13:08:56.235", input, 1, "HOUR");
- assertAdd("2001-07-05T12:08:56.235", input, 1, "DAY");
- assertAdd("2001-08-04T12:08:56.235", input, 1, "MONTH");
- assertAdd("2002-07-04T12:08:56.235", input, 1, "YEAR");
+ assertAdd("1234-07-04T12:08:56.236", input, 1, "MILLISECOND");
+ assertAdd("1234-07-04T12:08:57.235", input, 1, "SECOND");
+ assertAdd("1234-07-04T12:09:56.235", input, 1, "MINUTE");
+ assertAdd("1234-07-04T13:08:56.235", input, 1, "HOUR");
+ assertAdd("1234-07-05T12:08:56.235", input, 1, "DAY");
+ assertAdd("1234-08-04T12:08:56.235", input, 1, "MONTH");
+ assertAdd("1235-07-04T12:08:56.235", input, 1, "YEAR");
}
public void testParseStatelessness() throws Exception {
- DateMathParser p = new DateMathParser(UTC, Locale.ROOT);
- p.setNow(parser.parse("2001-07-04T12:08:56.235"));
+ DateMathParser p = new DateMathParser(UTC);
+ setNow(p, "1234-07-04T12:08:56.235");
- String e = fmt.format(p.parseMath(""));
+ String e = fmt.format(p.parseMath("").toInstant());
Date trash = p.parseMath("+7YEARS");
trash = p.parseMath("/MONTH");
@@ -168,90 +165,89 @@ public class DateMathParserTest extends LuceneTestCase {
Thread.currentThread();
Thread.sleep(5);
- String a = fmt.format(p.parseMath(""));
+ String a = fmt.format(p.parseMath("").toInstant());
assertEquals("State of DateMathParser changed", e, a);
}
-
+
public void testParseMath() throws Exception {
- DateMathParser p = new DateMathParser(UTC, Locale.ROOT);
- p.setNow(parser.parse("2001-07-04T12:08:56.235"));
+ DateMathParser p = new DateMathParser(UTC);
+ setNow(p, "1234-07-04T12:08:56.235");
// No-Op
- assertMath("2001-07-04T12:08:56.235", p, "");
+ assertMath("1234-07-04T12:08:56.235", p, "");
// simple round
- assertMath("2001-07-04T12:08:56.000", p, "/SECOND");
- assertMath("2001-07-04T12:08:00.000", p, "/MINUTE");
- assertMath("2001-07-04T12:00:00.000", p, "/HOUR");
- assertMath("2001-07-04T00:00:00.000", p, "/DAY");
- assertMath("2001-07-01T00:00:00.000", p, "/MONTH");
- assertMath("2001-01-01T00:00:00.000", p, "/YEAR");
+ assertMath("1234-07-04T12:08:56.235", p, "/MILLIS"); // no change
+ assertMath("1234-07-04T12:08:56.000", p, "/SECOND");
+ assertMath("1234-07-04T12:08:00.000", p, "/MINUTE");
+ assertMath("1234-07-04T12:00:00.000", p, "/HOUR");
+ assertMath("1234-07-04T00:00:00.000", p, "/DAY");
+ assertMath("1234-07-01T00:00:00.000", p, "/MONTH");
+ assertMath("1234-01-01T00:00:00.000", p, "/YEAR");
// simple addition
- assertMath("2001-07-04T12:08:56.236", p, "+1MILLISECOND");
- assertMath("2001-07-04T12:08:57.235", p, "+1SECOND");
- assertMath("2001-07-04T12:09:56.235", p, "+1MINUTE");
- assertMath("2001-07-04T13:08:56.235", p, "+1HOUR");
- assertMath("2001-07-05T12:08:56.235", p, "+1DAY");
- assertMath("2001-08-04T12:08:56.235", p, "+1MONTH");
- assertMath("2002-07-04T12:08:56.235", p, "+1YEAR");
+ assertMath("1234-07-04T12:08:56.236", p, "+1MILLISECOND");
+ assertMath("1234-07-04T12:08:57.235", p, "+1SECOND");
+ assertMath("1234-07-04T12:09:56.235", p, "+1MINUTE");
+ assertMath("1234-07-04T13:08:56.235", p, "+1HOUR");
+ assertMath("1234-07-05T12:08:56.235", p, "+1DAY");
+ assertMath("1234-08-04T12:08:56.235", p, "+1MONTH");
+ assertMath("1235-07-04T12:08:56.235", p, "+1YEAR");
// simple subtraction
- assertMath("2001-07-04T12:08:56.234", p, "-1MILLISECOND");
- assertMath("2001-07-04T12:08:55.235", p, "-1SECOND");
- assertMath("2001-07-04T12:07:56.235", p, "-1MINUTE");
- assertMath("2001-07-04T11:08:56.235", p, "-1HOUR");
- assertMath("2001-07-03T12:08:56.235", p, "-1DAY");
- assertMath("2001-06-04T12:08:56.235", p, "-1MONTH");
- assertMath("2000-07-04T12:08:56.235", p, "-1YEAR");
+ assertMath("1234-07-04T12:08:56.234", p, "-1MILLISECOND");
+ assertMath("1234-07-04T12:08:55.235", p, "-1SECOND");
+ assertMath("1234-07-04T12:07:56.235", p, "-1MINUTE");
+ assertMath("1234-07-04T11:08:56.235", p, "-1HOUR");
+ assertMath("1234-07-03T12:08:56.235", p, "-1DAY");
+ assertMath("1234-06-04T12:08:56.235", p, "-1MONTH");
+ assertMath("1233-07-04T12:08:56.235", p, "-1YEAR");
// simple '+/-'
- assertMath("2001-07-04T12:08:56.235", p, "+1MILLISECOND-1MILLISECOND");
- assertMath("2001-07-04T12:08:56.235", p, "+1SECOND-1SECOND");
- assertMath("2001-07-04T12:08:56.235", p, "+1MINUTE-1MINUTE");
- assertMath("2001-07-04T12:08:56.235", p, "+1HOUR-1HOUR");
- assertMath("2001-07-04T12:08:56.235", p, "+1DAY-1DAY");
- assertMath("2001-07-04T12:08:56.235", p, "+1MONTH-1MONTH");
- assertMath("2001-07-04T12:08:56.235", p, "+1YEAR-1YEAR");
+ assertMath("1234-07-04T12:08:56.235", p, "+1MILLISECOND-1MILLISECOND");
+ assertMath("1234-07-04T12:08:56.235", p, "+1SECOND-1SECOND");
+ assertMath("1234-07-04T12:08:56.235", p, "+1MINUTE-1MINUTE");
+ assertMath("1234-07-04T12:08:56.235", p, "+1HOUR-1HOUR");
+ assertMath("1234-07-04T12:08:56.235", p, "+1DAY-1DAY");
+ assertMath("1234-07-04T12:08:56.235", p, "+1MONTH-1MONTH");
+ assertMath("1234-07-04T12:08:56.235", p, "+1YEAR-1YEAR");
// simple '-/+'
- assertMath("2001-07-04T12:08:56.235", p, "-1MILLISECOND+1MILLISECOND");
- assertMath("2001-07-04T12:08:56.235", p, "-1SECOND+1SECOND");
- assertMath("2001-07-04T12:08:56.235", p, "-1MINUTE+1MINUTE");
- assertMath("2001-07-04T12:08:56.235", p, "-1HOUR+1HOUR");
- assertMath("2001-07-04T12:08:56.235", p, "-1DAY+1DAY");
- assertMath("2001-07-04T12:08:56.235", p, "-1MONTH+1MONTH");
- assertMath("2001-07-04T12:08:56.235", p, "-1YEAR+1YEAR");
+ assertMath("1234-07-04T12:08:56.235", p, "-1MILLISECOND+1MILLISECOND");
+ assertMath("1234-07-04T12:08:56.235", p, "-1SECOND+1SECOND");
+ assertMath("1234-07-04T12:08:56.235", p, "-1MINUTE+1MINUTE");
+ assertMath("1234-07-04T12:08:56.235", p, "-1HOUR+1HOUR");
+ assertMath("1234-07-04T12:08:56.235", p, "-1DAY+1DAY");
+ assertMath("1234-07-04T12:08:56.235", p, "-1MONTH+1MONTH");
+ assertMath("1234-07-04T12:08:56.235", p, "-1YEAR+1YEAR");
// more complex stuff
- assertMath("2000-07-04T12:08:56.236", p, "+1MILLISECOND-1YEAR");
- assertMath("2000-07-04T12:08:57.235", p, "+1SECOND-1YEAR");
- assertMath("2000-07-04T12:09:56.235", p, "+1MINUTE-1YEAR");
- assertMath("2000-07-04T13:08:56.235", p, "+1HOUR-1YEAR");
- assertMath("2000-07-05T12:08:56.235", p, "+1DAY-1YEAR");
- assertMath("2000-08-04T12:08:56.235", p, "+1MONTH-1YEAR");
- assertMath("2000-07-04T12:08:56.236", p, "-1YEAR+1MILLISECOND");
- assertMath("2000-07-04T12:08:57.235", p, "-1YEAR+1SECOND");
- assertMath("2000-07-04T12:09:56.235", p, "-1YEAR+1MINUTE");
- assertMath("2000-07-04T13:08:56.235", p, "-1YEAR+1HOUR");
- assertMath("2000-07-05T12:08:56.235", p, "-1YEAR+1DAY");
- assertMath("2000-08-04T12:08:56.235", p, "-1YEAR+1MONTH");
- assertMath("2000-07-01T00:00:00.000", p, "-1YEAR+1MILLISECOND/MONTH");
- assertMath("2000-07-04T00:00:00.000", p, "-1YEAR+1SECOND/DAY");
- assertMath("2000-07-04T00:00:00.000", p, "-1YEAR+1MINUTE/DAY");
- assertMath("2000-07-04T13:00:00.000", p, "-1YEAR+1HOUR/HOUR");
- assertMath("2000-07-05T12:08:56.000", p, "-1YEAR+1DAY/SECOND");
- assertMath("2000-08-04T12:08:56.000", p, "-1YEAR+1MONTH/SECOND");
+ assertMath("1233-07-04T12:08:56.236", p, "+1MILLISECOND-1YEAR");
+ assertMath("1233-07-04T12:08:57.235", p, "+1SECOND-1YEAR");
+ assertMath("1233-07-04T12:09:56.235", p, "+1MINUTE-1YEAR");
+ assertMath("1233-07-04T13:08:56.235", p, "+1HOUR-1YEAR");
+ assertMath("1233-07-05T12:08:56.235", p, "+1DAY-1YEAR");
+ assertMath("1233-08-04T12:08:56.235", p, "+1MONTH-1YEAR");
+ assertMath("1233-07-04T12:08:56.236", p, "-1YEAR+1MILLISECOND");
+ assertMath("1233-07-04T12:08:57.235", p, "-1YEAR+1SECOND");
+ assertMath("1233-07-04T12:09:56.235", p, "-1YEAR+1MINUTE");
+ assertMath("1233-07-04T13:08:56.235", p, "-1YEAR+1HOUR");
+ assertMath("1233-07-05T12:08:56.235", p, "-1YEAR+1DAY");
+ assertMath("1233-08-04T12:08:56.235", p, "-1YEAR+1MONTH");
+ assertMath("1233-07-01T00:00:00.000", p, "-1YEAR+1MILLISECOND/MONTH");
+ assertMath("1233-07-04T00:00:00.000", p, "-1YEAR+1SECOND/DAY");
+ assertMath("1233-07-04T00:00:00.000", p, "-1YEAR+1MINUTE/DAY");
+ assertMath("1233-07-04T13:00:00.000", p, "-1YEAR+1HOUR/HOUR");
+ assertMath("1233-07-05T12:08:56.000", p, "-1YEAR+1DAY/SECOND");
+ assertMath("1233-08-04T12:08:56.000", p, "-1YEAR+1MONTH/SECOND");
// "tricky" cases
- p.setNow(parser.parse("2006-01-31T17:09:59.999"));
+ setNow(p, "2006-01-31T17:09:59.999");
assertMath("2006-02-28T17:09:59.999", p, "+1MONTH");
assertMath("2008-02-29T17:09:59.999", p, "+25MONTH");
assertMath("2006-02-01T00:00:00.000", p, "/MONTH+35DAYS/MONTH");
assertMath("2006-01-31T17:10:00.000", p, "+3MILLIS/MINUTE");
-
-
}
public void testParseMathTz() throws Exception {
@@ -267,13 +263,14 @@ public class DateMathParserTest extends LuceneTestCase {
// US, Positive Offset with DST
TimeZone tz = TimeZone.getTimeZone(PLUS_TZS);
- DateMathParser p = new DateMathParser(tz, Locale.ROOT);
+ DateMathParser p = new DateMathParser(tz);
- p.setNow(parser.parse("2001-07-04T12:08:56.235"));
+ setNow(p, "2001-07-04T12:08:56.235");
// No-Op
assertMath("2001-07-04T12:08:56.235", p, "");
-
+ assertMath("2001-07-04T12:08:56.235", p, "/MILLIS");
+
assertMath("2001-07-04T12:08:56.000", p, "/SECOND");
assertMath("2001-07-04T12:08:00.000", p, "/MINUTE");
assertMath("2001-07-04T12:00:00.000", p, "/HOUR");
@@ -289,8 +286,8 @@ public class DateMathParserTest extends LuceneTestCase {
// France, Negative Offset with DST
tz = TimeZone.getTimeZone(NEG_TZS);
- p = new DateMathParser(tz, Locale.ROOT);
- p.setNow(parser.parse("2001-07-04T12:08:56.235"));
+ p = new DateMathParser(tz);
+ setNow(p, "2001-07-04T12:08:56.235");
assertMath("2001-07-04T12:08:56.000", p, "/SECOND");
assertMath("2001-07-04T12:08:00.000", p, "/MINUTE");
@@ -306,8 +303,8 @@ public class DateMathParserTest extends LuceneTestCase {
public void testParseMathExceptions() throws Exception {
- DateMathParser p = new DateMathParser(UTC, Locale.ROOT);
- p.setNow(parser.parse("2001-07-04T12:08:56.235"));
+ DateMathParser p = new DateMathParser(UTC);
+ setNow(p, "1234-07-04T12:08:56.235");
Map<String,Integer> badCommands = new HashMap<>();
badCommands.put("/", 1);
@@ -373,7 +370,8 @@ public class DateMathParserTest extends LuceneTestCase {
}
private void assertFormat(final String expected, final long millis) {
- assertEquals(expected, Instant.ofEpochMilli(millis).toString());
+ assertEquals(expected, Instant.ofEpochMilli(millis).toString()); // assert same as ISO_INSTANT
+ assertEquals(millis, DateMathParser.parseMath(null, expected).getTime()); // assert DMP has same result
}
/**
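
For reviewers, a minimal sketch (not part of the patch) of the DateMathParser API as the updated test exercises it: the TimeZone-only constructor, a java.util.Date "now", and Date#toInstant() for formatting. The class name org.apache.solr.util.DateMathParser and the sample date/math strings are assumptions taken from the test above.

    // Illustrative sketch only; mirrors the calls made in DateMathParserTest after this change.
    import java.time.Instant;
    import java.util.Date;
    import java.util.TimeZone;

    import org.apache.solr.util.DateMathParser;

    public class DateMathSketch {
      public static void main(String[] args) throws Exception {
        DateMathParser p = new DateMathParser(TimeZone.getTimeZone("UTC"));
        // "now" is supplied as a java.util.Date; Instant.parse expects a trailing 'Z'
        p.setNow(Date.from(Instant.parse("1234-07-04T12:08:56.235Z")));
        Date rounded = p.parseMath("/DAY");          // should round down to the start of the day
        Date shifted = p.parseMath("+1MONTH-1DAY");  // operations are applied left to right
        System.out.println(rounded.toInstant() + " " + shifted.toInstant());
      }
    }
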
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
index 4fed84e00f7..7b59d29d77c 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
@@ -56,6 +56,8 @@ import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.ToleratedUpdateError;
import org.apache.solr.common.cloud.Aliases;
import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.CollectionStatePredicate;
+import org.apache.solr.common.cloud.CollectionStateWatcher;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.ImplicitDocRouter;
@@ -572,6 +574,40 @@ public class CloudSolrClient extends SolrClient {
zkStateReader.getConfigManager().downloadConfigDir(configName, downloadPath);
}
+ /**
+ * Block until a collection state matches a predicate, or a timeout expires
+ *
+ * Note that the predicate may be called again even after it has returned true, so
+ * implementors should avoid changing state within the predicate call itself.
+ *
+ * @param collection the collection to watch
+ * @param wait how long to wait
+ * @param unit the units of the wait parameter
+ * @param predicate a {@link CollectionStatePredicate} to check the collection state
+ * @throws InterruptedException on interrupt
+ * @throws TimeoutException on timeout
+ */
+ public void waitForState(String collection, long wait, TimeUnit unit, CollectionStatePredicate predicate)
+ throws InterruptedException, TimeoutException {
+ connect();
+ zkStateReader.waitForState(collection, wait, unit, predicate);
+ }
+
+ /**
+ * Register a CollectionStateWatcher to be called when the cluster state for a collection changes
+ *
+ * Note that the watcher is unregistered after it has been called once. To make a watcher persistent,
+ * it should re-register itself in its {@link CollectionStateWatcher#onStateChanged(Set, DocCollection)}
+ * call.
+ *
+ * @param collection the collection to watch
+ * @param watcher a watcher that will be called when the state changes
+ */
+ public void registerCollectionStateWatcher(String collection, CollectionStateWatcher watcher) {
+ connect();
+ zkStateReader.registerCollectionStateWatcher(collection, watcher);
+ }
+
private NamedList directUpdate(AbstractUpdateRequest request, String collection, ClusterState clusterState) throws SolrServerException {
UpdateRequest updateRequest = (UpdateRequest) request;
ModifiableSolrParams params = (ModifiableSolrParams) request.getParams();
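
An illustrative usage sketch (not part of the patch) for the waitForState method added above: block until every replica of a collection is ACTIVE and hosted on a live node. The ZK host, collection name, and 30-second timeout are assumptions for the example; the predicate is written as a lambda since CollectionStatePredicate has a single abstract method.

    // Illustrative sketch only; assumes a running SolrCloud cluster at localhost:9983.
    import java.util.concurrent.TimeUnit;

    import org.apache.solr.client.solrj.impl.CloudSolrClient;
    import org.apache.solr.common.cloud.Replica;
    import org.apache.solr.common.cloud.Slice;

    public class WaitForStateSketch {
      public static void main(String[] args) throws Exception {
        try (CloudSolrClient client = new CloudSolrClient("localhost:9983")) {
          client.waitForState("gettingstarted", 30, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
            if (collectionState == null) return false;            // collection may not exist yet
            for (Slice slice : collectionState.getSlices()) {
              for (Replica replica : slice.getReplicas()) {
                if (replica.getState() != Replica.State.ACTIVE
                    || !liveNodes.contains(replica.getNodeName())) {
                  return false;
                }
              }
            }
            return true;                                           // everything active and live
          });
        }
      }
    }
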
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterProperties.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterProperties.java
new file mode 100644
index 00000000000..6645336f13b
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterProperties.java
@@ -0,0 +1,126 @@
+package org.apache.solr.common.cloud;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.Utils;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
+
+/**
+ * Interact with Solr cluster properties.
+ *
+ * Note that all methods on this class make calls to ZK on every invocation. For
+ * read-only eventually-consistent uses, clients should instead call
+ * {@link ZkStateReader#getClusterProperty(String, Object)}
+ */
+public class ClusterProperties {
+
+ private final SolrZkClient client;
+
+ /**
+ * Creates a ClusterProperties object using a provided SolrZkClient
+ */
+ public ClusterProperties(SolrZkClient client) {
+ this.client = client;
+ }
+
+ /**
+ * Read the value of a cluster property, returning a default if it is not set
+ * @param key the property name
+ * @param defaultValue the default value
+ * @param <T> the type of the property
+ * @return the property value
+ * @throws IOException if there is an error reading the value from the cluster
+ */
+ @SuppressWarnings("unchecked")
+ public <T> T getClusterProperty(String key, T defaultValue) throws IOException {
+ T value = (T) getClusterProperties().get(key);
+ if (value == null)
+ return defaultValue;
+ return value;
+ }
+
+ /**
+ * Return the cluster properties
+ * @throws IOException if there is an error reading properties from the cluster
+ */
+ @SuppressWarnings("unchecked")
+ public Map<String, Object> getClusterProperties() throws IOException {
+ try {
+ return (Map<String, Object>) Utils.fromJSON(client.getData(ZkStateReader.CLUSTER_PROPS, null, new Stat(), true));
+ } catch (KeeperException.NoNodeException e) {
+ return Collections.emptyMap();
+ } catch (KeeperException | InterruptedException e) {
+ throw new IOException("Error reading cluster property", SolrZkClient.checkInterrupted(e));
+ }
+ }
+
+ /**
+ * This method sets a cluster property.
+ *
+ * @param propertyName The property name to be set.
+ * @param propertyValue The value of the property.
+ * @throws IOException if there is an error writing data to the cluster
+ */
+ @SuppressWarnings("unchecked")
+ public void setClusterProperty(String propertyName, String propertyValue) throws IOException {
+
+ if (!ZkStateReader.KNOWN_CLUSTER_PROPS.contains(propertyName)) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Not a known cluster property " + propertyName);
+ }
+
+ for (; ; ) {
+ Stat s = new Stat();
+ try {
+ if (client.exists(ZkStateReader.CLUSTER_PROPS, true)) {
+ Map properties = (Map) Utils.fromJSON(client.getData(ZkStateReader.CLUSTER_PROPS, null, s, true));
+ if (propertyValue == null) {
+ //Don't update ZK unless absolutely necessary.
+ if (properties.get(propertyName) != null) {
+ properties.remove(propertyName);
+ client.setData(ZkStateReader.CLUSTER_PROPS, Utils.toJSON(properties), s.getVersion(), true);
+ }
+ } else {
+ //Don't update ZK unless absolutely necessary.
+ if (!propertyValue.equals(properties.get(propertyName))) {
+ properties.put(propertyName, propertyValue);
+ client.setData(ZkStateReader.CLUSTER_PROPS, Utils.toJSON(properties), s.getVersion(), true);
+ }
+ }
+ } else {
+ Map properties = new LinkedHashMap();
+ properties.put(propertyName, propertyValue);
+ client.create(ZkStateReader.CLUSTER_PROPS, Utils.toJSON(properties), CreateMode.PERSISTENT, true);
+ }
+ } catch (KeeperException.BadVersionException | KeeperException.NodeExistsException e) {
+ //race condition
+ continue;
+ } catch (InterruptedException | KeeperException e) {
+ throw new IOException("Error setting cluster property", SolrZkClient.checkInterrupted(e));
+ }
+ break;
+ }
+ }
+}
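
An illustrative usage sketch (not part of the patch) for the new ClusterProperties helper: write and read back a known cluster property. The ZK address, client timeout, and the "urlScheme" value are assumptions for the example; setClusterProperty rejects names not in ZkStateReader.KNOWN_CLUSTER_PROPS, and a null value removes the property.

    // Illustrative sketch only; assumes a reachable ZooKeeper ensemble at localhost:9983.
    import org.apache.solr.common.cloud.ClusterProperties;
    import org.apache.solr.common.cloud.SolrZkClient;

    public class ClusterPropertiesSketch {
      public static void main(String[] args) throws Exception {
        SolrZkClient zkClient = new SolrZkClient("localhost:9983", 30000);
        try {
          ClusterProperties props = new ClusterProperties(zkClient);
          props.setClusterProperty("urlScheme", "https");         // must be a known cluster property
          String scheme = props.getClusterProperty("urlScheme", "http");
          System.out.println("urlScheme=" + scheme);
          props.setClusterProperty("urlScheme", null);            // null deletes the property
        } finally {
          zkClient.close();
        }
      }
    }
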
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStatePredicate.java b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStatePredicate.java
new file mode 100644
index 00000000000..0b0a28eeed0
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStatePredicate.java
@@ -0,0 +1,42 @@
+package org.apache.solr.common.cloud;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Interface to determine if a collection state matches a required state
+ *
+ * @see ZkStateReader#waitForState(String, long, TimeUnit, CollectionStatePredicate)
+ */
+public interface CollectionStatePredicate {
+
+ /**
+ * Check that the collection state matches a required state.
+ *
+ * Note that both liveNodes and collectionState should be consulted to determine
+ * the overall state.
+ *
+ * @param liveNodes the current set of live nodes
+ * @param collectionState the latest collection state, or null if the collection
+ * does not exist
+ */
+ boolean matches(Set<String> liveNodes, DocCollection collectionState);
+
+}
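
A minimal sketch (not part of the patch) of a standalone CollectionStatePredicate: it matches once every shard of the collection has an elected leader on a live node. The class name and the trailing waitForState call are assumptions for the example.

    // Illustrative sketch only; any class implementing CollectionStatePredicate works the same way.
    import java.util.Set;

    import org.apache.solr.common.cloud.CollectionStatePredicate;
    import org.apache.solr.common.cloud.DocCollection;
    import org.apache.solr.common.cloud.Replica;
    import org.apache.solr.common.cloud.Slice;

    public class AllShardsHaveLeaders implements CollectionStatePredicate {

      @Override
      public boolean matches(Set<String> liveNodes, DocCollection collectionState) {
        if (collectionState == null) {
          return false;                      // collection does not exist (yet)
        }
        for (Slice slice : collectionState.getSlices()) {
          Replica leader = slice.getLeader();
          if (leader == null || !liveNodes.contains(leader.getNodeName())) {
            return false;                    // no leader, or the leader's node is down
          }
        }
        return true;
      }
    }
    // e.g. zkStateReader.waitForState("gettingstarted", 30, TimeUnit.SECONDS, new AllShardsHaveLeaders());
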
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStateWatcher.java b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStateWatcher.java
new file mode 100644
index 00000000000..0bf66b012e8
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/CollectionStateWatcher.java
@@ -0,0 +1,42 @@
+package org.apache.solr.common.cloud;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+import java.util.Set;
+
+/**
+ * Callback registered with {@link ZkStateReader#registerCollectionStateWatcher(String, CollectionStateWatcher)}
+ * and called whenever the collection state changes.
+ */
+public interface CollectionStateWatcher {
+
+ /**
+ * Called when the collection we are registered against has a change of state
+ *
+ * Note that, due to the way ZooKeeper watchers are implemented, a single call may be
+ * the result of several state changes.
+ *
+ * A watcher is unregistered after it has been called once. To make a watcher persistent,
+ * implementors should re-register during this call.
+ *
+ * @param liveNodes the set of live nodes
+ * @param collectionState the new collection state
+ */
+ void onStateChanged(Set<String> liveNodes, DocCollection collectionState);
+
+}
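
A minimal sketch (not part of the patch) of a persistent watcher: since a CollectionStateWatcher is unregistered after firing once, it re-registers itself on every callback, as the javadoc above suggests. The CloudSolrClient field, collection name, and logging are assumptions for the example.

    // Illustrative sketch only; registration happens via registerCollectionStateWatcher shown earlier.
    import java.util.Set;

    import org.apache.solr.client.solrj.impl.CloudSolrClient;
    import org.apache.solr.common.cloud.CollectionStateWatcher;
    import org.apache.solr.common.cloud.DocCollection;

    public class LoggingCollectionWatcher implements CollectionStateWatcher {

      private final CloudSolrClient client;
      private final String collection;

      public LoggingCollectionWatcher(CloudSolrClient client, String collection) {
        this.client = client;
        this.collection = collection;
      }

      @Override
      public void onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
        System.out.println(collection + " changed; live nodes: " + liveNodes.size()
            + ", state: " + collectionState);
        // watchers fire only once, so re-register to keep receiving notifications
        client.registerCollectionStateWatcher(collection, this);
      }
    }
    // e.g. client.registerCollectionStateWatcher("gettingstarted", new LoggingCollectionWatcher(client, "gettingstarted"));
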
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java
index d30a73fa145..b5c65a6d847 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java
@@ -22,6 +22,8 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
@@ -35,7 +37,8 @@ import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
/**
* Models a Collection in zookeeper (but that Java name is obviously taken, hence "DocCollection")
*/
-public class DocCollection extends ZkNodeProps {
+public class DocCollection extends ZkNodeProps implements Iterable<Slice> {
+
public static final String DOC_ROUTER = "router";
public static final String SHARDS = "shards";
public static final String STATE_FORMAT = "stateFormat";
@@ -217,4 +220,34 @@ public class DocCollection extends ZkNodeProps {
if (slice == null) return null;
return slice.getLeader();
}
+
+ /**
+ * Check that all replicas in a collection are live
+ *
+ * @see CollectionStatePredicate
+ */
+ public static boolean isFullyActive(Set